diff options
Diffstat (limited to 'tools/testing')
323 files changed, 30640 insertions, 3510 deletions
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 0392153a0009..10ddf223055b 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -22,6 +22,7 @@ NVDIMM_SRC := $(DRIVERS)/nvdimm ACPI_SRC := $(DRIVERS)/acpi/nfit DAX_SRC := $(DRIVERS)/dax ccflags-y := -I$(src)/$(NVDIMM_SRC)/ +ccflags-y += -I$(src)/$(ACPI_SRC)/ obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o @@ -36,6 +37,7 @@ obj-$(CONFIG_DEV_DAX) += device_dax.o obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o nfit-y := $(ACPI_SRC)/core.o +nfit-y += $(ACPI_SRC)/intel.o nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o nfit-y += acpi_nfit_test.o nfit-y += config_check.o @@ -78,6 +80,8 @@ libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o +libnvdimm-$(CONFIG_NVDIMM_KEYS) += $(NVDIMM_SRC)/security.o +libnvdimm-y += dimm_devs.o libnvdimm-y += libnvdimm_test.o libnvdimm-y += config_check.o diff --git a/tools/testing/nvdimm/acpi_nfit_test.c b/tools/testing/nvdimm/acpi_nfit_test.c index 43521512e577..fec8fb1b7715 100644 --- a/tools/testing/nvdimm/acpi_nfit_test.c +++ b/tools/testing/nvdimm/acpi_nfit_test.c @@ -4,5 +4,13 @@ #include <linux/module.h> #include <linux/printk.h> #include "watermark.h" +#include <nfit.h> nfit_test_watermark(acpi_nfit); + +/* strong / override definition of nfit_intel_shutdown_status */ +void nfit_intel_shutdown_status(struct nfit_mem *nfit_mem) +{ + set_bit(NFIT_MEM_DIRTY_COUNT, &nfit_mem->flags); + nfit_mem->dirty_shutdown = 42; +} diff --git a/tools/testing/nvdimm/dimm_devs.c b/tools/testing/nvdimm/dimm_devs.c new file mode 100644 index 000000000000..2d4baf57822f --- /dev/null +++ b/tools/testing/nvdimm/dimm_devs.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Intel Corp. 2018 */ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/nd.h> +#include "pmem.h" +#include "pfn.h" +#include "nd.h" +#include "nd-core.h" + +ssize_t security_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + + /* + * For the test version we need to poll the "hardware" in order + * to get the updated status for unlock testing. + */ + nvdimm->sec.state = nvdimm_security_state(nvdimm, NVDIMM_USER); + nvdimm->sec.ext_state = nvdimm_security_state(nvdimm, NVDIMM_MASTER); + + switch (nvdimm->sec.state) { + case NVDIMM_SECURITY_DISABLED: + return sprintf(buf, "disabled\n"); + case NVDIMM_SECURITY_UNLOCKED: + return sprintf(buf, "unlocked\n"); + case NVDIMM_SECURITY_LOCKED: + return sprintf(buf, "locked\n"); + case NVDIMM_SECURITY_FROZEN: + return sprintf(buf, "frozen\n"); + case NVDIMM_SECURITY_OVERWRITE: + return sprintf(buf, "overwrite\n"); + default: + return -ENOTTY; + } + + return -ENOTTY; +} + diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index ff9d3a5825e1..c6635fee27d8 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -104,16 +104,29 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset, } EXPORT_SYMBOL(__wrap_devm_memremap); +static void nfit_test_kill(void *_pgmap) +{ + struct dev_pagemap *pgmap = _pgmap; + + pgmap->kill(pgmap->ref); +} + void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) { resource_size_t offset = pgmap->res.start; struct nfit_test_resource *nfit_res = get_nfit_res(offset); - if (nfit_res) + if (nfit_res) { + int rc; + + rc = devm_add_action_or_reset(dev, nfit_test_kill, pgmap); + if (rc) + return ERR_PTR(rc); return nfit_res->buf + offset - nfit_res->res.start; + } return devm_memremap_pages(dev, pgmap); } -EXPORT_SYMBOL(__wrap_devm_memremap_pages); +EXPORT_SYMBOL_GPL(__wrap_devm_memremap_pages); pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags) { diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index cffc2c5a778d..b579f962451d 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -15,6 +15,7 @@ #include <linux/dma-mapping.h> #include <linux/workqueue.h> #include <linux/libnvdimm.h> +#include <linux/genalloc.h> #include <linux/vmalloc.h> #include <linux/device.h> #include <linux/module.h> @@ -24,6 +25,7 @@ #include <linux/list.h> #include <linux/slab.h> #include <nd-core.h> +#include <intel.h> #include <nfit.h> #include <nd.h> #include "nfit_test.h" @@ -139,8 +141,15 @@ static u32 handle[] = { [6] = NFIT_DIMM_HANDLE(1, 0, 0, 0, 1), }; -static unsigned long dimm_fail_cmd_flags[NUM_DCR]; -static int dimm_fail_cmd_code[NUM_DCR]; +static unsigned long dimm_fail_cmd_flags[ARRAY_SIZE(handle)]; +static int dimm_fail_cmd_code[ARRAY_SIZE(handle)]; +struct nfit_test_sec { + u8 state; + u8 ext_state; + u8 passphrase[32]; + u8 master_passphrase[32]; + u64 overwrite_end_time; +} dimm_sec_info[NUM_DCR]; static const struct nd_intel_smart smart_def = { .flags = ND_INTEL_SMART_HEALTH_VALID @@ -148,6 +157,7 @@ static const struct nd_intel_smart smart_def = { | ND_INTEL_SMART_ALARM_VALID | ND_INTEL_SMART_USED_VALID | ND_INTEL_SMART_SHUTDOWN_VALID + | ND_INTEL_SMART_SHUTDOWN_COUNT_VALID | ND_INTEL_SMART_MTEMP_VALID | ND_INTEL_SMART_CTEMP_VALID, .health = ND_INTEL_SMART_NON_CRITICAL_HEALTH, @@ -160,8 +170,8 @@ static const struct nd_intel_smart smart_def = { .ait_status = 1, .life_used = 5, .shutdown_state = 0, + .shutdown_count = 42, .vendor_size = 0, - .shutdown_count = 100, }; struct nfit_test_fw { @@ -203,7 +213,7 @@ struct nfit_test { unsigned long deadline; spinlock_t lock; } ars_state; - struct device *dimm_dev[NUM_DCR]; + struct device *dimm_dev[ARRAY_SIZE(handle)]; struct nd_intel_smart *smart; struct nd_intel_smart_threshold *smart_threshold; struct badrange badrange; @@ -213,6 +223,8 @@ struct nfit_test { static struct workqueue_struct *nfit_wq; +static struct gen_pool *nfit_pool; + static struct nfit_test *to_nfit_test(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); @@ -931,6 +943,242 @@ static int override_return_code(int dimm, unsigned int func, int rc) return rc; } +static int nd_intel_test_cmd_security_status(struct nfit_test *t, + struct nd_intel_get_security_state *nd_cmd, + unsigned int buf_len, int dimm) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_sec *sec = &dimm_sec_info[dimm]; + + nd_cmd->status = 0; + nd_cmd->state = sec->state; + nd_cmd->extended_state = sec->ext_state; + dev_dbg(dev, "security state (%#x) returned\n", nd_cmd->state); + + return 0; +} + +static int nd_intel_test_cmd_unlock_unit(struct nfit_test *t, + struct nd_intel_unlock_unit *nd_cmd, + unsigned int buf_len, int dimm) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_sec *sec = &dimm_sec_info[dimm]; + + if (!(sec->state & ND_INTEL_SEC_STATE_LOCKED) || + (sec->state & ND_INTEL_SEC_STATE_FROZEN)) { + nd_cmd->status = ND_INTEL_STATUS_INVALID_STATE; + dev_dbg(dev, "unlock unit: invalid state: %#x\n", + sec->state); + } else if (memcmp(nd_cmd->passphrase, sec->passphrase, + ND_INTEL_PASSPHRASE_SIZE) != 0) { + nd_cmd->status = ND_INTEL_STATUS_INVALID_PASS; + dev_dbg(dev, "unlock unit: invalid passphrase\n"); + } else { + nd_cmd->status = 0; + sec->state = ND_INTEL_SEC_STATE_ENABLED; + dev_dbg(dev, "Unit unlocked\n"); + } + + dev_dbg(dev, "unlocking status returned: %#x\n", nd_cmd->status); + return 0; +} + +static int nd_intel_test_cmd_set_pass(struct nfit_test *t, + struct nd_intel_set_passphrase *nd_cmd, + unsigned int buf_len, int dimm) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_sec *sec = &dimm_sec_info[dimm]; + + if (sec->state & ND_INTEL_SEC_STATE_FROZEN) { + nd_cmd->status = ND_INTEL_STATUS_INVALID_STATE; + dev_dbg(dev, "set passphrase: wrong security state\n"); + } else if (memcmp(nd_cmd->old_pass, sec->passphrase, + ND_INTEL_PASSPHRASE_SIZE) != 0) { + nd_cmd->status = ND_INTEL_STATUS_INVALID_PASS; + dev_dbg(dev, "set passphrase: wrong passphrase\n"); + } else { + memcpy(sec->passphrase, nd_cmd->new_pass, + ND_INTEL_PASSPHRASE_SIZE); + sec->state |= ND_INTEL_SEC_STATE_ENABLED; + nd_cmd->status = 0; + dev_dbg(dev, "passphrase updated\n"); + } + + return 0; +} + +static int nd_intel_test_cmd_freeze_lock(struct nfit_test *t, + struct nd_intel_freeze_lock *nd_cmd, + unsigned int buf_len, int dimm) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_sec *sec = &dimm_sec_info[dimm]; + + if (!(sec->state & ND_INTEL_SEC_STATE_ENABLED)) { + nd_cmd->status = ND_INTEL_STATUS_INVALID_STATE; + dev_dbg(dev, "freeze lock: wrong security state\n"); + } else { + sec->state |= ND_INTEL_SEC_STATE_FROZEN; + nd_cmd->status = 0; + dev_dbg(dev, "security frozen\n"); + } + + return 0; +} + +static int nd_intel_test_cmd_disable_pass(struct nfit_test *t, + struct nd_intel_disable_passphrase *nd_cmd, + unsigned int buf_len, int dimm) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_sec *sec = &dimm_sec_info[dimm]; + + if (!(sec->state & ND_INTEL_SEC_STATE_ENABLED) || + (sec->state & ND_INTEL_SEC_STATE_FROZEN)) { + nd_cmd->status = ND_INTEL_STATUS_INVALID_STATE; + dev_dbg(dev, "disable passphrase: wrong security state\n"); + } else if (memcmp(nd_cmd->passphrase, sec->passphrase, + ND_INTEL_PASSPHRASE_SIZE) != 0) { + nd_cmd->status = ND_INTEL_STATUS_INVALID_PASS; + dev_dbg(dev, "disable passphrase: wrong passphrase\n"); + } else { + memset(sec->passphrase, 0, ND_INTEL_PASSPHRASE_SIZE); + sec->state = 0; + dev_dbg(dev, "disable passphrase: done\n"); + } + + return 0; +} + +static int nd_intel_test_cmd_secure_erase(struct nfit_test *t, + struct nd_intel_secure_erase *nd_cmd, + unsigned int buf_len, int dimm) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_sec *sec = &dimm_sec_info[dimm]; + + if (!(sec->state & ND_INTEL_SEC_STATE_ENABLED) || + (sec->state & ND_INTEL_SEC_STATE_FROZEN)) { + nd_cmd->status = ND_INTEL_STATUS_INVALID_STATE; + dev_dbg(dev, "secure erase: wrong security state\n"); + } else if (memcmp(nd_cmd->passphrase, sec->passphrase, + ND_INTEL_PASSPHRASE_SIZE) != 0) { + nd_cmd->status = ND_INTEL_STATUS_INVALID_PASS; + dev_dbg(dev, "secure erase: wrong passphrase\n"); + } else { + memset(sec->passphrase, 0, ND_INTEL_PASSPHRASE_SIZE); + memset(sec->master_passphrase, 0, ND_INTEL_PASSPHRASE_SIZE); + sec->state = 0; + sec->ext_state = ND_INTEL_SEC_ESTATE_ENABLED; + dev_dbg(dev, "secure erase: done\n"); + } + + return 0; +} + +static int nd_intel_test_cmd_overwrite(struct nfit_test *t, + struct nd_intel_overwrite *nd_cmd, + unsigned int buf_len, int dimm) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_sec *sec = &dimm_sec_info[dimm]; + + if ((sec->state & ND_INTEL_SEC_STATE_ENABLED) && + memcmp(nd_cmd->passphrase, sec->passphrase, + ND_INTEL_PASSPHRASE_SIZE) != 0) { + nd_cmd->status = ND_INTEL_STATUS_INVALID_PASS; + dev_dbg(dev, "overwrite: wrong passphrase\n"); + return 0; + } + + memset(sec->passphrase, 0, ND_INTEL_PASSPHRASE_SIZE); + sec->state = ND_INTEL_SEC_STATE_OVERWRITE; + dev_dbg(dev, "overwrite progressing.\n"); + sec->overwrite_end_time = get_jiffies_64() + 5 * HZ; + + return 0; +} + +static int nd_intel_test_cmd_query_overwrite(struct nfit_test *t, + struct nd_intel_query_overwrite *nd_cmd, + unsigned int buf_len, int dimm) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_sec *sec = &dimm_sec_info[dimm]; + + if (!(sec->state & ND_INTEL_SEC_STATE_OVERWRITE)) { + nd_cmd->status = ND_INTEL_STATUS_OQUERY_SEQUENCE_ERR; + return 0; + } + + if (time_is_before_jiffies64(sec->overwrite_end_time)) { + sec->overwrite_end_time = 0; + sec->state = 0; + sec->ext_state = ND_INTEL_SEC_ESTATE_ENABLED; + dev_dbg(dev, "overwrite is complete\n"); + } else + nd_cmd->status = ND_INTEL_STATUS_OQUERY_INPROGRESS; + return 0; +} + +static int nd_intel_test_cmd_master_set_pass(struct nfit_test *t, + struct nd_intel_set_master_passphrase *nd_cmd, + unsigned int buf_len, int dimm) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_sec *sec = &dimm_sec_info[dimm]; + + if (!(sec->ext_state & ND_INTEL_SEC_ESTATE_ENABLED)) { + nd_cmd->status = ND_INTEL_STATUS_NOT_SUPPORTED; + dev_dbg(dev, "master set passphrase: in wrong state\n"); + } else if (sec->ext_state & ND_INTEL_SEC_ESTATE_PLIMIT) { + nd_cmd->status = ND_INTEL_STATUS_INVALID_STATE; + dev_dbg(dev, "master set passphrase: in wrong security state\n"); + } else if (memcmp(nd_cmd->old_pass, sec->master_passphrase, + ND_INTEL_PASSPHRASE_SIZE) != 0) { + nd_cmd->status = ND_INTEL_STATUS_INVALID_PASS; + dev_dbg(dev, "master set passphrase: wrong passphrase\n"); + } else { + memcpy(sec->master_passphrase, nd_cmd->new_pass, + ND_INTEL_PASSPHRASE_SIZE); + sec->ext_state = ND_INTEL_SEC_ESTATE_ENABLED; + dev_dbg(dev, "master passphrase: updated\n"); + } + + return 0; +} + +static int nd_intel_test_cmd_master_secure_erase(struct nfit_test *t, + struct nd_intel_master_secure_erase *nd_cmd, + unsigned int buf_len, int dimm) +{ + struct device *dev = &t->pdev.dev; + struct nfit_test_sec *sec = &dimm_sec_info[dimm]; + + if (!(sec->ext_state & ND_INTEL_SEC_ESTATE_ENABLED)) { + nd_cmd->status = ND_INTEL_STATUS_NOT_SUPPORTED; + dev_dbg(dev, "master secure erase: in wrong state\n"); + } else if (sec->ext_state & ND_INTEL_SEC_ESTATE_PLIMIT) { + nd_cmd->status = ND_INTEL_STATUS_INVALID_STATE; + dev_dbg(dev, "master secure erase: in wrong security state\n"); + } else if (memcmp(nd_cmd->passphrase, sec->master_passphrase, + ND_INTEL_PASSPHRASE_SIZE) != 0) { + nd_cmd->status = ND_INTEL_STATUS_INVALID_PASS; + dev_dbg(dev, "master secure erase: wrong passphrase\n"); + } else { + /* we do not erase master state passphrase ever */ + sec->ext_state = ND_INTEL_SEC_ESTATE_ENABLED; + memset(sec->passphrase, 0, ND_INTEL_PASSPHRASE_SIZE); + sec->state = 0; + dev_dbg(dev, "master secure erase: done\n"); + } + + return 0; +} + + static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func) { int i; @@ -978,6 +1226,46 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, return i; switch (func) { + case NVDIMM_INTEL_GET_SECURITY_STATE: + rc = nd_intel_test_cmd_security_status(t, + buf, buf_len, i); + break; + case NVDIMM_INTEL_UNLOCK_UNIT: + rc = nd_intel_test_cmd_unlock_unit(t, + buf, buf_len, i); + break; + case NVDIMM_INTEL_SET_PASSPHRASE: + rc = nd_intel_test_cmd_set_pass(t, + buf, buf_len, i); + break; + case NVDIMM_INTEL_DISABLE_PASSPHRASE: + rc = nd_intel_test_cmd_disable_pass(t, + buf, buf_len, i); + break; + case NVDIMM_INTEL_FREEZE_LOCK: + rc = nd_intel_test_cmd_freeze_lock(t, + buf, buf_len, i); + break; + case NVDIMM_INTEL_SECURE_ERASE: + rc = nd_intel_test_cmd_secure_erase(t, + buf, buf_len, i); + break; + case NVDIMM_INTEL_OVERWRITE: + rc = nd_intel_test_cmd_overwrite(t, + buf, buf_len, i - t->dcr_idx); + break; + case NVDIMM_INTEL_QUERY_OVERWRITE: + rc = nd_intel_test_cmd_query_overwrite(t, + buf, buf_len, i - t->dcr_idx); + break; + case NVDIMM_INTEL_SET_MASTER_PASSPHRASE: + rc = nd_intel_test_cmd_master_set_pass(t, + buf, buf_len, i); + break; + case NVDIMM_INTEL_MASTER_SECURE_ERASE: + rc = nd_intel_test_cmd_master_secure_erase(t, + buf, buf_len, i); + break; case ND_INTEL_ENABLE_LSS_STATUS: rc = nd_intel_test_cmd_set_lss_status(t, buf, buf_len); @@ -1130,6 +1418,9 @@ static void release_nfit_res(void *data) list_del(&nfit_res->list); spin_unlock(&nfit_test_lock); + if (resource_size(&nfit_res->res) >= DIMM_SIZE) + gen_pool_free(nfit_pool, nfit_res->res.start, + resource_size(&nfit_res->res)); vfree(nfit_res->buf); kfree(nfit_res); } @@ -1142,7 +1433,7 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma, GFP_KERNEL); int rc; - if (!buf || !nfit_res) + if (!buf || !nfit_res || !*dma) goto err; rc = devm_add_action(dev, release_nfit_res, nfit_res); if (rc) @@ -1162,6 +1453,8 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma, return nfit_res->buf; err: + if (*dma && size >= DIMM_SIZE) + gen_pool_free(nfit_pool, *dma, size); if (buf) vfree(buf); kfree(nfit_res); @@ -1170,9 +1463,16 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma, static void *test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma) { + struct genpool_data_align data = { + .align = SZ_128M, + }; void *buf = vmalloc(size); - *dma = (unsigned long) buf; + if (size >= DIMM_SIZE) + *dma = gen_pool_alloc_algo(nfit_pool, size, + gen_pool_first_fit_align, &data); + else + *dma = (unsigned long) buf; return __test_alloc(t, size, dma, buf); } @@ -1311,10 +1611,22 @@ static ssize_t fail_cmd_code_store(struct device *dev, struct device_attribute * } static DEVICE_ATTR_RW(fail_cmd_code); +static ssize_t lock_dimm_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t size) +{ + int dimm = dimm_name_to_id(dev); + struct nfit_test_sec *sec = &dimm_sec_info[dimm]; + + sec->state = ND_INTEL_SEC_STATE_ENABLED | ND_INTEL_SEC_STATE_LOCKED; + return size; +} +static DEVICE_ATTR_WO(lock_dimm); + static struct attribute *nfit_test_dimm_attributes[] = { &dev_attr_fail_cmd.attr, &dev_attr_fail_cmd_code.attr, &dev_attr_handle.attr, + &dev_attr_lock_dimm.attr, NULL, }; @@ -1344,6 +1656,17 @@ static int nfit_test_dimm_init(struct nfit_test *t) return 0; } +static void security_init(struct nfit_test *t) +{ + int i; + + for (i = 0; i < t->num_dcr; i++) { + struct nfit_test_sec *sec = &dimm_sec_info[i]; + + sec->ext_state = ND_INTEL_SEC_ESTATE_ENABLED; + } +} + static void smart_init(struct nfit_test *t) { int i; @@ -1422,6 +1745,7 @@ static int nfit_test0_alloc(struct nfit_test *t) if (nfit_test_dimm_init(t)) return -ENOMEM; smart_init(t); + security_init(t); return ars_state_init(&t->pdev.dev, &t->ars_state); } @@ -2193,6 +2517,20 @@ static void nfit_test0_setup(struct nfit_test *t) set_bit(ND_INTEL_FW_FINISH_UPDATE, &acpi_desc->dimm_cmd_force_en); set_bit(ND_INTEL_FW_FINISH_QUERY, &acpi_desc->dimm_cmd_force_en); set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en); + set_bit(NVDIMM_INTEL_GET_SECURITY_STATE, + &acpi_desc->dimm_cmd_force_en); + set_bit(NVDIMM_INTEL_SET_PASSPHRASE, &acpi_desc->dimm_cmd_force_en); + set_bit(NVDIMM_INTEL_DISABLE_PASSPHRASE, + &acpi_desc->dimm_cmd_force_en); + set_bit(NVDIMM_INTEL_UNLOCK_UNIT, &acpi_desc->dimm_cmd_force_en); + set_bit(NVDIMM_INTEL_FREEZE_LOCK, &acpi_desc->dimm_cmd_force_en); + set_bit(NVDIMM_INTEL_SECURE_ERASE, &acpi_desc->dimm_cmd_force_en); + set_bit(NVDIMM_INTEL_OVERWRITE, &acpi_desc->dimm_cmd_force_en); + set_bit(NVDIMM_INTEL_QUERY_OVERWRITE, &acpi_desc->dimm_cmd_force_en); + set_bit(NVDIMM_INTEL_SET_MASTER_PASSPHRASE, + &acpi_desc->dimm_cmd_force_en); + set_bit(NVDIMM_INTEL_MASTER_SECURE_ERASE, + &acpi_desc->dimm_cmd_force_en); } static void nfit_test1_setup(struct nfit_test *t) @@ -2678,7 +3016,7 @@ static int nfit_test_probe(struct platform_device *pdev) u32 nfit_handle = __to_nfit_memdev(nfit_mem)->device_handle; int i; - for (i = 0; i < NUM_DCR; i++) + for (i = 0; i < ARRAY_SIZE(handle); i++) if (nfit_handle == handle[i]) dev_set_drvdata(nfit_test->dimm_dev[i], nfit_mem); @@ -2837,6 +3175,17 @@ static __init int nfit_test_init(void) goto err_register; } + nfit_pool = gen_pool_create(ilog2(SZ_4M), NUMA_NO_NODE); + if (!nfit_pool) { + rc = -ENOMEM; + goto err_register; + } + + if (gen_pool_add(nfit_pool, SZ_4G, SZ_4G, NUMA_NO_NODE)) { + rc = -ENOMEM; + goto err_register; + } + for (i = 0; i < NUM_NFITS; i++) { struct nfit_test *nfit_test; struct platform_device *pdev; @@ -2892,6 +3241,9 @@ static __init int nfit_test_init(void) return 0; err_register: + if (nfit_pool) + gen_pool_destroy(nfit_pool); + destroy_workqueue(nfit_wq); for (i = 0; i < NUM_NFITS; i++) if (instances[i]) @@ -2915,6 +3267,8 @@ static __exit void nfit_test_exit(void) platform_driver_unregister(&nfit_test_driver); nfit_test_teardown(); + gen_pool_destroy(nfit_pool); + for (i = 0; i < NUM_NFITS; i++) put_device(&instances[i]->pdev.dev); class_destroy(nfit_test_dimm); diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h index 33752e06ff8d..ade14fe3837e 100644 --- a/tools/testing/nvdimm/test/nfit_test.h +++ b/tools/testing/nvdimm/test/nfit_test.h @@ -117,30 +117,6 @@ struct nd_cmd_ars_err_inj_stat { #define ND_INTEL_SMART_INJECT_FATAL (1 << 2) #define ND_INTEL_SMART_INJECT_SHUTDOWN (1 << 3) -struct nd_intel_smart { - __u32 status; - union { - struct { - __u32 flags; - __u8 reserved0[4]; - __u8 health; - __u8 spares; - __u8 life_used; - __u8 alarm_flags; - __u16 media_temperature; - __u16 ctrl_temperature; - __u32 shutdown_count; - __u8 ait_status; - __u16 pmic_temperature; - __u8 reserved1[8]; - __u8 shutdown_state; - __u32 vendor_size; - __u8 vendor_data[92]; - } __packed; - __u8 data[128]; - }; -} __packed; - struct nd_intel_smart_threshold { __u32 status; union { diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore index d4706c0ffceb..3834899b6693 100644 --- a/tools/testing/radix-tree/.gitignore +++ b/tools/testing/radix-tree/.gitignore @@ -4,3 +4,4 @@ idr-test main multiorder radix-tree.c +xarray diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index 37baecc3766f..397d6b612502 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -4,9 +4,10 @@ CFLAGS += -I. -I../../include -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address \ -fsanitize=undefined LDFLAGS += -fsanitize=address -fsanitize=undefined LDLIBS+= -lpthread -lurcu -TARGETS = main idr-test multiorder -CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o +TARGETS = main idr-test multiorder xarray +CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o bitmap.o OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \ + regression4.o \ tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o ifndef SHIFT @@ -25,6 +26,8 @@ main: $(OFILES) idr-test.o: ../../../lib/test_ida.c idr-test: idr-test.o $(CORE_OFILES) +xarray: $(CORE_OFILES) + multiorder: multiorder.o $(CORE_OFILES) clean: @@ -35,6 +38,7 @@ vpath %.c ../../lib $(OFILES): Makefile *.h */*.h generated/map-shift.h \ ../../include/linux/*.h \ ../../include/asm/*.h \ + ../../../include/linux/xarray.h \ ../../../include/linux/radix-tree.h \ ../../../include/linux/idr.h @@ -44,8 +48,10 @@ radix-tree.c: ../../../lib/radix-tree.c idr.c: ../../../lib/idr.c sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@ +xarray.o: ../../../lib/xarray.c ../../../lib/test_xarray.c + generated/map-shift.h: @if ! grep -qws $(SHIFT) generated/map-shift.h; then \ - echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" > \ + echo "#define XA_CHUNK_SHIFT $(SHIFT)" > \ generated/map-shift.h; \ fi diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c index 99c40f3ed133..7e195ed8e92d 100644 --- a/tools/testing/radix-tree/benchmark.c +++ b/tools/testing/radix-tree/benchmark.c @@ -17,9 +17,6 @@ #include <time.h> #include "test.h" -#define for_each_index(i, base, order) \ - for (i = base; i < base + (1 << order); i++) - #define NSEC_PER_SEC 1000000000L static long long benchmark_iter(struct radix_tree_root *root, bool tagged) @@ -61,7 +58,7 @@ again: } static void benchmark_insert(struct radix_tree_root *root, - unsigned long size, unsigned long step, int order) + unsigned long size, unsigned long step) { struct timespec start, finish; unsigned long index; @@ -70,19 +67,19 @@ static void benchmark_insert(struct radix_tree_root *root, clock_gettime(CLOCK_MONOTONIC, &start); for (index = 0 ; index < size ; index += step) - item_insert_order(root, index, order); + item_insert(root, index); clock_gettime(CLOCK_MONOTONIC, &finish); nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + (finish.tv_nsec - start.tv_nsec); - printv(2, "Size: %8ld, step: %8ld, order: %d, insertion: %15lld ns\n", - size, step, order, nsec); + printv(2, "Size: %8ld, step: %8ld, insertion: %15lld ns\n", + size, step, nsec); } static void benchmark_tagging(struct radix_tree_root *root, - unsigned long size, unsigned long step, int order) + unsigned long size, unsigned long step) { struct timespec start, finish; unsigned long index; @@ -98,138 +95,53 @@ static void benchmark_tagging(struct radix_tree_root *root, nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + (finish.tv_nsec - start.tv_nsec); - printv(2, "Size: %8ld, step: %8ld, order: %d, tagging: %17lld ns\n", - size, step, order, nsec); + printv(2, "Size: %8ld, step: %8ld, tagging: %17lld ns\n", + size, step, nsec); } static void benchmark_delete(struct radix_tree_root *root, - unsigned long size, unsigned long step, int order) + unsigned long size, unsigned long step) { struct timespec start, finish; - unsigned long index, i; + unsigned long index; long long nsec; clock_gettime(CLOCK_MONOTONIC, &start); for (index = 0 ; index < size ; index += step) - for_each_index(i, index, order) - item_delete(root, i); + item_delete(root, index); clock_gettime(CLOCK_MONOTONIC, &finish); nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + (finish.tv_nsec - start.tv_nsec); - printv(2, "Size: %8ld, step: %8ld, order: %d, deletion: %16lld ns\n", - size, step, order, nsec); + printv(2, "Size: %8ld, step: %8ld, deletion: %16lld ns\n", + size, step, nsec); } -static void benchmark_size(unsigned long size, unsigned long step, int order) +static void benchmark_size(unsigned long size, unsigned long step) { RADIX_TREE(tree, GFP_KERNEL); long long normal, tagged; - benchmark_insert(&tree, size, step, order); - benchmark_tagging(&tree, size, step, order); + benchmark_insert(&tree, size, step); + benchmark_tagging(&tree, size, step); tagged = benchmark_iter(&tree, true); normal = benchmark_iter(&tree, false); - printv(2, "Size: %8ld, step: %8ld, order: %d, tagged iteration: %8lld ns\n", - size, step, order, tagged); - printv(2, "Size: %8ld, step: %8ld, order: %d, normal iteration: %8lld ns\n", - size, step, order, normal); + printv(2, "Size: %8ld, step: %8ld, tagged iteration: %8lld ns\n", + size, step, tagged); + printv(2, "Size: %8ld, step: %8ld, normal iteration: %8lld ns\n", + size, step, normal); - benchmark_delete(&tree, size, step, order); + benchmark_delete(&tree, size, step); item_kill_tree(&tree); rcu_barrier(); } -static long long __benchmark_split(unsigned long index, - int old_order, int new_order) -{ - struct timespec start, finish; - long long nsec; - RADIX_TREE(tree, GFP_ATOMIC); - - item_insert_order(&tree, index, old_order); - - clock_gettime(CLOCK_MONOTONIC, &start); - radix_tree_split(&tree, index, new_order); - clock_gettime(CLOCK_MONOTONIC, &finish); - nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + - (finish.tv_nsec - start.tv_nsec); - - item_kill_tree(&tree); - - return nsec; - -} - -static void benchmark_split(unsigned long size, unsigned long step) -{ - int i, j, idx; - long long nsec = 0; - - - for (idx = 0; idx < size; idx += step) { - for (i = 3; i < 11; i++) { - for (j = 0; j < i; j++) { - nsec += __benchmark_split(idx, i, j); - } - } - } - - printv(2, "Size %8ld, step %8ld, split time %10lld ns\n", - size, step, nsec); - -} - -static long long __benchmark_join(unsigned long index, - unsigned order1, unsigned order2) -{ - unsigned long loc; - struct timespec start, finish; - long long nsec; - void *item, *item2 = item_create(index + 1, order1); - RADIX_TREE(tree, GFP_KERNEL); - - item_insert_order(&tree, index, order2); - item = radix_tree_lookup(&tree, index); - - clock_gettime(CLOCK_MONOTONIC, &start); - radix_tree_join(&tree, index + 1, order1, item2); - clock_gettime(CLOCK_MONOTONIC, &finish); - nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC + - (finish.tv_nsec - start.tv_nsec); - - loc = find_item(&tree, item); - if (loc == -1) - free(item); - - item_kill_tree(&tree); - - return nsec; -} - -static void benchmark_join(unsigned long step) -{ - int i, j, idx; - long long nsec = 0; - - for (idx = 0; idx < 1 << 10; idx += step) { - for (i = 1; i < 15; i++) { - for (j = 0; j < i; j++) { - nsec += __benchmark_join(idx, i, j); - } - } - } - - printv(2, "Size %8d, step %8ld, join time %10lld ns\n", - 1 << 10, step, nsec); -} - void benchmark(void) { unsigned long size[] = {1 << 10, 1 << 20, 0}; @@ -242,16 +154,5 @@ void benchmark(void) for (c = 0; size[c]; c++) for (s = 0; step[s]; s++) - benchmark_size(size[c], step[s], 0); - - for (c = 0; size[c]; c++) - for (s = 0; step[s]; s++) - benchmark_size(size[c], step[s] << 9, 9); - - for (c = 0; size[c]; c++) - for (s = 0; step[s]; s++) - benchmark_split(size[c], step[s]); - - for (s = 0; step[s]; s++) - benchmark_join(step[s]); + benchmark_size(size[c], step[s]); } diff --git a/tools/testing/radix-tree/bitmap.c b/tools/testing/radix-tree/bitmap.c new file mode 100644 index 000000000000..66ec4a24a203 --- /dev/null +++ b/tools/testing/radix-tree/bitmap.c @@ -0,0 +1,23 @@ +/* lib/bitmap.c pulls in at least two other files. */ + +#include <linux/bitmap.h> + +void bitmap_clear(unsigned long *map, unsigned int start, int len) +{ + unsigned long *p = map + BIT_WORD(start); + const unsigned int size = start + len; + int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); + + while (len - bits_to_clear >= 0) { + *p &= ~mask_to_clear; + len -= bits_to_clear; + bits_to_clear = BITS_PER_LONG; + mask_to_clear = ~0UL; + p++; + } + if (len) { + mask_to_clear &= BITMAP_LAST_WORD_MASK(size); + *p &= ~mask_to_clear; + } +} diff --git a/tools/testing/radix-tree/generated/autoconf.h b/tools/testing/radix-tree/generated/autoconf.h index cf88dc5b8832..2218b3cc184e 100644 --- a/tools/testing/radix-tree/generated/autoconf.h +++ b/tools/testing/radix-tree/generated/autoconf.h @@ -1 +1 @@ -#define CONFIG_RADIX_TREE_MULTIORDER 1 +#define CONFIG_XARRAY_MULTI 1 diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 321ba92c70d2..1b63bdb7688f 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -19,7 +19,7 @@ #include "test.h" -#define DUMMY_PTR ((void *)0x12) +#define DUMMY_PTR ((void *)0x10) int item_idr_free(int id, void *p, void *data) { @@ -227,6 +227,66 @@ void idr_u32_test(int base) idr_u32_test1(&idr, 0xffffffff); } +static void idr_align_test(struct idr *idr) +{ + char name[] = "Motorola 68000"; + int i, id; + void *entry; + + for (i = 0; i < 9; i++) { + BUG_ON(idr_alloc(idr, &name[i], 0, 0, GFP_KERNEL) != i); + idr_for_each_entry(idr, entry, id); + } + idr_destroy(idr); + + for (i = 1; i < 10; i++) { + BUG_ON(idr_alloc(idr, &name[i], 0, 0, GFP_KERNEL) != i - 1); + idr_for_each_entry(idr, entry, id); + } + idr_destroy(idr); + + for (i = 2; i < 11; i++) { + BUG_ON(idr_alloc(idr, &name[i], 0, 0, GFP_KERNEL) != i - 2); + idr_for_each_entry(idr, entry, id); + } + idr_destroy(idr); + + for (i = 3; i < 12; i++) { + BUG_ON(idr_alloc(idr, &name[i], 0, 0, GFP_KERNEL) != i - 3); + idr_for_each_entry(idr, entry, id); + } + idr_destroy(idr); + + for (i = 0; i < 8; i++) { + BUG_ON(idr_alloc(idr, &name[i], 0, 0, GFP_KERNEL) != 0); + BUG_ON(idr_alloc(idr, &name[i + 1], 0, 0, GFP_KERNEL) != 1); + idr_for_each_entry(idr, entry, id); + idr_remove(idr, 1); + idr_for_each_entry(idr, entry, id); + idr_remove(idr, 0); + BUG_ON(!idr_is_empty(idr)); + } + + for (i = 0; i < 8; i++) { + BUG_ON(idr_alloc(idr, NULL, 0, 0, GFP_KERNEL) != 0); + idr_for_each_entry(idr, entry, id); + idr_replace(idr, &name[i], 0); + idr_for_each_entry(idr, entry, id); + BUG_ON(idr_find(idr, 0) != &name[i]); + idr_remove(idr, 0); + } + + for (i = 0; i < 8; i++) { + BUG_ON(idr_alloc(idr, &name[i], 0, 0, GFP_KERNEL) != 0); + BUG_ON(idr_alloc(idr, NULL, 0, 0, GFP_KERNEL) != 1); + idr_remove(idr, 1); + idr_for_each_entry(idr, entry, id); + idr_replace(idr, &name[i + 1], 0); + idr_for_each_entry(idr, entry, id); + idr_remove(idr, 0); + } +} + void idr_checks(void) { unsigned long i; @@ -307,6 +367,7 @@ void idr_checks(void) idr_u32_test(4); idr_u32_test(1); idr_u32_test(0); + idr_align_test(&idr); } #define module_init(x) @@ -344,16 +405,16 @@ void ida_check_conv_user(void) DEFINE_IDA(ida); unsigned long i; - radix_tree_cpu_dead(1); for (i = 0; i < 1000000; i++) { int id = ida_alloc(&ida, GFP_NOWAIT); if (id == -ENOMEM) { - IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) != - BITS_PER_LONG - 2); + IDA_BUG_ON(&ida, ((i % IDA_BITMAP_BITS) != + BITS_PER_XA_VALUE) && + ((i % IDA_BITMAP_BITS) != 0)); id = ida_alloc(&ida, GFP_KERNEL); } else { IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) == - BITS_PER_LONG - 2); + BITS_PER_XA_VALUE); } IDA_BUG_ON(&ida, id != i); } diff --git a/tools/testing/radix-tree/iteration_check.c b/tools/testing/radix-tree/iteration_check.c index a92bab513701..238db187aa15 100644 --- a/tools/testing/radix-tree/iteration_check.c +++ b/tools/testing/radix-tree/iteration_check.c @@ -1,5 +1,5 @@ /* - * iteration_check.c: test races having to do with radix tree iteration + * iteration_check.c: test races having to do with xarray iteration * Copyright (c) 2016 Intel Corporation * Author: Ross Zwisler <ross.zwisler@linux.intel.com> * @@ -12,41 +12,54 @@ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. */ -#include <linux/radix-tree.h> #include <pthread.h> #include "test.h" #define NUM_THREADS 5 #define MAX_IDX 100 -#define TAG 0 -#define NEW_TAG 1 +#define TAG XA_MARK_0 +#define NEW_TAG XA_MARK_1 -static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER; static pthread_t threads[NUM_THREADS]; static unsigned int seeds[3]; -static RADIX_TREE(tree, GFP_KERNEL); +static DEFINE_XARRAY(array); static bool test_complete; static int max_order; -/* relentlessly fill the tree with tagged entries */ +void my_item_insert(struct xarray *xa, unsigned long index) +{ + XA_STATE(xas, xa, index); + struct item *item = item_create(index, 0); + int order; + +retry: + xas_lock(&xas); + for (order = max_order; order >= 0; order--) { + xas_set_order(&xas, index, order); + item->order = order; + if (xas_find_conflict(&xas)) + continue; + xas_store(&xas, item); + xas_set_mark(&xas, TAG); + break; + } + xas_unlock(&xas); + if (xas_nomem(&xas, GFP_KERNEL)) + goto retry; + if (order < 0) + free(item); +} + +/* relentlessly fill the array with tagged entries */ static void *add_entries_fn(void *arg) { rcu_register_thread(); while (!test_complete) { unsigned long pgoff; - int order; for (pgoff = 0; pgoff < MAX_IDX; pgoff++) { - pthread_mutex_lock(&tree_lock); - for (order = max_order; order >= 0; order--) { - if (item_insert_order(&tree, pgoff, order) - == 0) { - item_tag_set(&tree, pgoff, TAG); - break; - } - } - pthread_mutex_unlock(&tree_lock); + my_item_insert(&array, pgoff); } } @@ -56,33 +69,25 @@ static void *add_entries_fn(void *arg) } /* - * Iterate over the tagged entries, doing a radix_tree_iter_retry() as we find - * things that have been removed and randomly resetting our iteration to the - * next chunk with radix_tree_iter_resume(). Both radix_tree_iter_retry() and - * radix_tree_iter_resume() cause radix_tree_next_slot() to be called with a - * NULL 'slot' variable. + * Iterate over tagged entries, retrying when we find ourselves in a deleted + * node and randomly pausing the iteration. */ static void *tagged_iteration_fn(void *arg) { - struct radix_tree_iter iter; - void **slot; + XA_STATE(xas, &array, 0); + void *entry; rcu_register_thread(); while (!test_complete) { + xas_set(&xas, 0); rcu_read_lock(); - radix_tree_for_each_tagged(slot, &tree, &iter, 0, TAG) { - void *entry = radix_tree_deref_slot(slot); - if (unlikely(!entry)) + xas_for_each_marked(&xas, entry, ULONG_MAX, TAG) { + if (xas_retry(&xas, entry)) continue; - if (radix_tree_deref_retry(entry)) { - slot = radix_tree_iter_retry(&iter); - continue; - } - if (rand_r(&seeds[0]) % 50 == 0) { - slot = radix_tree_iter_resume(slot, &iter); + xas_pause(&xas); rcu_read_unlock(); rcu_barrier(); rcu_read_lock(); @@ -97,33 +102,25 @@ static void *tagged_iteration_fn(void *arg) } /* - * Iterate over the entries, doing a radix_tree_iter_retry() as we find things - * that have been removed and randomly resetting our iteration to the next - * chunk with radix_tree_iter_resume(). Both radix_tree_iter_retry() and - * radix_tree_iter_resume() cause radix_tree_next_slot() to be called with a - * NULL 'slot' variable. + * Iterate over the entries, retrying when we find ourselves in a deleted + * node and randomly pausing the iteration. */ static void *untagged_iteration_fn(void *arg) { - struct radix_tree_iter iter; - void **slot; + XA_STATE(xas, &array, 0); + void *entry; rcu_register_thread(); while (!test_complete) { + xas_set(&xas, 0); rcu_read_lock(); - radix_tree_for_each_slot(slot, &tree, &iter, 0) { - void *entry = radix_tree_deref_slot(slot); - if (unlikely(!entry)) + xas_for_each(&xas, entry, ULONG_MAX) { + if (xas_retry(&xas, entry)) continue; - if (radix_tree_deref_retry(entry)) { - slot = radix_tree_iter_retry(&iter); - continue; - } - if (rand_r(&seeds[1]) % 50 == 0) { - slot = radix_tree_iter_resume(slot, &iter); + xas_pause(&xas); rcu_read_unlock(); rcu_barrier(); rcu_read_lock(); @@ -138,7 +135,7 @@ static void *untagged_iteration_fn(void *arg) } /* - * Randomly remove entries to help induce radix_tree_iter_retry() calls in the + * Randomly remove entries to help induce retries in the * two iteration functions. */ static void *remove_entries_fn(void *arg) @@ -147,12 +144,13 @@ static void *remove_entries_fn(void *arg) while (!test_complete) { int pgoff; + struct item *item; pgoff = rand_r(&seeds[2]) % MAX_IDX; - pthread_mutex_lock(&tree_lock); - item_delete(&tree, pgoff); - pthread_mutex_unlock(&tree_lock); + item = xa_erase(&array, pgoff); + if (item) + item_free(item, pgoff); } rcu_unregister_thread(); @@ -165,8 +163,7 @@ static void *tag_entries_fn(void *arg) rcu_register_thread(); while (!test_complete) { - tag_tagged_items(&tree, &tree_lock, 0, MAX_IDX, 10, TAG, - NEW_TAG); + tag_tagged_items(&array, 0, MAX_IDX, 10, TAG, NEW_TAG); } rcu_unregister_thread(); return NULL; @@ -217,5 +214,5 @@ void iteration_test(unsigned order, unsigned test_duration) } } - item_kill_tree(&tree); + item_kill_tree(&array); } diff --git a/tools/testing/radix-tree/linux/bug.h b/tools/testing/radix-tree/linux/bug.h index 23b8ed52f8c8..03dc8a57eb99 100644 --- a/tools/testing/radix-tree/linux/bug.h +++ b/tools/testing/radix-tree/linux/bug.h @@ -1 +1,2 @@ +#include <stdio.h> #include "asm/bug.h" diff --git a/tools/testing/radix-tree/linux/kconfig.h b/tools/testing/radix-tree/linux/kconfig.h new file mode 100644 index 000000000000..6c8675859913 --- /dev/null +++ b/tools/testing/radix-tree/linux/kconfig.h @@ -0,0 +1 @@ +#include "../../../../include/linux/kconfig.h" diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index 426f32f28547..4568248222ae 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -14,7 +14,12 @@ #include "../../../include/linux/kconfig.h" #define printk printf +#define pr_info printk #define pr_debug printk #define pr_cont printk +#define __acquires(x) +#define __releases(x) +#define __must_hold(x) + #endif /* _KERNEL_H */ diff --git a/tools/testing/radix-tree/linux/lockdep.h b/tools/testing/radix-tree/linux/lockdep.h new file mode 100644 index 000000000000..565fccdfe6e9 --- /dev/null +++ b/tools/testing/radix-tree/linux/lockdep.h @@ -0,0 +1,11 @@ +#ifndef _LINUX_LOCKDEP_H +#define _LINUX_LOCKDEP_H +struct lock_class_key { + unsigned int a; +}; + +static inline void lockdep_set_class(spinlock_t *lock, + struct lock_class_key *key) +{ +} +#endif /* _LINUX_LOCKDEP_H */ diff --git a/tools/testing/radix-tree/linux/radix-tree.h b/tools/testing/radix-tree/linux/radix-tree.h index 24f13d27a8da..d1635a5bef02 100644 --- a/tools/testing/radix-tree/linux/radix-tree.h +++ b/tools/testing/radix-tree/linux/radix-tree.h @@ -2,7 +2,6 @@ #ifndef _TEST_RADIX_TREE_H #define _TEST_RADIX_TREE_H -#include "generated/map-shift.h" #include "../../../../include/linux/radix-tree.h" extern int kmalloc_verbose; diff --git a/tools/testing/radix-tree/linux/rcupdate.h b/tools/testing/radix-tree/linux/rcupdate.h index 73ed33658203..fd280b070fdb 100644 --- a/tools/testing/radix-tree/linux/rcupdate.h +++ b/tools/testing/radix-tree/linux/rcupdate.h @@ -6,5 +6,7 @@ #define rcu_dereference_raw(p) rcu_dereference(p) #define rcu_dereference_protected(p, cond) rcu_dereference(p) +#define rcu_dereference_check(p, cond) rcu_dereference(p) +#define RCU_INIT_POINTER(p, v) (p) = (v) #endif diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c index b741686e53d6..7a22d6e3732e 100644 --- a/tools/testing/radix-tree/main.c +++ b/tools/testing/radix-tree/main.c @@ -214,7 +214,7 @@ void copy_tag_check(void) } // printf("\ncopying tags...\n"); - tagged = tag_tagged_items(&tree, NULL, start, end, ITEMS, 0, 1); + tagged = tag_tagged_items(&tree, start, end, ITEMS, XA_MARK_0, XA_MARK_1); // printf("checking copied tags\n"); assert(tagged == count); @@ -223,7 +223,7 @@ void copy_tag_check(void) /* Copy tags in several rounds */ // printf("\ncopying tags...\n"); tmp = rand() % (count / 10 + 2); - tagged = tag_tagged_items(&tree, NULL, start, end, tmp, 0, 2); + tagged = tag_tagged_items(&tree, start, end, tmp, XA_MARK_0, XA_MARK_2); assert(tagged == count); // printf("%lu %lu %lu\n", tagged, tmp, count); @@ -236,63 +236,6 @@ void copy_tag_check(void) item_kill_tree(&tree); } -static void __locate_check(struct radix_tree_root *tree, unsigned long index, - unsigned order) -{ - struct item *item; - unsigned long index2; - - item_insert_order(tree, index, order); - item = item_lookup(tree, index); - index2 = find_item(tree, item); - if (index != index2) { - printv(2, "index %ld order %d inserted; found %ld\n", - index, order, index2); - abort(); - } -} - -static void __order_0_locate_check(void) -{ - RADIX_TREE(tree, GFP_KERNEL); - int i; - - for (i = 0; i < 50; i++) - __locate_check(&tree, rand() % INT_MAX, 0); - - item_kill_tree(&tree); -} - -static void locate_check(void) -{ - RADIX_TREE(tree, GFP_KERNEL); - unsigned order; - unsigned long offset, index; - - __order_0_locate_check(); - - for (order = 0; order < 20; order++) { - for (offset = 0; offset < (1 << (order + 3)); - offset += (1UL << order)) { - for (index = 0; index < (1UL << (order + 5)); - index += (1UL << order)) { - __locate_check(&tree, index + offset, order); - } - if (find_item(&tree, &tree) != -1) - abort(); - - item_kill_tree(&tree); - } - } - - if (find_item(&tree, &tree) != -1) - abort(); - __locate_check(&tree, -1, 0); - if (find_item(&tree, &tree) != -1) - abort(); - item_kill_tree(&tree); -} - static void single_thread_tests(bool long_run) { int i; @@ -303,10 +246,6 @@ static void single_thread_tests(bool long_run) rcu_barrier(); printv(2, "after multiorder_check: %d allocated, preempt %d\n", nr_allocated, preempt_count); - locate_check(); - rcu_barrier(); - printv(2, "after locate_check: %d allocated, preempt %d\n", - nr_allocated, preempt_count); tag_check(); rcu_barrier(); printv(2, "after tag_check: %d allocated, preempt %d\n", @@ -365,9 +304,11 @@ int main(int argc, char **argv) rcu_register_thread(); radix_tree_init(); + xarray_tests(); regression1_test(); regression2_test(); regression3_test(); + regression4_test(); iteration_test(0, 10 + 90 * long_run); iteration_test(7, 10 + 90 * long_run); single_thread_tests(long_run); diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 7bf405638b0b..ff27a74d9762 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -20,230 +20,39 @@ #include "test.h" -#define for_each_index(i, base, order) \ - for (i = base; i < base + (1 << order); i++) - -static void __multiorder_tag_test(int index, int order) -{ - RADIX_TREE(tree, GFP_KERNEL); - int base, err, i; - - /* our canonical entry */ - base = index & ~((1 << order) - 1); - - printv(2, "Multiorder tag test with index %d, canonical entry %d\n", - index, base); - - err = item_insert_order(&tree, index, order); - assert(!err); - - /* - * Verify we get collisions for covered indices. We try and fail to - * insert an exceptional entry so we don't leak memory via - * item_insert_order(). - */ - for_each_index(i, base, order) { - err = __radix_tree_insert(&tree, i, order, - (void *)(0xA0 | RADIX_TREE_EXCEPTIONAL_ENTRY)); - assert(err == -EEXIST); - } - - for_each_index(i, base, order) { - assert(!radix_tree_tag_get(&tree, i, 0)); - assert(!radix_tree_tag_get(&tree, i, 1)); - } - - assert(radix_tree_tag_set(&tree, index, 0)); - - for_each_index(i, base, order) { - assert(radix_tree_tag_get(&tree, i, 0)); - assert(!radix_tree_tag_get(&tree, i, 1)); - } - - assert(tag_tagged_items(&tree, NULL, 0, ~0UL, 10, 0, 1) == 1); - assert(radix_tree_tag_clear(&tree, index, 0)); - - for_each_index(i, base, order) { - assert(!radix_tree_tag_get(&tree, i, 0)); - assert(radix_tree_tag_get(&tree, i, 1)); - } - - assert(radix_tree_tag_clear(&tree, index, 1)); - - assert(!radix_tree_tagged(&tree, 0)); - assert(!radix_tree_tagged(&tree, 1)); - - item_kill_tree(&tree); -} - -static void __multiorder_tag_test2(unsigned order, unsigned long index2) +static int item_insert_order(struct xarray *xa, unsigned long index, + unsigned order) { - RADIX_TREE(tree, GFP_KERNEL); - unsigned long index = (1 << order); - index2 += index; - - assert(item_insert_order(&tree, 0, order) == 0); - assert(item_insert(&tree, index2) == 0); - - assert(radix_tree_tag_set(&tree, 0, 0)); - assert(radix_tree_tag_set(&tree, index2, 0)); - - assert(tag_tagged_items(&tree, NULL, 0, ~0UL, 10, 0, 1) == 2); - - item_kill_tree(&tree); -} - -static void multiorder_tag_tests(void) -{ - int i, j; - - /* test multi-order entry for indices 0-7 with no sibling pointers */ - __multiorder_tag_test(0, 3); - __multiorder_tag_test(5, 3); - - /* test multi-order entry for indices 8-15 with no sibling pointers */ - __multiorder_tag_test(8, 3); - __multiorder_tag_test(15, 3); - - /* - * Our order 5 entry covers indices 0-31 in a tree with height=2. - * This is broken up as follows: - * 0-7: canonical entry - * 8-15: sibling 1 - * 16-23: sibling 2 - * 24-31: sibling 3 - */ - __multiorder_tag_test(0, 5); - __multiorder_tag_test(29, 5); - - /* same test, but with indices 32-63 */ - __multiorder_tag_test(32, 5); - __multiorder_tag_test(44, 5); - - /* - * Our order 8 entry covers indices 0-255 in a tree with height=3. - * This is broken up as follows: - * 0-63: canonical entry - * 64-127: sibling 1 - * 128-191: sibling 2 - * 192-255: sibling 3 - */ - __multiorder_tag_test(0, 8); - __multiorder_tag_test(190, 8); - - /* same test, but with indices 256-511 */ - __multiorder_tag_test(256, 8); - __multiorder_tag_test(300, 8); - - __multiorder_tag_test(0x12345678UL, 8); - - for (i = 1; i < 10; i++) - for (j = 0; j < (10 << i); j++) - __multiorder_tag_test2(i, j); -} - -static void multiorder_check(unsigned long index, int order) -{ - unsigned long i; - unsigned long min = index & ~((1UL << order) - 1); - unsigned long max = min + (1UL << order); - void **slot; - struct item *item2 = item_create(min, order); - RADIX_TREE(tree, GFP_KERNEL); - - printv(2, "Multiorder index %ld, order %d\n", index, order); - - assert(item_insert_order(&tree, index, order) == 0); - - for (i = min; i < max; i++) { - struct item *item = item_lookup(&tree, i); - assert(item != 0); - assert(item->index == index); - } - for (i = 0; i < min; i++) - item_check_absent(&tree, i); - for (i = max; i < 2*max; i++) - item_check_absent(&tree, i); - for (i = min; i < max; i++) - assert(radix_tree_insert(&tree, i, item2) == -EEXIST); - - slot = radix_tree_lookup_slot(&tree, index); - free(*slot); - radix_tree_replace_slot(&tree, slot, item2); - for (i = min; i < max; i++) { - struct item *item = item_lookup(&tree, i); - assert(item != 0); - assert(item->index == min); - } - - assert(item_delete(&tree, min) != 0); - - for (i = 0; i < 2*max; i++) - item_check_absent(&tree, i); -} - -static void multiorder_shrink(unsigned long index, int order) -{ - unsigned long i; - unsigned long max = 1 << order; - RADIX_TREE(tree, GFP_KERNEL); - struct radix_tree_node *node; - - printv(2, "Multiorder shrink index %ld, order %d\n", index, order); + XA_STATE_ORDER(xas, xa, index, order); + struct item *item = item_create(index, order); - assert(item_insert_order(&tree, 0, order) == 0); - - node = tree.rnode; - - assert(item_insert(&tree, index) == 0); - assert(node != tree.rnode); - - assert(item_delete(&tree, index) != 0); - assert(node == tree.rnode); - - for (i = 0; i < max; i++) { - struct item *item = item_lookup(&tree, i); - assert(item != 0); - assert(item->index == 0); - } - for (i = max; i < 2*max; i++) - item_check_absent(&tree, i); - - if (!item_delete(&tree, 0)) { - printv(2, "failed to delete index %ld (order %d)\n", index, order); - abort(); - } - - for (i = 0; i < 2*max; i++) - item_check_absent(&tree, i); -} - -static void multiorder_insert_bug(void) -{ - RADIX_TREE(tree, GFP_KERNEL); + do { + xas_lock(&xas); + xas_store(&xas, item); + xas_unlock(&xas); + } while (xas_nomem(&xas, GFP_KERNEL)); - item_insert(&tree, 0); - radix_tree_tag_set(&tree, 0, 0); - item_insert_order(&tree, 3 << 6, 6); + if (!xas_error(&xas)) + return 0; - item_kill_tree(&tree); + free(item); + return xas_error(&xas); } -void multiorder_iteration(void) +void multiorder_iteration(struct xarray *xa) { - RADIX_TREE(tree, GFP_KERNEL); - struct radix_tree_iter iter; - void **slot; + XA_STATE(xas, xa, 0); + struct item *item; int i, j, err; - printv(1, "Multiorder iteration test\n"); - #define NUM_ENTRIES 11 int index[NUM_ENTRIES] = {0, 2, 4, 8, 16, 32, 34, 36, 64, 72, 128}; int order[NUM_ENTRIES] = {1, 1, 2, 3, 4, 1, 0, 1, 3, 0, 7}; + printv(1, "Multiorder iteration test\n"); + for (i = 0; i < NUM_ENTRIES; i++) { - err = item_insert_order(&tree, index[i], order[i]); + err = item_insert_order(xa, index[i], order[i]); assert(!err); } @@ -252,14 +61,14 @@ void multiorder_iteration(void) if (j <= (index[i] | ((1 << order[i]) - 1))) break; - radix_tree_for_each_slot(slot, &tree, &iter, j) { - int height = order[i] / RADIX_TREE_MAP_SHIFT; - int shift = height * RADIX_TREE_MAP_SHIFT; + xas_set(&xas, j); + xas_for_each(&xas, item, ULONG_MAX) { + int height = order[i] / XA_CHUNK_SHIFT; + int shift = height * XA_CHUNK_SHIFT; unsigned long mask = (1UL << order[i]) - 1; - struct item *item = *slot; - assert((iter.index | mask) == (index[i] | mask)); - assert(iter.shift == shift); + assert((xas.xa_index | mask) == (index[i] | mask)); + assert(xas.xa_node->shift == shift); assert(!radix_tree_is_internal_node(item)); assert((item->index | mask) == (index[i] | mask)); assert(item->order == order[i]); @@ -267,18 +76,15 @@ void multiorder_iteration(void) } } - item_kill_tree(&tree); + item_kill_tree(xa); } -void multiorder_tagged_iteration(void) +void multiorder_tagged_iteration(struct xarray *xa) { - RADIX_TREE(tree, GFP_KERNEL); - struct radix_tree_iter iter; - void **slot; + XA_STATE(xas, xa, 0); + struct item *item; int i, j; - printv(1, "Multiorder tagged iteration test\n"); - #define MT_NUM_ENTRIES 9 int index[MT_NUM_ENTRIES] = {0, 2, 4, 16, 32, 40, 64, 72, 128}; int order[MT_NUM_ENTRIES] = {1, 0, 2, 4, 3, 1, 3, 0, 7}; @@ -286,13 +92,15 @@ void multiorder_tagged_iteration(void) #define TAG_ENTRIES 7 int tag_index[TAG_ENTRIES] = {0, 4, 16, 40, 64, 72, 128}; + printv(1, "Multiorder tagged iteration test\n"); + for (i = 0; i < MT_NUM_ENTRIES; i++) - assert(!item_insert_order(&tree, index[i], order[i])); + assert(!item_insert_order(xa, index[i], order[i])); - assert(!radix_tree_tagged(&tree, 1)); + assert(!xa_marked(xa, XA_MARK_1)); for (i = 0; i < TAG_ENTRIES; i++) - assert(radix_tree_tag_set(&tree, tag_index[i], 1)); + xa_set_mark(xa, tag_index[i], XA_MARK_1); for (j = 0; j < 256; j++) { int k; @@ -304,23 +112,23 @@ void multiorder_tagged_iteration(void) break; } - radix_tree_for_each_tagged(slot, &tree, &iter, j, 1) { + xas_set(&xas, j); + xas_for_each_marked(&xas, item, ULONG_MAX, XA_MARK_1) { unsigned long mask; - struct item *item = *slot; for (k = i; index[k] < tag_index[i]; k++) ; mask = (1UL << order[k]) - 1; - assert((iter.index | mask) == (tag_index[i] | mask)); - assert(!radix_tree_is_internal_node(item)); + assert((xas.xa_index | mask) == (tag_index[i] | mask)); + assert(!xa_is_internal(item)); assert((item->index | mask) == (tag_index[i] | mask)); assert(item->order == order[k]); i++; } } - assert(tag_tagged_items(&tree, NULL, 0, ~0UL, TAG_ENTRIES, 1, 2) == - TAG_ENTRIES); + assert(tag_tagged_items(xa, 0, ULONG_MAX, TAG_ENTRIES, XA_MARK_1, + XA_MARK_2) == TAG_ENTRIES); for (j = 0; j < 256; j++) { int mask, k; @@ -332,297 +140,31 @@ void multiorder_tagged_iteration(void) break; } - radix_tree_for_each_tagged(slot, &tree, &iter, j, 2) { - struct item *item = *slot; + xas_set(&xas, j); + xas_for_each_marked(&xas, item, ULONG_MAX, XA_MARK_2) { for (k = i; index[k] < tag_index[i]; k++) ; mask = (1 << order[k]) - 1; - assert((iter.index | mask) == (tag_index[i] | mask)); - assert(!radix_tree_is_internal_node(item)); + assert((xas.xa_index | mask) == (tag_index[i] | mask)); + assert(!xa_is_internal(item)); assert((item->index | mask) == (tag_index[i] | mask)); assert(item->order == order[k]); i++; } } - assert(tag_tagged_items(&tree, NULL, 1, ~0UL, MT_NUM_ENTRIES * 2, 1, 0) - == TAG_ENTRIES); + assert(tag_tagged_items(xa, 1, ULONG_MAX, MT_NUM_ENTRIES * 2, XA_MARK_1, + XA_MARK_0) == TAG_ENTRIES); i = 0; - radix_tree_for_each_tagged(slot, &tree, &iter, 0, 0) { - assert(iter.index == tag_index[i]); + xas_set(&xas, 0); + xas_for_each_marked(&xas, item, ULONG_MAX, XA_MARK_0) { + assert(xas.xa_index == tag_index[i]); i++; } + assert(i == TAG_ENTRIES); - item_kill_tree(&tree); -} - -/* - * Basic join checks: make sure we can't find an entry in the tree after - * a larger entry has replaced it - */ -static void multiorder_join1(unsigned long index, - unsigned order1, unsigned order2) -{ - unsigned long loc; - void *item, *item2 = item_create(index + 1, order1); - RADIX_TREE(tree, GFP_KERNEL); - - item_insert_order(&tree, index, order2); - item = radix_tree_lookup(&tree, index); - radix_tree_join(&tree, index + 1, order1, item2); - loc = find_item(&tree, item); - if (loc == -1) - free(item); - item = radix_tree_lookup(&tree, index + 1); - assert(item == item2); - item_kill_tree(&tree); -} - -/* - * Check that the accounting of exceptional entries is handled correctly - * by joining an exceptional entry to a normal pointer. - */ -static void multiorder_join2(unsigned order1, unsigned order2) -{ - RADIX_TREE(tree, GFP_KERNEL); - struct radix_tree_node *node; - void *item1 = item_create(0, order1); - void *item2; - - item_insert_order(&tree, 0, order2); - radix_tree_insert(&tree, 1 << order2, (void *)0x12UL); - item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL); - assert(item2 == (void *)0x12UL); - assert(node->exceptional == 1); - - item2 = radix_tree_lookup(&tree, 0); - free(item2); - - radix_tree_join(&tree, 0, order1, item1); - item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL); - assert(item2 == item1); - assert(node->exceptional == 0); - item_kill_tree(&tree); -} - -/* - * This test revealed an accounting bug for exceptional entries at one point. - * Nodes were being freed back into the pool with an elevated exception count - * by radix_tree_join() and then radix_tree_split() was failing to zero the - * count of exceptional entries. - */ -static void multiorder_join3(unsigned int order) -{ - RADIX_TREE(tree, GFP_KERNEL); - struct radix_tree_node *node; - void **slot; - struct radix_tree_iter iter; - unsigned long i; - - for (i = 0; i < (1 << order); i++) { - radix_tree_insert(&tree, i, (void *)0x12UL); - } - - radix_tree_join(&tree, 0, order, (void *)0x16UL); - rcu_barrier(); - - radix_tree_split(&tree, 0, 0); - - radix_tree_for_each_slot(slot, &tree, &iter, 0) { - radix_tree_iter_replace(&tree, &iter, slot, (void *)0x12UL); - } - - __radix_tree_lookup(&tree, 0, &node, NULL); - assert(node->exceptional == node->count); - - item_kill_tree(&tree); -} - -static void multiorder_join(void) -{ - int i, j, idx; - - for (idx = 0; idx < 1024; idx = idx * 2 + 3) { - for (i = 1; i < 15; i++) { - for (j = 0; j < i; j++) { - multiorder_join1(idx, i, j); - } - } - } - - for (i = 1; i < 15; i++) { - for (j = 0; j < i; j++) { - multiorder_join2(i, j); - } - } - - for (i = 3; i < 10; i++) { - multiorder_join3(i); - } -} - -static void check_mem(unsigned old_order, unsigned new_order, unsigned alloc) -{ - struct radix_tree_preload *rtp = &radix_tree_preloads; - if (rtp->nr != 0) - printv(2, "split(%u %u) remaining %u\n", old_order, new_order, - rtp->nr); - /* - * Can't check for equality here as some nodes may have been - * RCU-freed while we ran. But we should never finish with more - * nodes allocated since they should have all been preloaded. - */ - if (nr_allocated > alloc) - printv(2, "split(%u %u) allocated %u %u\n", old_order, new_order, - alloc, nr_allocated); -} - -static void __multiorder_split(int old_order, int new_order) -{ - RADIX_TREE(tree, GFP_ATOMIC); - void **slot; - struct radix_tree_iter iter; - unsigned alloc; - struct item *item; - - radix_tree_preload(GFP_KERNEL); - assert(item_insert_order(&tree, 0, old_order) == 0); - radix_tree_preload_end(); - - /* Wipe out the preloaded cache or it'll confuse check_mem() */ - radix_tree_cpu_dead(0); - - item = radix_tree_tag_set(&tree, 0, 2); - - radix_tree_split_preload(old_order, new_order, GFP_KERNEL); - alloc = nr_allocated; - radix_tree_split(&tree, 0, new_order); - check_mem(old_order, new_order, alloc); - radix_tree_for_each_slot(slot, &tree, &iter, 0) { - radix_tree_iter_replace(&tree, &iter, slot, - item_create(iter.index, new_order)); - } - radix_tree_preload_end(); - - item_kill_tree(&tree); - free(item); -} - -static void __multiorder_split2(int old_order, int new_order) -{ - RADIX_TREE(tree, GFP_KERNEL); - void **slot; - struct radix_tree_iter iter; - struct radix_tree_node *node; - void *item; - - __radix_tree_insert(&tree, 0, old_order, (void *)0x12); - - item = __radix_tree_lookup(&tree, 0, &node, NULL); - assert(item == (void *)0x12); - assert(node->exceptional > 0); - - radix_tree_split(&tree, 0, new_order); - radix_tree_for_each_slot(slot, &tree, &iter, 0) { - radix_tree_iter_replace(&tree, &iter, slot, - item_create(iter.index, new_order)); - } - - item = __radix_tree_lookup(&tree, 0, &node, NULL); - assert(item != (void *)0x12); - assert(node->exceptional == 0); - - item_kill_tree(&tree); -} - -static void __multiorder_split3(int old_order, int new_order) -{ - RADIX_TREE(tree, GFP_KERNEL); - void **slot; - struct radix_tree_iter iter; - struct radix_tree_node *node; - void *item; - - __radix_tree_insert(&tree, 0, old_order, (void *)0x12); - - item = __radix_tree_lookup(&tree, 0, &node, NULL); - assert(item == (void *)0x12); - assert(node->exceptional > 0); - - radix_tree_split(&tree, 0, new_order); - radix_tree_for_each_slot(slot, &tree, &iter, 0) { - radix_tree_iter_replace(&tree, &iter, slot, (void *)0x16); - } - - item = __radix_tree_lookup(&tree, 0, &node, NULL); - assert(item == (void *)0x16); - assert(node->exceptional > 0); - - item_kill_tree(&tree); - - __radix_tree_insert(&tree, 0, old_order, (void *)0x12); - - item = __radix_tree_lookup(&tree, 0, &node, NULL); - assert(item == (void *)0x12); - assert(node->exceptional > 0); - - radix_tree_split(&tree, 0, new_order); - radix_tree_for_each_slot(slot, &tree, &iter, 0) { - if (iter.index == (1 << new_order)) - radix_tree_iter_replace(&tree, &iter, slot, - (void *)0x16); - else - radix_tree_iter_replace(&tree, &iter, slot, NULL); - } - - item = __radix_tree_lookup(&tree, 1 << new_order, &node, NULL); - assert(item == (void *)0x16); - assert(node->count == node->exceptional); - do { - node = node->parent; - if (!node) - break; - assert(node->count == 1); - assert(node->exceptional == 0); - } while (1); - - item_kill_tree(&tree); -} - -static void multiorder_split(void) -{ - int i, j; - - for (i = 3; i < 11; i++) - for (j = 0; j < i; j++) { - __multiorder_split(i, j); - __multiorder_split2(i, j); - __multiorder_split3(i, j); - } -} - -static void multiorder_account(void) -{ - RADIX_TREE(tree, GFP_KERNEL); - struct radix_tree_node *node; - void **slot; - - item_insert_order(&tree, 0, 5); - - __radix_tree_insert(&tree, 1 << 5, 5, (void *)0x12); - __radix_tree_lookup(&tree, 0, &node, NULL); - assert(node->count == node->exceptional * 2); - radix_tree_delete(&tree, 1 << 5); - assert(node->exceptional == 0); - - __radix_tree_insert(&tree, 1 << 5, 5, (void *)0x12); - __radix_tree_lookup(&tree, 1 << 5, &node, &slot); - assert(node->count == node->exceptional * 2); - __radix_tree_replace(&tree, node, slot, NULL, NULL); - assert(node->exceptional == 0); - - item_kill_tree(&tree); + item_kill_tree(xa); } bool stop_iteration = false; @@ -645,68 +187,45 @@ static void *creator_func(void *ptr) static void *iterator_func(void *ptr) { - struct radix_tree_root *tree = ptr; - struct radix_tree_iter iter; + XA_STATE(xas, ptr, 0); struct item *item; - void **slot; while (!stop_iteration) { rcu_read_lock(); - radix_tree_for_each_slot(slot, tree, &iter, 0) { - item = radix_tree_deref_slot(slot); - - if (!item) + xas_for_each(&xas, item, ULONG_MAX) { + if (xas_retry(&xas, item)) continue; - if (radix_tree_deref_retry(item)) { - slot = radix_tree_iter_retry(&iter); - continue; - } - item_sanity(item, iter.index); + item_sanity(item, xas.xa_index); } rcu_read_unlock(); } return NULL; } -static void multiorder_iteration_race(void) +static void multiorder_iteration_race(struct xarray *xa) { const int num_threads = sysconf(_SC_NPROCESSORS_ONLN); pthread_t worker_thread[num_threads]; - RADIX_TREE(tree, GFP_KERNEL); int i; - pthread_create(&worker_thread[0], NULL, &creator_func, &tree); + pthread_create(&worker_thread[0], NULL, &creator_func, xa); for (i = 1; i < num_threads; i++) - pthread_create(&worker_thread[i], NULL, &iterator_func, &tree); + pthread_create(&worker_thread[i], NULL, &iterator_func, xa); for (i = 0; i < num_threads; i++) pthread_join(worker_thread[i], NULL); - item_kill_tree(&tree); + item_kill_tree(xa); } +static DEFINE_XARRAY(array); + void multiorder_checks(void) { - int i; - - for (i = 0; i < 20; i++) { - multiorder_check(200, i); - multiorder_check(0, i); - multiorder_check((1UL << i) + 1, i); - } - - for (i = 0; i < 15; i++) - multiorder_shrink((1UL << (i + RADIX_TREE_MAP_SHIFT)), i); - - multiorder_insert_bug(); - multiorder_tag_tests(); - multiorder_iteration(); - multiorder_tagged_iteration(); - multiorder_join(); - multiorder_split(); - multiorder_account(); - multiorder_iteration_race(); + multiorder_iteration(&array); + multiorder_tagged_iteration(&array); + multiorder_iteration_race(&array); radix_tree_cpu_dead(0); } diff --git a/tools/testing/radix-tree/regression.h b/tools/testing/radix-tree/regression.h index 3c8a1584e9ee..135145af18b7 100644 --- a/tools/testing/radix-tree/regression.h +++ b/tools/testing/radix-tree/regression.h @@ -5,5 +5,6 @@ void regression1_test(void); void regression2_test(void); void regression3_test(void); +void regression4_test(void); #endif diff --git a/tools/testing/radix-tree/regression1.c b/tools/testing/radix-tree/regression1.c index 0aece092f40e..a61c7bcbc72d 100644 --- a/tools/testing/radix-tree/regression1.c +++ b/tools/testing/radix-tree/regression1.c @@ -44,7 +44,6 @@ #include "regression.h" static RADIX_TREE(mt_tree, GFP_KERNEL); -static pthread_mutex_t mt_lock = PTHREAD_MUTEX_INITIALIZER; struct page { pthread_mutex_t lock; @@ -53,12 +52,12 @@ struct page { unsigned long index; }; -static struct page *page_alloc(void) +static struct page *page_alloc(int index) { struct page *p; p = malloc(sizeof(struct page)); p->count = 1; - p->index = 1; + p->index = index; pthread_mutex_init(&p->lock, NULL); return p; @@ -80,53 +79,33 @@ static void page_free(struct page *p) static unsigned find_get_pages(unsigned long start, unsigned int nr_pages, struct page **pages) { - unsigned int i; - unsigned int ret; - unsigned int nr_found; + XA_STATE(xas, &mt_tree, start); + struct page *page; + unsigned int ret = 0; rcu_read_lock(); -restart: - nr_found = radix_tree_gang_lookup_slot(&mt_tree, - (void ***)pages, NULL, start, nr_pages); - ret = 0; - for (i = 0; i < nr_found; i++) { - struct page *page; -repeat: - page = radix_tree_deref_slot((void **)pages[i]); - if (unlikely(!page)) + xas_for_each(&xas, page, ULONG_MAX) { + if (xas_retry(&xas, page)) continue; - if (radix_tree_exception(page)) { - if (radix_tree_deref_retry(page)) { - /* - * Transient condition which can only trigger - * when entry at index 0 moves out of or back - * to root: none yet gotten, safe to restart. - */ - assert((start | i) == 0); - goto restart; - } - /* - * No exceptional entries are inserted in this test. - */ - assert(0); - } - pthread_mutex_lock(&page->lock); - if (!page->count) { - pthread_mutex_unlock(&page->lock); - goto repeat; - } + if (!page->count) + goto unlock; + /* don't actually update page refcount */ pthread_mutex_unlock(&page->lock); /* Has the page moved? */ - if (unlikely(page != *((void **)pages[i]))) { - goto repeat; - } + if (unlikely(page != xas_reload(&xas))) + goto put_page; pages[ret] = page; ret++; + continue; +unlock: + pthread_mutex_unlock(&page->lock); +put_page: + xas_reset(&xas); } rcu_read_unlock(); return ret; @@ -145,30 +124,30 @@ static void *regression1_fn(void *arg) for (j = 0; j < 1000000; j++) { struct page *p; - p = page_alloc(); - pthread_mutex_lock(&mt_lock); + p = page_alloc(0); + xa_lock(&mt_tree); radix_tree_insert(&mt_tree, 0, p); - pthread_mutex_unlock(&mt_lock); + xa_unlock(&mt_tree); - p = page_alloc(); - pthread_mutex_lock(&mt_lock); + p = page_alloc(1); + xa_lock(&mt_tree); radix_tree_insert(&mt_tree, 1, p); - pthread_mutex_unlock(&mt_lock); + xa_unlock(&mt_tree); - pthread_mutex_lock(&mt_lock); + xa_lock(&mt_tree); p = radix_tree_delete(&mt_tree, 1); pthread_mutex_lock(&p->lock); p->count--; pthread_mutex_unlock(&p->lock); - pthread_mutex_unlock(&mt_lock); + xa_unlock(&mt_tree); page_free(p); - pthread_mutex_lock(&mt_lock); + xa_lock(&mt_tree); p = radix_tree_delete(&mt_tree, 0); pthread_mutex_lock(&p->lock); p->count--; pthread_mutex_unlock(&p->lock); - pthread_mutex_unlock(&mt_lock); + xa_unlock(&mt_tree); page_free(p); } } else { diff --git a/tools/testing/radix-tree/regression2.c b/tools/testing/radix-tree/regression2.c index 424b91c77831..f2c7e640a919 100644 --- a/tools/testing/radix-tree/regression2.c +++ b/tools/testing/radix-tree/regression2.c @@ -53,9 +53,9 @@ #include "regression.h" #include "test.h" -#define PAGECACHE_TAG_DIRTY 0 -#define PAGECACHE_TAG_WRITEBACK 1 -#define PAGECACHE_TAG_TOWRITE 2 +#define PAGECACHE_TAG_DIRTY XA_MARK_0 +#define PAGECACHE_TAG_WRITEBACK XA_MARK_1 +#define PAGECACHE_TAG_TOWRITE XA_MARK_2 static RADIX_TREE(mt_tree, GFP_KERNEL); unsigned long page_count = 0; @@ -92,7 +92,7 @@ void regression2_test(void) /* 1. */ start = 0; end = max_slots - 2; - tag_tagged_items(&mt_tree, NULL, start, end, 1, + tag_tagged_items(&mt_tree, start, end, 1, PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE); /* 2. */ diff --git a/tools/testing/radix-tree/regression3.c b/tools/testing/radix-tree/regression3.c index ace2543c3eda..9f9a3b280f56 100644 --- a/tools/testing/radix-tree/regression3.c +++ b/tools/testing/radix-tree/regression3.c @@ -69,21 +69,6 @@ void regression3_test(void) continue; } } - radix_tree_delete(&root, 1); - - first = true; - radix_tree_for_each_contig(slot, &root, &iter, 0) { - printv(2, "contig %ld %p\n", iter.index, *slot); - if (first) { - radix_tree_insert(&root, 1, ptr); - first = false; - } - if (radix_tree_deref_retry(*slot)) { - printv(2, "retry at %ld\n", iter.index); - slot = radix_tree_iter_retry(&iter); - continue; - } - } radix_tree_for_each_slot(slot, &root, &iter, 0) { printv(2, "slot %ld %p\n", iter.index, *slot); @@ -93,14 +78,6 @@ void regression3_test(void) } } - radix_tree_for_each_contig(slot, &root, &iter, 0) { - printv(2, "contig %ld %p\n", iter.index, *slot); - if (!iter.index) { - printv(2, "next at %ld\n", iter.index); - slot = radix_tree_iter_resume(slot, &iter); - } - } - radix_tree_tag_set(&root, 0, 0); radix_tree_tag_set(&root, 1, 0); radix_tree_for_each_tagged(slot, &root, &iter, 0, 0) { diff --git a/tools/testing/radix-tree/regression4.c b/tools/testing/radix-tree/regression4.c new file mode 100644 index 000000000000..cf4e5aba6b08 --- /dev/null +++ b/tools/testing/radix-tree/regression4.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kernel.h> +#include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/radix-tree.h> +#include <linux/rcupdate.h> +#include <stdlib.h> +#include <pthread.h> +#include <stdio.h> +#include <assert.h> + +#include "regression.h" + +static pthread_barrier_t worker_barrier; +static int obj0, obj1; +static RADIX_TREE(mt_tree, GFP_KERNEL); + +static void *reader_fn(void *arg) +{ + int i; + void *entry; + + rcu_register_thread(); + pthread_barrier_wait(&worker_barrier); + + for (i = 0; i < 1000000; i++) { + rcu_read_lock(); + entry = radix_tree_lookup(&mt_tree, 0); + rcu_read_unlock(); + if (entry != &obj0) { + printf("iteration %d bad entry = %p\n", i, entry); + abort(); + } + } + + rcu_unregister_thread(); + + return NULL; +} + +static void *writer_fn(void *arg) +{ + int i; + + rcu_register_thread(); + pthread_barrier_wait(&worker_barrier); + + for (i = 0; i < 1000000; i++) { + radix_tree_insert(&mt_tree, 1, &obj1); + radix_tree_delete(&mt_tree, 1); + } + + rcu_unregister_thread(); + + return NULL; +} + +void regression4_test(void) +{ + pthread_t reader, writer; + + printv(1, "regression test 4 starting\n"); + + radix_tree_insert(&mt_tree, 0, &obj0); + pthread_barrier_init(&worker_barrier, NULL, 2); + + if (pthread_create(&reader, NULL, reader_fn, NULL) || + pthread_create(&writer, NULL, writer_fn, NULL)) { + perror("pthread_create"); + exit(1); + } + + if (pthread_join(reader, NULL) || pthread_join(writer, NULL)) { + perror("pthread_join"); + exit(1); + } + + printv(1, "regression test 4 passed\n"); +} diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c index 543181e4847b..f898957b1a19 100644 --- a/tools/testing/radix-tree/tag_check.c +++ b/tools/testing/radix-tree/tag_check.c @@ -24,7 +24,7 @@ __simple_checks(struct radix_tree_root *tree, unsigned long index, int tag) item_tag_set(tree, index, tag); ret = item_tag_get(tree, index, tag); assert(ret != 0); - ret = tag_tagged_items(tree, NULL, first, ~0UL, 10, tag, !tag); + ret = tag_tagged_items(tree, first, ~0UL, 10, tag, !tag); assert(ret == 1); ret = item_tag_get(tree, index, !tag); assert(ret != 0); @@ -321,7 +321,7 @@ static void single_check(void) assert(ret == 0); verify_tag_consistency(&tree, 0); verify_tag_consistency(&tree, 1); - ret = tag_tagged_items(&tree, NULL, first, 10, 10, 0, 1); + ret = tag_tagged_items(&tree, first, 10, 10, XA_MARK_0, XA_MARK_1); assert(ret == 1); ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 1); assert(ret == 1); @@ -331,34 +331,6 @@ static void single_check(void) item_kill_tree(&tree); } -void radix_tree_clear_tags_test(void) -{ - unsigned long index; - struct radix_tree_node *node; - struct radix_tree_iter iter; - void **slot; - - RADIX_TREE(tree, GFP_KERNEL); - - item_insert(&tree, 0); - item_tag_set(&tree, 0, 0); - __radix_tree_lookup(&tree, 0, &node, &slot); - radix_tree_clear_tags(&tree, node, slot); - assert(item_tag_get(&tree, 0, 0) == 0); - - for (index = 0; index < 1000; index++) { - item_insert(&tree, index); - item_tag_set(&tree, index, 0); - } - - radix_tree_for_each_slot(slot, &tree, &iter, 0) { - radix_tree_clear_tags(&tree, iter.node, slot); - assert(item_tag_get(&tree, iter.index, 0) == 0); - } - - item_kill_tree(&tree); -} - void tag_check(void) { single_check(); @@ -376,5 +348,4 @@ void tag_check(void) thrash_tags(); rcu_barrier(); printv(2, "after thrash_tags: %d allocated\n", nr_allocated); - radix_tree_clear_tags_test(); } diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index def6015570b2..a15d0512e633 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -25,11 +25,6 @@ int item_tag_get(struct radix_tree_root *root, unsigned long index, int tag) return radix_tree_tag_get(root, index, tag); } -int __item_insert(struct radix_tree_root *root, struct item *item) -{ - return __radix_tree_insert(root, item->index, item->order, item); -} - struct item *item_create(unsigned long index, unsigned int order) { struct item *ret = malloc(sizeof(*ret)); @@ -39,21 +34,15 @@ struct item *item_create(unsigned long index, unsigned int order) return ret; } -int item_insert_order(struct radix_tree_root *root, unsigned long index, - unsigned order) +int item_insert(struct radix_tree_root *root, unsigned long index) { - struct item *item = item_create(index, order); - int err = __item_insert(root, item); + struct item *item = item_create(index, 0); + int err = radix_tree_insert(root, item->index, item); if (err) free(item); return err; } -int item_insert(struct radix_tree_root *root, unsigned long index) -{ - return item_insert_order(root, index, 0); -} - void item_sanity(struct item *item, unsigned long index) { unsigned long mask; @@ -63,16 +52,21 @@ void item_sanity(struct item *item, unsigned long index) assert((item->index | mask) == (index | mask)); } +void item_free(struct item *item, unsigned long index) +{ + item_sanity(item, index); + free(item); +} + int item_delete(struct radix_tree_root *root, unsigned long index) { struct item *item = radix_tree_delete(root, index); - if (item) { - item_sanity(item, index); - free(item); - return 1; - } - return 0; + if (!item) + return 0; + + item_free(item, index); + return 1; } static void item_free_rcu(struct rcu_head *head) @@ -82,9 +76,9 @@ static void item_free_rcu(struct rcu_head *head) free(item); } -int item_delete_rcu(struct radix_tree_root *root, unsigned long index) +int item_delete_rcu(struct xarray *xa, unsigned long index) { - struct item *item = radix_tree_delete(root, index); + struct item *item = xa_erase(xa, index); if (item) { item_sanity(item, index); @@ -176,59 +170,30 @@ void item_full_scan(struct radix_tree_root *root, unsigned long start, } /* Use the same pattern as tag_pages_for_writeback() in mm/page-writeback.c */ -int tag_tagged_items(struct radix_tree_root *root, pthread_mutex_t *lock, - unsigned long start, unsigned long end, unsigned batch, - unsigned iftag, unsigned thentag) +int tag_tagged_items(struct xarray *xa, unsigned long start, unsigned long end, + unsigned batch, xa_mark_t iftag, xa_mark_t thentag) { - unsigned long tagged = 0; - struct radix_tree_iter iter; - void **slot; + XA_STATE(xas, xa, start); + unsigned int tagged = 0; + struct item *item; if (batch == 0) batch = 1; - if (lock) - pthread_mutex_lock(lock); - radix_tree_for_each_tagged(slot, root, &iter, start, iftag) { - if (iter.index > end) - break; - radix_tree_iter_tag_set(root, &iter, thentag); - tagged++; - if ((tagged % batch) != 0) + xas_lock_irq(&xas); + xas_for_each_marked(&xas, item, end, iftag) { + xas_set_mark(&xas, thentag); + if (++tagged % batch) continue; - slot = radix_tree_iter_resume(slot, &iter); - if (lock) { - pthread_mutex_unlock(lock); - rcu_barrier(); - pthread_mutex_lock(lock); - } - } - if (lock) - pthread_mutex_unlock(lock); - - return tagged; -} -/* Use the same pattern as find_swap_entry() in mm/shmem.c */ -unsigned long find_item(struct radix_tree_root *root, void *item) -{ - struct radix_tree_iter iter; - void **slot; - unsigned long found = -1; - unsigned long checked = 0; - - radix_tree_for_each_slot(slot, root, &iter, 0) { - if (*slot == item) { - found = iter.index; - break; - } - checked++; - if ((checked % 4) != 0) - continue; - slot = radix_tree_iter_resume(slot, &iter); + xas_pause(&xas); + xas_unlock_irq(&xas); + rcu_barrier(); + xas_lock_irq(&xas); } + xas_unlock_irq(&xas); - return found; + return tagged; } static int verify_node(struct radix_tree_node *slot, unsigned int tag, @@ -281,43 +246,31 @@ static int verify_node(struct radix_tree_node *slot, unsigned int tag, void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag) { - struct radix_tree_node *node = root->rnode; + struct radix_tree_node *node = root->xa_head; if (!radix_tree_is_internal_node(node)) return; verify_node(node, tag, !!root_tag_get(root, tag)); } -void item_kill_tree(struct radix_tree_root *root) +void item_kill_tree(struct xarray *xa) { - struct radix_tree_iter iter; - void **slot; - struct item *items[32]; - int nfound; - - radix_tree_for_each_slot(slot, root, &iter, 0) { - if (radix_tree_exceptional_entry(*slot)) - radix_tree_delete(root, iter.index); - } + XA_STATE(xas, xa, 0); + void *entry; - while ((nfound = radix_tree_gang_lookup(root, (void **)items, 0, 32))) { - int i; - - for (i = 0; i < nfound; i++) { - void *ret; - - ret = radix_tree_delete(root, items[i]->index); - assert(ret == items[i]); - free(items[i]); + xas_for_each(&xas, entry, ULONG_MAX) { + if (!xa_is_value(entry)) { + item_free(entry, xas.xa_index); } + xas_store(&xas, NULL); } - assert(radix_tree_gang_lookup(root, (void **)items, 0, 32) == 0); - assert(root->rnode == NULL); + + assert(xa_empty(xa)); } void tree_verify_min_height(struct radix_tree_root *root, int maxindex) { unsigned shift; - struct radix_tree_node *node = root->rnode; + struct radix_tree_node *node = root->xa_head; if (!radix_tree_is_internal_node(node)) { assert(maxindex == 0); return; diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 92d901eacf49..1ee4b2c0ad10 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -11,13 +11,11 @@ struct item { }; struct item *item_create(unsigned long index, unsigned int order); -int __item_insert(struct radix_tree_root *root, struct item *item); int item_insert(struct radix_tree_root *root, unsigned long index); void item_sanity(struct item *item, unsigned long index); -int item_insert_order(struct radix_tree_root *root, unsigned long index, - unsigned order); +void item_free(struct item *item, unsigned long index); int item_delete(struct radix_tree_root *root, unsigned long index); -int item_delete_rcu(struct radix_tree_root *root, unsigned long index); +int item_delete_rcu(struct xarray *xa, unsigned long index); struct item *item_lookup(struct radix_tree_root *root, unsigned long index); void item_check_present(struct radix_tree_root *root, unsigned long index); @@ -29,11 +27,10 @@ void item_full_scan(struct radix_tree_root *root, unsigned long start, unsigned long nr, int chunk); void item_kill_tree(struct radix_tree_root *root); -int tag_tagged_items(struct radix_tree_root *, pthread_mutex_t *, - unsigned long start, unsigned long end, unsigned batch, - unsigned iftag, unsigned thentag); -unsigned long find_item(struct radix_tree_root *, void *item); +int tag_tagged_items(struct xarray *, unsigned long start, unsigned long end, + unsigned batch, xa_mark_t iftag, xa_mark_t thentag); +void xarray_tests(void); void tag_check(void); void multiorder_checks(void); void iteration_test(unsigned order, unsigned duration); diff --git a/tools/testing/radix-tree/xarray.c b/tools/testing/radix-tree/xarray.c new file mode 100644 index 000000000000..e61e43efe463 --- /dev/null +++ b/tools/testing/radix-tree/xarray.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * xarray.c: Userspace shim for XArray test-suite + * Copyright (c) 2018 Matthew Wilcox <willy@infradead.org> + */ + +#define XA_DEBUG +#include "test.h" + +#define module_init(x) +#define module_exit(x) +#define MODULE_AUTHOR(x) +#define MODULE_LICENSE(x) +#define dump_stack() assert(0) + +#include "../../../lib/xarray.c" +#undef XA_DEBUG +#include "../../../lib/test_xarray.c" + +void xarray_tests(void) +{ + xarray_checks(); + xarray_exit(); +} + +int __weak main(void) +{ + radix_tree_init(); + xarray_tests(); + radix_tree_cpu_dead(1); + rcu_barrier(); + if (nr_allocated) + printf("nr_allocated = %d\n", nr_allocated); + return 0; +} diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index f1fe492c8e17..400ee81a3043 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -6,15 +6,19 @@ TARGETS += capabilities TARGETS += cgroup TARGETS += cpufreq TARGETS += cpu-hotplug +TARGETS += drivers/dma-buf TARGETS += efivarfs TARGETS += exec TARGETS += filesystems +TARGETS += filesystems/binderfs TARGETS += firmware TARGETS += ftrace TARGETS += futex TARGETS += gpio +TARGETS += ima TARGETS += intel_pstate TARGETS += ipc +TARGETS += ir TARGETS += kcmp TARGETS += kvm TARGETS += lib @@ -24,6 +28,8 @@ TARGETS += memory-hotplug TARGETS += mount TARGETS += mqueue TARGETS += net +TARGETS += netfilter +TARGETS += networking/timestamping TARGETS += nsfs TARGETS += powerpc TARGETS += proc diff --git a/tools/testing/selftests/android/Makefile b/tools/testing/selftests/android/Makefile index d9a725478375..72c25a3cb658 100644 --- a/tools/testing/selftests/android/Makefile +++ b/tools/testing/selftests/android/Makefile @@ -6,7 +6,7 @@ TEST_PROGS := run.sh include ../lib.mk -all: khdr +all: @for DIR in $(SUBDIRS); do \ BUILD_TARGET=$(OUTPUT)/$$DIR; \ mkdir $$BUILD_TARGET -p; \ diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 49938d72cf63..dd093bd91aa9 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -19,3 +19,13 @@ test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user +test_skb_cgroup_id_user +test_socket_cookie +test_cgroup_storage +test_select_reuseport +test_flow_dissector +flow_dissector_load +test_netcnt +test_section_names +test_tcpnotify_user +test_libbpf diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index fff7fb1285fc..41ab7a3668b3 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -23,19 +23,23 @@ $(TEST_CUSTOM_PROGS): $(OUTPUT)/%: %.c TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \ - test_socket_cookie test_cgroup_storage test_select_reuseport + test_socket_cookie test_cgroup_storage test_select_reuseport test_section_names \ + test_netcnt test_tcpnotify_user TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ + test_tcpnotify_kern.o \ sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \ sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \ test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \ test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \ test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \ - test_skb_cgroup_id_kern.o + test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o \ + test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o test_stack_map.o \ + xdp_dummy.o test_map_in_map.o # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ @@ -47,10 +51,18 @@ TEST_PROGS := test_kmod.sh \ test_tunnel.sh \ test_lwt_seg6local.sh \ test_lirc_mode2.sh \ - test_skb_cgroup_id.sh + test_skb_cgroup_id.sh \ + test_flow_dissector.sh \ + test_xdp_vlan.sh + +TEST_PROGS_EXTENDED := with_addr.sh \ + with_tunnels.sh \ + tcp_client.py \ + tcp_server.py # Compile but not part of 'make run_tests' -TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user +TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user \ + flow_dissector_load test_flow_dissector include ../lib.mk @@ -67,9 +79,11 @@ $(OUTPUT)/test_sock_addr: cgroup_helpers.c $(OUTPUT)/test_socket_cookie: cgroup_helpers.c $(OUTPUT)/test_sockmap: cgroup_helpers.c $(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c +$(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c $(OUTPUT)/test_progs: trace_helpers.c $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c +$(OUTPUT)/test_netcnt: cgroup_helpers.c .PHONY: force @@ -110,10 +124,20 @@ CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \ $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline +$(OUTPUT)/test_queue_map.o: test_queue_stack_map.h +$(OUTPUT)/test_stack_map.o: test_queue_stack_map.h + BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris) BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF) BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm') +BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \ + $(CLANG) -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \ + readelf -S ./llvm_btf_verify.o | grep BTF; \ + /bin/rm -f ./llvm_btf_verify.o) +ifneq ($(BTF_LLVM_PROBE),) + CLANG_FLAGS += -g +else ifneq ($(BTF_LLC_PROBE),) ifneq ($(BTF_PAHOLE_PROBE),) ifneq ($(BTF_OBJCOPY_PROBE),) @@ -123,6 +147,17 @@ ifneq ($(BTF_OBJCOPY_PROBE),) endif endif endif +endif + +# Have one program compiled without "-target bpf" to test whether libbpf loads +# it successfully +$(OUTPUT)/test_xdp.o: test_xdp.c + $(CLANG) $(CLANG_FLAGS) \ + -O2 -emit-llvm -c $< -o - | \ + $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ +ifeq ($(DWARF2BTF),y) + $(BTF_PAHOLE) -J $@ +endif $(OUTPUT)/%.o: %.c $(CLANG) $(CLANG_FLAGS) \ diff --git a/tools/testing/selftests/bpf/bpf_flow.c b/tools/testing/selftests/bpf/bpf_flow.c new file mode 100644 index 000000000000..284660f5aa95 --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_flow.c @@ -0,0 +1,371 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <limits.h> +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/pkt_cls.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/icmp.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/if_packet.h> +#include <sys/socket.h> +#include <linux/if_tunnel.h> +#include <linux/mpls.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +int _version SEC("version") = 1; +#define PROG(F) SEC(#F) int bpf_func_##F + +/* These are the identifiers of the BPF programs that will be used in tail + * calls. Name is limited to 16 characters, with the terminating character and + * bpf_func_ above, we have only 6 to work with, anything after will be cropped. + */ +enum { + IP, + IPV6, + IPV6OP, /* Destination/Hop-by-Hop Options IPv6 Extension header */ + IPV6FR, /* Fragmentation IPv6 Extension Header */ + MPLS, + VLAN, +}; + +#define IP_MF 0x2000 +#define IP_OFFSET 0x1FFF +#define IP6_MF 0x0001 +#define IP6_OFFSET 0xFFF8 + +struct vlan_hdr { + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; + +struct gre_hdr { + __be16 flags; + __be16 proto; +}; + +struct frag_hdr { + __u8 nexthdr; + __u8 reserved; + __be16 frag_off; + __be32 identification; +}; + +struct bpf_map_def SEC("maps") jmp_table = { + .type = BPF_MAP_TYPE_PROG_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(__u32), + .max_entries = 8 +}; + +static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb, + __u16 hdr_size, + void *buffer) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + __u16 thoff = skb->flow_keys->thoff; + __u8 *hdr; + + /* Verifies this variable offset does not overflow */ + if (thoff > (USHRT_MAX - hdr_size)) + return NULL; + + hdr = data + thoff; + if (hdr + hdr_size <= data_end) + return hdr; + + if (bpf_skb_load_bytes(skb, thoff, buffer, hdr_size)) + return NULL; + + return buffer; +} + +/* Dispatches on ETHERTYPE */ +static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + + keys->n_proto = proto; + switch (proto) { + case bpf_htons(ETH_P_IP): + bpf_tail_call(skb, &jmp_table, IP); + break; + case bpf_htons(ETH_P_IPV6): + bpf_tail_call(skb, &jmp_table, IPV6); + break; + case bpf_htons(ETH_P_MPLS_MC): + case bpf_htons(ETH_P_MPLS_UC): + bpf_tail_call(skb, &jmp_table, MPLS); + break; + case bpf_htons(ETH_P_8021Q): + case bpf_htons(ETH_P_8021AD): + bpf_tail_call(skb, &jmp_table, VLAN); + break; + default: + /* Protocol not supported */ + return BPF_DROP; + } + + return BPF_DROP; +} + +SEC("flow_dissector") +int _dissect(struct __sk_buff *skb) +{ + if (!skb->vlan_present) + return parse_eth_proto(skb, skb->protocol); + else + return parse_eth_proto(skb, skb->vlan_proto); +} + +/* Parses on IPPROTO_* */ +static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + void *data_end = (void *)(long)skb->data_end; + struct icmphdr *icmp, _icmp; + struct gre_hdr *gre, _gre; + struct ethhdr *eth, _eth; + struct tcphdr *tcp, _tcp; + struct udphdr *udp, _udp; + + keys->ip_proto = proto; + switch (proto) { + case IPPROTO_ICMP: + icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp); + if (!icmp) + return BPF_DROP; + return BPF_OK; + case IPPROTO_IPIP: + keys->is_encap = true; + return parse_eth_proto(skb, bpf_htons(ETH_P_IP)); + case IPPROTO_IPV6: + keys->is_encap = true; + return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6)); + case IPPROTO_GRE: + gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre); + if (!gre) + return BPF_DROP; + + if (bpf_htons(gre->flags & GRE_VERSION)) + /* Only inspect standard GRE packets with version 0 */ + return BPF_OK; + + keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */ + if (GRE_IS_CSUM(gre->flags)) + keys->thoff += 4; /* Step over chksum and Padding */ + if (GRE_IS_KEY(gre->flags)) + keys->thoff += 4; /* Step over key */ + if (GRE_IS_SEQ(gre->flags)) + keys->thoff += 4; /* Step over sequence number */ + + keys->is_encap = true; + + if (gre->proto == bpf_htons(ETH_P_TEB)) { + eth = bpf_flow_dissect_get_header(skb, sizeof(*eth), + &_eth); + if (!eth) + return BPF_DROP; + + keys->thoff += sizeof(*eth); + + return parse_eth_proto(skb, eth->h_proto); + } else { + return parse_eth_proto(skb, gre->proto); + } + case IPPROTO_TCP: + tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp); + if (!tcp) + return BPF_DROP; + + if (tcp->doff < 5) + return BPF_DROP; + + if ((__u8 *)tcp + (tcp->doff << 2) > data_end) + return BPF_DROP; + + keys->sport = tcp->source; + keys->dport = tcp->dest; + return BPF_OK; + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp); + if (!udp) + return BPF_DROP; + + keys->sport = udp->source; + keys->dport = udp->dest; + return BPF_OK; + default: + return BPF_DROP; + } + + return BPF_DROP; +} + +static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + + keys->ip_proto = nexthdr; + switch (nexthdr) { + case IPPROTO_HOPOPTS: + case IPPROTO_DSTOPTS: + bpf_tail_call(skb, &jmp_table, IPV6OP); + break; + case IPPROTO_FRAGMENT: + bpf_tail_call(skb, &jmp_table, IPV6FR); + break; + default: + return parse_ip_proto(skb, nexthdr); + } + + return BPF_DROP; +} + +PROG(IP)(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + struct bpf_flow_keys *keys = skb->flow_keys; + void *data = (void *)(long)skb->data; + struct iphdr *iph, _iph; + bool done = false; + + iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph); + if (!iph) + return BPF_DROP; + + /* IP header cannot be smaller than 20 bytes */ + if (iph->ihl < 5) + return BPF_DROP; + + keys->addr_proto = ETH_P_IP; + keys->ipv4_src = iph->saddr; + keys->ipv4_dst = iph->daddr; + + keys->thoff += iph->ihl << 2; + if (data + keys->thoff > data_end) + return BPF_DROP; + + if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) { + keys->is_frag = true; + if (iph->frag_off & bpf_htons(IP_OFFSET)) + /* From second fragment on, packets do not have headers + * we can parse. + */ + done = true; + else + keys->is_first_frag = true; + } + + if (done) + return BPF_OK; + + return parse_ip_proto(skb, iph->protocol); +} + +PROG(IPV6)(struct __sk_buff *skb) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + struct ipv6hdr *ip6h, _ip6h; + + ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); + if (!ip6h) + return BPF_DROP; + + keys->addr_proto = ETH_P_IPV6; + memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr)); + + keys->thoff += sizeof(struct ipv6hdr); + + return parse_ipv6_proto(skb, ip6h->nexthdr); +} + +PROG(IPV6OP)(struct __sk_buff *skb) +{ + struct ipv6_opt_hdr *ip6h, _ip6h; + + ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); + if (!ip6h) + return BPF_DROP; + + /* hlen is in 8-octets and does not include the first 8 bytes + * of the header + */ + skb->flow_keys->thoff += (1 + ip6h->hdrlen) << 3; + + return parse_ipv6_proto(skb, ip6h->nexthdr); +} + +PROG(IPV6FR)(struct __sk_buff *skb) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + struct frag_hdr *fragh, _fragh; + + fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh); + if (!fragh) + return BPF_DROP; + + keys->thoff += sizeof(*fragh); + keys->is_frag = true; + if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) + keys->is_first_frag = true; + + return parse_ipv6_proto(skb, fragh->nexthdr); +} + +PROG(MPLS)(struct __sk_buff *skb) +{ + struct mpls_label *mpls, _mpls; + + mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls); + if (!mpls) + return BPF_DROP; + + return BPF_OK; +} + +PROG(VLAN)(struct __sk_buff *skb) +{ + struct bpf_flow_keys *keys = skb->flow_keys; + struct vlan_hdr *vlan, _vlan; + __be16 proto; + + /* Peek back to see if single or double-tagging */ + if (bpf_skb_load_bytes(skb, keys->thoff - sizeof(proto), &proto, + sizeof(proto))) + return BPF_DROP; + + /* Account for double-tagging */ + if (proto == bpf_htons(ETH_P_8021AD)) { + vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); + if (!vlan) + return BPF_DROP; + + if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q)) + return BPF_DROP; + + keys->thoff += sizeof(*vlan); + } + + vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); + if (!vlan) + return BPF_DROP; + + keys->thoff += sizeof(*vlan); + /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/ + if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) || + vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q)) + return BPF_DROP; + + return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto); +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index e4be7730222d..6c77cf7bedce 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -16,6 +16,13 @@ static int (*bpf_map_update_elem)(void *map, void *key, void *value, (void *) BPF_FUNC_map_update_elem; static int (*bpf_map_delete_elem)(void *map, void *key) = (void *) BPF_FUNC_map_delete_elem; +static int (*bpf_map_push_elem)(void *map, void *value, + unsigned long long flags) = + (void *) BPF_FUNC_map_push_elem; +static int (*bpf_map_pop_elem)(void *map, void *value) = + (void *) BPF_FUNC_map_pop_elem; +static int (*bpf_map_peek_elem)(void *map, void *value) = + (void *) BPF_FUNC_map_peek_elem; static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) = (void *) BPF_FUNC_probe_read; static unsigned long long (*bpf_ktime_get_ns)(void) = @@ -104,6 +111,10 @@ static int (*bpf_msg_cork_bytes)(void *ctx, int len) = (void *) BPF_FUNC_msg_cork_bytes; static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) = (void *) BPF_FUNC_msg_pull_data; +static int (*bpf_msg_push_data)(void *ctx, int start, int end, int flags) = + (void *) BPF_FUNC_msg_push_data; +static int (*bpf_msg_pop_data)(void *ctx, int start, int cut, int flags) = + (void *) BPF_FUNC_msg_pop_data; static int (*bpf_bind)(void *ctx, void *addr, int addr_len) = (void *) BPF_FUNC_bind; static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) = @@ -143,6 +154,24 @@ static unsigned long long (*bpf_skb_cgroup_id)(void *ctx) = (void *) BPF_FUNC_skb_cgroup_id; static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) = (void *) BPF_FUNC_skb_ancestor_cgroup_id; +static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx, + struct bpf_sock_tuple *tuple, + int size, unsigned long long netns_id, + unsigned long long flags) = + (void *) BPF_FUNC_sk_lookup_tcp; +static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx, + struct bpf_sock_tuple *tuple, + int size, unsigned long long netns_id, + unsigned long long flags) = + (void *) BPF_FUNC_sk_lookup_udp; +static int (*bpf_sk_release)(struct bpf_sock *sk) = + (void *) BPF_FUNC_sk_release; +static int (*bpf_skb_vlan_push)(void *ctx, __be16 vlan_proto, __u16 vlan_tci) = + (void *) BPF_FUNC_skb_vlan_push; +static int (*bpf_skb_vlan_pop)(void *ctx) = + (void *) BPF_FUNC_skb_vlan_pop; +static int (*bpf_rc_pointer_rel)(void *ctx, int rel_x, int rel_y) = + (void *) BPF_FUNC_rc_pointer_rel; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index 315a44fa32af..84fd6f1bf33e 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h @@ -13,7 +13,7 @@ static inline unsigned int bpf_num_possible_cpus(void) unsigned int start, end, possible_cpus = 0; char buff[128]; FILE *fp; - int n; + int len, n, i, j = 0; fp = fopen(fcpu, "r"); if (!fp) { @@ -21,17 +21,27 @@ static inline unsigned int bpf_num_possible_cpus(void) exit(1); } - while (fgets(buff, sizeof(buff), fp)) { - n = sscanf(buff, "%u-%u", &start, &end); - if (n == 0) { - printf("Failed to retrieve # possible CPUs!\n"); - exit(1); - } else if (n == 1) { - end = start; + if (!fgets(buff, sizeof(buff), fp)) { + printf("Failed to read %s!\n", fcpu); + exit(1); + } + + len = strlen(buff); + for (i = 0; i <= len; i++) { + if (buff[i] == ',' || buff[i] == '\0') { + buff[i] = '\0'; + n = sscanf(&buff[j], "%u-%u", &start, &end); + if (n <= 0) { + printf("Failed to retrieve # possible CPUs!\n"); + exit(1); + } else if (n == 1) { + end = start; + } + possible_cpus += end - start + 1; + j = i + 1; } - possible_cpus = start == 0 ? end + 1 : 0; - break; } + fclose(fp); return possible_cpus; diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index cf16948aad4a..6692a40a6979 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -155,7 +155,7 @@ void cleanup_cgroup_environment(void) * This function creates a cgroup under the top level workdir and returns the * file descriptor. It is idempotent. * - * On success, it returns the file descriptor. On failure it returns 0. + * On success, it returns the file descriptor. On failure it returns -1. * If there is a failure, it prints the error to stderr. */ int create_and_get_cgroup(const char *path) @@ -166,13 +166,13 @@ int create_and_get_cgroup(const char *path) format_cgroup_path(cgroup_path, path); if (mkdir(cgroup_path, 0777) && errno != EEXIST) { log_err("mkdiring cgroup %s .. %s", path, cgroup_path); - return 0; + return -1; } fd = open(cgroup_path, O_RDONLY); if (fd < 0) { log_err("Opening Cgroup"); - return 0; + return -1; } return fd; diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index b4994a94968b..37f947ec44ed 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -18,3 +18,8 @@ CONFIG_CRYPTO_HMAC=m CONFIG_CRYPTO_SHA256=m CONFIG_VXLAN=y CONFIG_GENEVE=y +CONFIG_NET_CLS_FLOWER=m +CONFIG_LWTUNNEL=y +CONFIG_BPF_STREAM_PARSER=y +CONFIG_XDP_SOCKETS=y +CONFIG_FTRACE_SYSCALLS=y diff --git a/tools/testing/selftests/bpf/connect4_prog.c b/tools/testing/selftests/bpf/connect4_prog.c index 5a88a681d2ab..1fd244d35ba9 100644 --- a/tools/testing/selftests/bpf/connect4_prog.c +++ b/tools/testing/selftests/bpf/connect4_prog.c @@ -21,23 +21,50 @@ int _version SEC("version") = 1; SEC("cgroup/connect4") int connect_v4_prog(struct bpf_sock_addr *ctx) { + struct bpf_sock_tuple tuple = {}; struct sockaddr_in sa; + struct bpf_sock *sk; + + /* Verify that new destination is available. */ + memset(&tuple.ipv4.saddr, 0, sizeof(tuple.ipv4.saddr)); + memset(&tuple.ipv4.sport, 0, sizeof(tuple.ipv4.sport)); + + tuple.ipv4.daddr = bpf_htonl(DST_REWRITE_IP4); + tuple.ipv4.dport = bpf_htons(DST_REWRITE_PORT4); + + if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) + return 0; + else if (ctx->type == SOCK_STREAM) + sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv4), + BPF_F_CURRENT_NETNS, 0); + else + sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv4), + BPF_F_CURRENT_NETNS, 0); + + if (!sk) + return 0; + + if (sk->src_ip4 != tuple.ipv4.daddr || + sk->src_port != DST_REWRITE_PORT4) { + bpf_sk_release(sk); + return 0; + } + + bpf_sk_release(sk); /* Rewrite destination. */ ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4); ctx->user_port = bpf_htons(DST_REWRITE_PORT4); - if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) { - ///* Rewrite source. */ - memset(&sa, 0, sizeof(sa)); + /* Rewrite source. */ + memset(&sa, 0, sizeof(sa)); - sa.sin_family = AF_INET; - sa.sin_port = bpf_htons(0); - sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4); + sa.sin_family = AF_INET; + sa.sin_port = bpf_htons(0); + sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4); - if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) - return 0; - } + if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) + return 0; return 1; } diff --git a/tools/testing/selftests/bpf/connect6_prog.c b/tools/testing/selftests/bpf/connect6_prog.c index 8ea3f7d12dee..26397ab7b3c7 100644 --- a/tools/testing/selftests/bpf/connect6_prog.c +++ b/tools/testing/selftests/bpf/connect6_prog.c @@ -29,7 +29,43 @@ int _version SEC("version") = 1; SEC("cgroup/connect6") int connect_v6_prog(struct bpf_sock_addr *ctx) { + struct bpf_sock_tuple tuple = {}; struct sockaddr_in6 sa; + struct bpf_sock *sk; + + /* Verify that new destination is available. */ + memset(&tuple.ipv6.saddr, 0, sizeof(tuple.ipv6.saddr)); + memset(&tuple.ipv6.sport, 0, sizeof(tuple.ipv6.sport)); + + tuple.ipv6.daddr[0] = bpf_htonl(DST_REWRITE_IP6_0); + tuple.ipv6.daddr[1] = bpf_htonl(DST_REWRITE_IP6_1); + tuple.ipv6.daddr[2] = bpf_htonl(DST_REWRITE_IP6_2); + tuple.ipv6.daddr[3] = bpf_htonl(DST_REWRITE_IP6_3); + + tuple.ipv6.dport = bpf_htons(DST_REWRITE_PORT6); + + if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) + return 0; + else if (ctx->type == SOCK_STREAM) + sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv6), + BPF_F_CURRENT_NETNS, 0); + else + sk = bpf_sk_lookup_udp(ctx, &tuple, sizeof(tuple.ipv6), + BPF_F_CURRENT_NETNS, 0); + + if (!sk) + return 0; + + if (sk->src_ip6[0] != tuple.ipv6.daddr[0] || + sk->src_ip6[1] != tuple.ipv6.daddr[1] || + sk->src_ip6[2] != tuple.ipv6.daddr[2] || + sk->src_ip6[3] != tuple.ipv6.daddr[3] || + sk->src_port != DST_REWRITE_PORT6) { + bpf_sk_release(sk); + return 0; + } + + bpf_sk_release(sk); /* Rewrite destination. */ ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0); @@ -39,21 +75,19 @@ int connect_v6_prog(struct bpf_sock_addr *ctx) ctx->user_port = bpf_htons(DST_REWRITE_PORT6); - if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) { - /* Rewrite source. */ - memset(&sa, 0, sizeof(sa)); + /* Rewrite source. */ + memset(&sa, 0, sizeof(sa)); - sa.sin6_family = AF_INET6; - sa.sin6_port = bpf_htons(0); + sa.sin6_family = AF_INET6; + sa.sin6_port = bpf_htons(0); - sa.sin6_addr.s6_addr32[0] = bpf_htonl(SRC_REWRITE_IP6_0); - sa.sin6_addr.s6_addr32[1] = bpf_htonl(SRC_REWRITE_IP6_1); - sa.sin6_addr.s6_addr32[2] = bpf_htonl(SRC_REWRITE_IP6_2); - sa.sin6_addr.s6_addr32[3] = bpf_htonl(SRC_REWRITE_IP6_3); + sa.sin6_addr.s6_addr32[0] = bpf_htonl(SRC_REWRITE_IP6_0); + sa.sin6_addr.s6_addr32[1] = bpf_htonl(SRC_REWRITE_IP6_1); + sa.sin6_addr.s6_addr32[2] = bpf_htonl(SRC_REWRITE_IP6_2); + sa.sin6_addr.s6_addr32[3] = bpf_htonl(SRC_REWRITE_IP6_3); - if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) - return 0; - } + if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0) + return 0; return 1; } diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c new file mode 100644 index 000000000000..ae8180b11d5f --- /dev/null +++ b/tools/testing/selftests/bpf/flow_dissector_load.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <error.h> +#include <errno.h> +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "bpf_rlimit.h" + +const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector"; +const char *cfg_map_name = "jmp_table"; +bool cfg_attach = true; +char *cfg_section_name; +char *cfg_path_name; + +static void load_and_attach_program(void) +{ + struct bpf_program *prog, *main_prog; + struct bpf_map *prog_array; + int i, fd, prog_fd, ret; + struct bpf_object *obj; + int prog_array_fd; + + ret = bpf_prog_load(cfg_path_name, BPF_PROG_TYPE_FLOW_DISSECTOR, &obj, + &prog_fd); + if (ret) + error(1, 0, "bpf_prog_load %s", cfg_path_name); + + main_prog = bpf_object__find_program_by_title(obj, cfg_section_name); + if (!main_prog) + error(1, 0, "bpf_object__find_program_by_title %s", + cfg_section_name); + + prog_fd = bpf_program__fd(main_prog); + if (prog_fd < 0) + error(1, 0, "bpf_program__fd"); + + prog_array = bpf_object__find_map_by_name(obj, cfg_map_name); + if (!prog_array) + error(1, 0, "bpf_object__find_map_by_name %s", cfg_map_name); + + prog_array_fd = bpf_map__fd(prog_array); + if (prog_array_fd < 0) + error(1, 0, "bpf_map__fd %s", cfg_map_name); + + i = 0; + bpf_object__for_each_program(prog, obj) { + fd = bpf_program__fd(prog); + if (fd < 0) + error(1, 0, "bpf_program__fd"); + + if (fd != prog_fd) { + printf("%d: %s\n", i, bpf_program__title(prog, false)); + bpf_map_update_elem(prog_array_fd, &i, &fd, BPF_ANY); + ++i; + } + } + + ret = bpf_prog_attach(prog_fd, 0 /* Ignore */, BPF_FLOW_DISSECTOR, 0); + if (ret) + error(1, 0, "bpf_prog_attach %s", cfg_path_name); + + ret = bpf_object__pin(obj, cfg_pin_path); + if (ret) + error(1, 0, "bpf_object__pin %s", cfg_pin_path); + +} + +static void detach_program(void) +{ + char command[64]; + int ret; + + ret = bpf_prog_detach(0, BPF_FLOW_DISSECTOR); + if (ret) + error(1, 0, "bpf_prog_detach"); + + /* To unpin, it is necessary and sufficient to just remove this dir */ + sprintf(command, "rm -r %s", cfg_pin_path); + ret = system(command); + if (ret) + error(1, errno, command); +} + +static void parse_opts(int argc, char **argv) +{ + bool attach = false; + bool detach = false; + int c; + + while ((c = getopt(argc, argv, "adp:s:")) != -1) { + switch (c) { + case 'a': + if (detach) + error(1, 0, "attach/detach are exclusive"); + attach = true; + break; + case 'd': + if (attach) + error(1, 0, "attach/detach are exclusive"); + detach = true; + break; + case 'p': + if (cfg_path_name) + error(1, 0, "only one prog name can be given"); + + cfg_path_name = optarg; + break; + case 's': + if (cfg_section_name) + error(1, 0, "only one section can be given"); + + cfg_section_name = optarg; + break; + } + } + + if (detach) + cfg_attach = false; + + if (cfg_attach && !cfg_path_name) + error(1, 0, "must provide a path to the BPF program"); + + if (cfg_attach && !cfg_section_name) + error(1, 0, "must provide a section name"); +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + if (cfg_attach) + load_and_attach_program(); + else + detach_program(); + return 0; +} diff --git a/tools/testing/selftests/bpf/netcnt_common.h b/tools/testing/selftests/bpf/netcnt_common.h new file mode 100644 index 000000000000..81084c1c2c23 --- /dev/null +++ b/tools/testing/selftests/bpf/netcnt_common.h @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef __NETCNT_COMMON_H +#define __NETCNT_COMMON_H + +#include <linux/types.h> + +#define MAX_PERCPU_PACKETS 32 + +struct percpu_net_cnt { + __u64 packets; + __u64 bytes; + + __u64 prev_ts; + + __u64 prev_packets; + __u64 prev_bytes; +}; + +struct net_cnt { + __u64 packets; + __u64 bytes; +}; + +#endif diff --git a/tools/testing/selftests/bpf/netcnt_prog.c b/tools/testing/selftests/bpf/netcnt_prog.c new file mode 100644 index 000000000000..9f741e69cebe --- /dev/null +++ b/tools/testing/selftests/bpf/netcnt_prog.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <linux/version.h> + +#include "bpf_helpers.h" +#include "netcnt_common.h" + +#define MAX_BPS (3 * 1024 * 1024) + +#define REFRESH_TIME_NS 100000000 +#define NS_PER_SEC 1000000000 + +struct bpf_map_def SEC("maps") percpu_netcnt = { + .type = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + .key_size = sizeof(struct bpf_cgroup_storage_key), + .value_size = sizeof(struct percpu_net_cnt), +}; + +BPF_ANNOTATE_KV_PAIR(percpu_netcnt, struct bpf_cgroup_storage_key, + struct percpu_net_cnt); + +struct bpf_map_def SEC("maps") netcnt = { + .type = BPF_MAP_TYPE_CGROUP_STORAGE, + .key_size = sizeof(struct bpf_cgroup_storage_key), + .value_size = sizeof(struct net_cnt), +}; + +BPF_ANNOTATE_KV_PAIR(netcnt, struct bpf_cgroup_storage_key, + struct net_cnt); + +SEC("cgroup/skb") +int bpf_nextcnt(struct __sk_buff *skb) +{ + struct percpu_net_cnt *percpu_cnt; + char fmt[] = "%d %llu %llu\n"; + struct net_cnt *cnt; + __u64 ts, dt; + int ret; + + cnt = bpf_get_local_storage(&netcnt, 0); + percpu_cnt = bpf_get_local_storage(&percpu_netcnt, 0); + + percpu_cnt->packets++; + percpu_cnt->bytes += skb->len; + + if (percpu_cnt->packets > MAX_PERCPU_PACKETS) { + __sync_fetch_and_add(&cnt->packets, + percpu_cnt->packets); + percpu_cnt->packets = 0; + + __sync_fetch_and_add(&cnt->bytes, + percpu_cnt->bytes); + percpu_cnt->bytes = 0; + } + + ts = bpf_ktime_get_ns(); + dt = ts - percpu_cnt->prev_ts; + + dt *= MAX_BPS; + dt /= NS_PER_SEC; + + if (cnt->bytes + percpu_cnt->bytes - percpu_cnt->prev_bytes < dt) + ret = 1; + else + ret = 0; + + if (dt > REFRESH_TIME_NS) { + percpu_cnt->prev_ts = ts; + percpu_cnt->prev_packets = cnt->packets; + percpu_cnt->prev_bytes = cnt->bytes; + } + + return !!ret; +} + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c index 5f377ec53f2f..3c789d03b629 100644 --- a/tools/testing/selftests/bpf/test_align.c +++ b/tools/testing/selftests/bpf/test_align.c @@ -620,8 +620,8 @@ static int do_test_single(struct bpf_align_test *test) prog_len = probe_filter_length(prog); fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, - prog, prog_len, 1, "GPL", 0, - bpf_vlog, sizeof(bpf_vlog), 2); + prog, prog_len, BPF_F_STRICT_ALIGNMENT, + "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 2); if (fd_prog < 0 && test->result != REJECT) { printf("Failed to load program.\n"); printf("%s", bpf_vlog); diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 6b5cfeb7a9cc..91420fa83b08 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -4,6 +4,9 @@ #include <linux/bpf.h> #include <linux/btf.h> #include <linux/err.h> +#include <linux/kernel.h> +#include <linux/filter.h> +#include <linux/unistd.h> #include <bpf/bpf.h> #include <sys/resource.h> #include <libelf.h> @@ -21,6 +24,9 @@ #include "bpf_rlimit.h" #include "bpf_util.h" +#define MAX_INSNS 512 +#define MAX_SUBPROGS 16 + static uint32_t pass_cnt; static uint32_t error_cnt; static uint32_t skip_cnt; @@ -45,7 +51,6 @@ static int count_result(int err) return err; } -#define min(a, b) ((a) < (b) ? (a) : (b)) #define __printf(a, b) __attribute__((format(printf, a, b))) __printf(1, 2) @@ -60,8 +65,8 @@ static int __base_pr(const char *format, ...) return err; } -#define BTF_INFO_ENC(kind, root, vlen) \ - ((!!(root) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) +#define BTF_INFO_ENC(kind, kind_flag, vlen) \ + ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) #define BTF_TYPE_ENC(name, info, size_or_type) \ (name), (info), (size_or_type) @@ -81,28 +86,44 @@ static int __base_pr(const char *format, ...) #define BTF_MEMBER_ENC(name, type, bits_offset) \ (name), (type), (bits_offset) #define BTF_ENUM_ENC(name, val) (name), (val) +#define BTF_MEMBER_OFFSET(bitfield_size, bits_offset) \ + ((bitfield_size) << 24 | (bits_offset)) #define BTF_TYPEDEF_ENC(name, type) \ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), type) -#define BTF_PTR_ENC(name, type) \ - BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), type) +#define BTF_PTR_ENC(type) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), type) + +#define BTF_CONST_ENC(type) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), type) + +#define BTF_FUNC_PROTO_ENC(ret_type, nargs) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, nargs), ret_type) + +#define BTF_FUNC_PROTO_ARG_ENC(name, type) \ + (name), (type) + +#define BTF_FUNC_ENC(name, func_proto) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), func_proto) #define BTF_END_RAW 0xdeadbeef #define NAME_TBD 0xdeadb33f -#define MAX_NR_RAW_TYPES 1024 +#define MAX_NR_RAW_U32 1024 #define BTF_LOG_BUF_SIZE 65535 static struct args { unsigned int raw_test_num; unsigned int file_test_num; unsigned int get_info_test_num; + unsigned int info_raw_test_num; bool raw_test; bool file_test; bool get_info_test; bool pprint_test; bool always_log; + bool info_raw_test; } args; static char btf_log_buf[BTF_LOG_BUF_SIZE]; @@ -118,7 +139,7 @@ struct btf_raw_test { const char *str_sec; const char *map_name; const char *err_str; - __u32 raw_types[MAX_NR_RAW_TYPES]; + __u32 raw_types[MAX_NR_RAW_U32]; __u32 str_sec_size; enum bpf_map_type map_type; __u32 key_size; @@ -130,12 +151,16 @@ struct btf_raw_test { bool map_create_err; bool ordered_map; bool lossless_map; + bool percpu_map; int hdr_len_delta; int type_off_delta; int str_off_delta; int str_len_delta; }; +#define BTF_STR_SEC(str) \ + .str_sec = str, .str_sec_size = sizeof(str) + static struct btf_raw_test raw_tests[] = { /* enum E { * E0, @@ -431,11 +456,11 @@ static struct btf_raw_test raw_tests[] = { /* const void* */ /* [3] */ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* typedef const void * const_void_ptr */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3), - /* struct A { */ /* [4] */ + BTF_TYPEDEF_ENC(NAME_TBD, 3), /* [4] */ + /* struct A { */ /* [5] */ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)), /* const_void_ptr m; */ - BTF_MEMBER_ENC(NAME_TBD, 3, 0), + BTF_MEMBER_ENC(NAME_TBD, 4, 0), /* } */ BTF_END_RAW, }, @@ -493,10 +518,10 @@ static struct btf_raw_test raw_tests[] = { BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0), /* const void* */ /* [3] */ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), - /* typedef const void * const_void_ptr */ /* [4] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3), - /* const_void_ptr[4] */ /* [5] */ - BTF_TYPE_ARRAY_ENC(3, 1, 4), + /* typedef const void * const_void_ptr */ + BTF_TYPEDEF_ENC(NAME_TBD, 3), /* [4] */ + /* const_void_ptr[4] */ + BTF_TYPE_ARRAY_ENC(4, 1, 4), /* [5] */ BTF_END_RAW, }, .str_sec = "\0const_void_ptr", @@ -1292,6 +1317,367 @@ static struct btf_raw_test raw_tests[] = { }, { + .descr = "typedef (invalid name, name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(0, 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__int", + .str_sec_size = sizeof("\0__int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "typedef_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "typedef (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__!int", + .str_sec_size = sizeof("\0__!int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "typedef_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "ptr type (invalid name, name_off <> 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__int", + .str_sec_size = sizeof("\0__int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "ptr_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "volatile type (invalid name, name_off <> 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__int", + .str_sec_size = sizeof("\0__int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "volatile_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "const type (invalid name, name_off <> 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__int", + .str_sec_size = sizeof("\0__int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "const_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "restrict type (invalid name, name_off <> 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 1), /* [2] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_RESTRICT, 0, 0), 2), /* [3] */ + BTF_END_RAW, + }, + .str_sec = "\0__int", + .str_sec_size = sizeof("\0__int"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "restrict_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "fwd type (invalid name, name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 0), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__skb", + .str_sec_size = sizeof("\0__skb"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "fwd_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "fwd type (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 0), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0__!skb", + .str_sec_size = sizeof("\0__!skb"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "fwd_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "array type (invalid name, name_off <> 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0), /* [2] */ + BTF_ARRAY_ENC(1, 1, 4), + BTF_END_RAW, + }, + .str_sec = "\0__skb", + .str_sec_size = sizeof("\0__skb"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "struct type (name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, + BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_END_RAW, + }, + .str_sec = "\0A", + .str_sec_size = sizeof("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "struct type (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_END_RAW, + }, + .str_sec = "\0A!\0B", + .str_sec_size = sizeof("\0A!\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "struct member (name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, + BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_END_RAW, + }, + .str_sec = "\0A", + .str_sec_size = sizeof("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "struct member (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_END_RAW, + }, + .str_sec = "\0A\0B*", + .str_sec_size = sizeof("\0A\0B*"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "enum type (name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, + BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), + sizeof(int)), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_END_RAW, + }, + .str_sec = "\0A\0B", + .str_sec_size = sizeof("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "enum_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "enum type (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), + sizeof(int)), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_END_RAW, + }, + .str_sec = "\0A!\0B", + .str_sec_size = sizeof("\0A!\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "enum_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "enum member (invalid name, name_off = 0)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, + BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), + sizeof(int)), /* [2] */ + BTF_ENUM_ENC(0, 0), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "enum_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "enum member (invalid name, invalid identifier)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, + BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), + sizeof(int)), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_END_RAW, + }, + .str_sec = "\0A!", + .str_sec_size = sizeof("\0A!"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "enum_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, +{ .descr = "arraymap invalid btf key (a bit field)", .raw_types = { /* int */ /* [1] */ @@ -1373,6 +1759,951 @@ static struct btf_raw_test raw_tests[] = { .map_create_err = true, }, +{ + .descr = "func proto (int (*)(int, unsigned int))", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* int (*)(int, unsigned int) */ + BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (vararg)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int, unsigned int, ...) */ + BTF_FUNC_PROTO_ENC(0, 3), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_FUNC_PROTO_ARG_ENC(0, 0), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (vararg with name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int b, ... c) */ + BTF_FUNC_PROTO_ENC(0, 3), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 0), + BTF_END_RAW, + }, + .str_sec = "\0a\0b\0c", + .str_sec_size = sizeof("\0a\0b\0c"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#3", +}, + +{ + .descr = "func proto (arg after vararg)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, ..., unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 3), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 0), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_END_RAW, + }, + .str_sec = "\0a\0b", + .str_sec_size = sizeof("\0a\0b"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#2", +}, + +{ + .descr = "func proto (CONST=>TYPEDEF=>PTR=>FUNC_PROTO)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* typedef void (*func_ptr)(int, unsigned int) */ + BTF_TYPEDEF_ENC(NAME_TBD, 5), /* [3] */ + /* const func_ptr */ + BTF_CONST_ENC(3), /* [4] */ + BTF_PTR_ENC(6), /* [5] */ + BTF_FUNC_PROTO_ENC(0, 2), /* [6] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_END_RAW, + }, + .str_sec = "\0func_ptr", + .str_sec_size = sizeof("\0func_ptr"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (TYPEDEF=>FUNC_PROTO)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */ + BTF_FUNC_PROTO_ENC(0, 2), /* [4] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_END_RAW, + }, + .str_sec = "\0func_typedef", + .str_sec_size = sizeof("\0func_typedef"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (btf_resolve(arg))", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* void (*)(const void *) */ + BTF_FUNC_PROTO_ENC(0, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(0, 3), + BTF_CONST_ENC(4), /* [3] */ + BTF_PTR_ENC(0), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (Not all arg has name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int, unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_END_RAW, + }, + .str_sec = "\0b", + .str_sec_size = sizeof("\0b"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func proto (Bad arg name_off)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int <bad_name_off>) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(0xffffffff, 2), + BTF_END_RAW, + }, + .str_sec = "\0a", + .str_sec_size = sizeof("\0a"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#2", +}, + +{ + .descr = "func proto (Bad arg name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int !!!) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_END_RAW, + }, + .str_sec = "\0a\0!!!", + .str_sec_size = sizeof("\0a\0!!!"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#2", +}, + +{ + .descr = "func proto (Invalid return type)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* <bad_ret_type> (*)(int, unsigned int) */ + BTF_FUNC_PROTO_ENC(100, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid return type", +}, + +{ + .descr = "func proto (with func name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void func_proto(int, unsigned int) */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 2), 0), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + BTF_END_RAW, + }, + .str_sec = "\0func_proto", + .str_sec_size = sizeof("\0func_proto"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "func proto (const void arg)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(const void) */ + BTF_FUNC_PROTO_ENC(0, 1), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(0, 4), + BTF_CONST_ENC(0), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "", + .str_sec_size = sizeof(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#1", +}, + +{ + .descr = "func (void func(int a, unsigned int b))", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + /* void func(int a, unsigned int b) */ + BTF_FUNC_ENC(NAME_TBD, 3), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0b\0func", + .str_sec_size = sizeof("\0a\0b\0func"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "func (No func name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + /* void <no_name>(int a, unsigned int b) */ + BTF_FUNC_ENC(0, 3), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0b", + .str_sec_size = sizeof("\0a\0b"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "func (Invalid func name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + /* void !!!(int a, unsigned int b) */ + BTF_FUNC_ENC(NAME_TBD, 3), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0b\0!!!", + .str_sec_size = sizeof("\0a\0b\0!!!"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid name", +}, + +{ + .descr = "func (Some arg has no name)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(0, 2), + /* void func(int a, unsigned int) */ + BTF_FUNC_ENC(NAME_TBD, 3), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0func", + .str_sec_size = sizeof("\0a\0func"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid arg#2", +}, + +{ + .descr = "func (Non zero vlen)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ + /* void (*)(int a, unsigned int b) */ + BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + /* void func(int a, unsigned int b) */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 2), 3), /* [4] */ + BTF_END_RAW, + }, + .str_sec = "\0a\0b\0func", + .str_sec_size = sizeof("\0a\0b\0func"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "vlen != 0", +}, + +{ + .descr = "func (Not referring to FUNC_PROTO)", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_ENC(NAME_TBD, 1), /* [2] */ + BTF_END_RAW, + }, + .str_sec = "\0func", + .str_sec_size = sizeof("\0func"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid type_id", +}, + +{ + .descr = "invalid int kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_INT, 1, 0), 4), /* [2] */ + BTF_INT_ENC(0, 0, 32), + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "int_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid ptr kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 1, 0), 1), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "ptr_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid array kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 1, 0), 0), /* [2] */ + BTF_ARRAY_ENC(1, 1, 1), + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "array_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid enum kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 1, 1), 4), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "enum_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "valid fwd kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_FWD, 1, 0), 0), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "fwd_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "invalid typedef kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, + BTF_INFO_ENC(BTF_KIND_TYPEDEF, 1, 0), 1), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "typedef_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid volatile kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_VOLATILE, 1, 0), 1), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "volatile_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid const kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 1, 0), 1), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "const_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid restrict kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_RESTRICT, 1, 0), 1), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "restrict_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid func kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 0), 0), /* [2] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC, 1, 0), 2), /* [3] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "invalid func_proto kind_flag", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 1, 0), 0), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC(""), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "func_proto_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid btf_info kind_flag", +}, + +{ + .descr = "valid struct, kind_flag, bitfield_size = 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 8), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(0, 0)), + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(0, 32)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "valid struct, kind_flag, int member, bitfield_size != 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(4, 0)), + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(4, 4)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "valid union, kind_flag, int member, bitfield_size != 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 1, 2), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(4, 0)), + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(4, 0)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "union_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "valid struct, kind_flag, enum member, bitfield_size != 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4),/* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(4, 0)), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(4, 4)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "valid union, kind_flag, enum member, bitfield_size != 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 1, 2), 4), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(4, 0)), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(4, 0)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "union_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "valid struct, kind_flag, typedef member, bitfield_size != 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4),/* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 4, BTF_MEMBER_OFFSET(4, 0)), + BTF_MEMBER_ENC(NAME_TBD, 5, BTF_MEMBER_OFFSET(4, 4)), + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [4] */ + BTF_TYPEDEF_ENC(NAME_TBD, 2), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C\0D\0E"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "valid union, kind_flag, typedef member, bitfield_size != 0", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 1, 2), 4), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 4, BTF_MEMBER_OFFSET(4, 0)), + BTF_MEMBER_ENC(NAME_TBD, 5, BTF_MEMBER_OFFSET(4, 0)), + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [4] */ + BTF_TYPEDEF_ENC(NAME_TBD, 2), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C\0D\0E"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "union_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "invalid struct, kind_flag, bitfield_size greater than struct size", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(20, 0)), + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(20, 20)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Member exceeds struct_size", +}, + +{ + .descr = "invalid struct, kind_flag, bitfield base_type int not regular", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 20, 4), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(20, 0)), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(20, 20)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid member base type", +}, + +{ + .descr = "invalid struct, kind_flag, base_type int not regular", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 12, 4), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 4), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(8, 0)), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(8, 8)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid member base type", +}, + +{ + .descr = "invalid union, kind_flag, bitfield_size greater than struct size", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 1, 2), 2), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(8, 0)), + BTF_MEMBER_ENC(NAME_TBD, 1, BTF_MEMBER_OFFSET(20, 0)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "union_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Member exceeds struct_size", +}, + +{ + .descr = "invalid struct, kind_flag, int member, bitfield_size = 0, wrong byte alignment", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 12), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 0)), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 36)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid member offset", +}, + +{ + .descr = "invalid struct, kind_flag, enum member, bitfield_size = 0, wrong byte alignment", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), /* [2] */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 2), 12), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 0)), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 36)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, + .btf_load_err = true, + .err_str = "Invalid member offset", +}, + }; /* struct btf_raw_test raw_tests[] */ static const char *get_next_str(const char *start, const char *end) @@ -1380,11 +2711,11 @@ static const char *get_next_str(const char *start, const char *end) return start < end - 1 ? start + 1 : NULL; } -static int get_type_sec_size(const __u32 *raw_types) +static int get_raw_sec_size(const __u32 *raw_types) { int i; - for (i = MAX_NR_RAW_TYPES - 1; + for (i = MAX_NR_RAW_U32 - 1; i >= 0 && raw_types[i] != BTF_END_RAW; i--) ; @@ -1396,7 +2727,8 @@ static void *btf_raw_create(const struct btf_header *hdr, const __u32 *raw_types, const char *str, unsigned int str_sec_size, - unsigned int *btf_size) + unsigned int *btf_size, + const char **ret_next_str) { const char *next_str = str, *end_str = str + str_sec_size; unsigned int size_needed, offset; @@ -1405,7 +2737,7 @@ static void *btf_raw_create(const struct btf_header *hdr, uint32_t *ret_types; void *raw_btf; - type_sec_size = get_type_sec_size(raw_types); + type_sec_size = get_raw_sec_size(raw_types); if (CHECK(type_sec_size < 0, "Cannot get nr_raw_types")) return NULL; @@ -1444,6 +2776,8 @@ static void *btf_raw_create(const struct btf_header *hdr, ret_hdr->str_len = str_sec_size; *btf_size = size_needed; + if (ret_next_str) + *ret_next_str = next_str; return raw_btf; } @@ -1463,7 +2797,7 @@ static int do_test_raw(unsigned int test_num) test->raw_types, test->str_sec, test->str_sec_size, - &raw_btf_size); + &raw_btf_size, NULL); if (!raw_btf) return -1; @@ -1540,7 +2874,7 @@ static int test_raw(void) struct btf_get_info_test { const char *descr; const char *str_sec; - __u32 raw_types[MAX_NR_RAW_TYPES]; + __u32 raw_types[MAX_NR_RAW_U32]; __u32 str_sec_size; int btf_size_delta; int (*special_test)(unsigned int test_num); @@ -1620,7 +2954,7 @@ static int test_big_btf_info(unsigned int test_num) test->raw_types, test->str_sec, test->str_sec_size, - &raw_btf_size); + &raw_btf_size, NULL); if (!raw_btf) return -1; @@ -1704,7 +3038,7 @@ static int test_btf_id(unsigned int test_num) test->raw_types, test->str_sec, test->str_sec_size, - &raw_btf_size); + &raw_btf_size, NULL); if (!raw_btf) return -1; @@ -1842,7 +3176,7 @@ static int do_test_get_info(unsigned int test_num) test->raw_types, test->str_sec, test->str_sec_size, - &raw_btf_size); + &raw_btf_size, NULL); if (!raw_btf) return -1; @@ -1939,13 +3273,13 @@ static struct btf_file_test file_tests[] = { }, }; -static int file_has_btf_elf(const char *fn) +static int file_has_btf_elf(const char *fn, bool *has_btf_ext) { Elf_Scn *scn = NULL; GElf_Ehdr ehdr; + int ret = 0; int elf_fd; Elf *elf; - int ret; if (CHECK(elf_version(EV_CURRENT) == EV_NONE, "elf_version(EV_CURRENT) == EV_NONE")) @@ -1977,14 +3311,12 @@ static int file_has_btf_elf(const char *fn) } sh_name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name); - if (!strcmp(sh_name, BTF_ELF_SEC)) { + if (!strcmp(sh_name, BTF_ELF_SEC)) ret = 1; - goto done; - } + if (!strcmp(sh_name, BTF_EXT_ELF_SEC)) + *has_btf_ext = true; } - ret = 0; - done: close(elf_fd); elf_end(elf); @@ -1994,15 +3326,24 @@ done: static int do_test_file(unsigned int test_num) { const struct btf_file_test *test = &file_tests[test_num - 1]; + const char *expected_fnames[] = {"_dummy_tracepoint", + "test_long_fname_1", + "test_long_fname_2"}; + struct bpf_prog_info info = {}; struct bpf_object *obj = NULL; + struct bpf_func_info *finfo; struct bpf_program *prog; + __u32 info_len, rec_size; + bool has_btf_ext = false; + struct btf *btf = NULL; + void *func_info = NULL; struct bpf_map *map; - int err; + int i, err, prog_fd; fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num, test->file); - err = file_has_btf_elf(test->file); + err = file_has_btf_elf(test->file, &has_btf_ext); if (err == -1) return err; @@ -2030,6 +3371,7 @@ static int do_test_file(unsigned int test_num) err = bpf_object__load(obj); if (CHECK(err < 0, "bpf_object__load: %d", err)) goto done; + prog_fd = bpf_program__fd(prog); map = bpf_object__find_map_by_name(obj, "btf_map"); if (CHECK(!map, "btf_map not found")) { @@ -2044,9 +3386,100 @@ static int do_test_file(unsigned int test_num) test->btf_kv_notfound)) goto done; + if (!has_btf_ext) + goto skip; + + /* get necessary program info */ + info_len = sizeof(struct bpf_prog_info); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + + if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) { + fprintf(stderr, "%s\n", btf_log_buf); + err = -1; + goto done; + } + if (CHECK(info.nr_func_info != 3, + "incorrect info.nr_func_info (1st) %d", + info.nr_func_info)) { + err = -1; + goto done; + } + rec_size = info.func_info_rec_size; + if (CHECK(rec_size != sizeof(struct bpf_func_info), + "incorrect info.func_info_rec_size (1st) %d\n", rec_size)) { + err = -1; + goto done; + } + + func_info = malloc(info.nr_func_info * rec_size); + if (CHECK(!func_info, "out of memory")) { + err = -1; + goto done; + } + + /* reset info to only retrieve func_info related data */ + memset(&info, 0, sizeof(info)); + info.nr_func_info = 3; + info.func_info_rec_size = rec_size; + info.func_info = ptr_to_u64(func_info); + + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + + if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) { + fprintf(stderr, "%s\n", btf_log_buf); + err = -1; + goto done; + } + if (CHECK(info.nr_func_info != 3, + "incorrect info.nr_func_info (2nd) %d", + info.nr_func_info)) { + err = -1; + goto done; + } + if (CHECK(info.func_info_rec_size != rec_size, + "incorrect info.func_info_rec_size (2nd) %d", + info.func_info_rec_size)) { + err = -1; + goto done; + } + + err = btf__get_from_id(info.btf_id, &btf); + if (CHECK(err, "cannot get btf from kernel, err: %d", err)) + goto done; + + /* check three functions */ + finfo = func_info; + for (i = 0; i < 3; i++) { + const struct btf_type *t; + const char *fname; + + t = btf__type_by_id(btf, finfo->type_id); + if (CHECK(!t, "btf__type_by_id failure: id %u", + finfo->type_id)) { + err = -1; + goto done; + } + + fname = btf__name_by_offset(btf, t->name_off); + err = strcmp(fname, expected_fnames[i]); + /* for the second and third functions in .text section, + * the compiler may order them either way. + */ + if (i && err) + err = strcmp(fname, expected_fnames[3 - i]); + if (CHECK(err, "incorrect fname %s", fname ? : "")) { + err = -1; + goto done; + } + + finfo = (void *)finfo + rec_size; + } + +skip: fprintf(stderr, "OK"); done: + free(func_info); bpf_object__close(obj); return err; } @@ -2090,9 +3523,12 @@ struct pprint_mapv { ENUM_TWO, ENUM_THREE, } aenum; + uint32_t ui32b; + uint32_t bits2c:2; }; -static struct btf_raw_test pprint_test_template = { +static struct btf_raw_test pprint_test_template[] = { +{ .raw_types = { /* unsighed char */ /* [1] */ BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1), @@ -2131,7 +3567,7 @@ static struct btf_raw_test pprint_test_template = { BTF_ENUM_ENC(NAME_TBD, 2), BTF_ENUM_ENC(NAME_TBD, 3), /* struct pprint_mapv */ /* [16] */ - BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 8), 32), + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 10), 40), BTF_MEMBER_ENC(NAME_TBD, 11, 0), /* uint32_t ui32 */ BTF_MEMBER_ENC(NAME_TBD, 10, 32), /* uint16_t ui16 */ BTF_MEMBER_ENC(NAME_TBD, 12, 64), /* int32_t si32 */ @@ -2140,15 +3576,148 @@ static struct btf_raw_test pprint_test_template = { BTF_MEMBER_ENC(NAME_TBD, 6, 126), /* unused_bits2b */ BTF_MEMBER_ENC(0, 14, 128), /* union (anon) */ BTF_MEMBER_ENC(NAME_TBD, 15, 192), /* aenum */ + BTF_MEMBER_ENC(NAME_TBD, 11, 224), /* uint32_t ui32b */ + BTF_MEMBER_ENC(NAME_TBD, 6, 256), /* bits2c */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c"), + .key_size = sizeof(unsigned int), + .value_size = sizeof(struct pprint_mapv), + .key_type_id = 3, /* unsigned int */ + .value_type_id = 16, /* struct pprint_mapv */ + .max_entries = 128 * 1024, +}, + +{ + /* this type will have the same type as the + * first .raw_types definition, but struct type will + * be encoded with kind_flag set. + */ + .raw_types = { + /* unsighed char */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1), + /* unsigned short */ /* [2] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 16, 2), + /* unsigned int */ /* [3] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), + /* int */ /* [4] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + /* unsigned long long */ /* [5] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8), + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [6] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [7] */ + /* uint8_t[8] */ /* [8] */ + BTF_TYPE_ARRAY_ENC(9, 1, 8), + /* typedef unsigned char uint8_t */ /* [9] */ + BTF_TYPEDEF_ENC(NAME_TBD, 1), + /* typedef unsigned short uint16_t */ /* [10] */ + BTF_TYPEDEF_ENC(NAME_TBD, 2), + /* typedef unsigned int uint32_t */ /* [11] */ + BTF_TYPEDEF_ENC(NAME_TBD, 3), + /* typedef int int32_t */ /* [12] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), + /* typedef unsigned long long uint64_t *//* [13] */ + BTF_TYPEDEF_ENC(NAME_TBD, 5), + /* union (anon) */ /* [14] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 0, 2), 8), + BTF_MEMBER_ENC(NAME_TBD, 13, 0),/* uint64_t ui64; */ + BTF_MEMBER_ENC(NAME_TBD, 8, 0), /* uint8_t ui8a[8]; */ + /* enum (anon) */ /* [15] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 4), 4), + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + BTF_ENUM_ENC(NAME_TBD, 2), + BTF_ENUM_ENC(NAME_TBD, 3), + /* struct pprint_mapv */ /* [16] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 10), 40), + BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 0)), /* uint32_t ui32 */ + BTF_MEMBER_ENC(NAME_TBD, 10, BTF_MEMBER_OFFSET(0, 32)), /* uint16_t ui16 */ + BTF_MEMBER_ENC(NAME_TBD, 12, BTF_MEMBER_OFFSET(0, 64)), /* int32_t si32 */ + BTF_MEMBER_ENC(NAME_TBD, 6, BTF_MEMBER_OFFSET(2, 96)), /* unused_bits2a */ + BTF_MEMBER_ENC(NAME_TBD, 7, BTF_MEMBER_OFFSET(28, 98)), /* bits28 */ + BTF_MEMBER_ENC(NAME_TBD, 6, BTF_MEMBER_OFFSET(2, 126)), /* unused_bits2b */ + BTF_MEMBER_ENC(0, 14, BTF_MEMBER_OFFSET(0, 128)), /* union (anon) */ + BTF_MEMBER_ENC(NAME_TBD, 15, BTF_MEMBER_OFFSET(0, 192)), /* aenum */ + BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 224)), /* uint32_t ui32b */ + BTF_MEMBER_ENC(NAME_TBD, 6, BTF_MEMBER_OFFSET(2, 256)), /* bits2c */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c"), + .key_size = sizeof(unsigned int), + .value_size = sizeof(struct pprint_mapv), + .key_type_id = 3, /* unsigned int */ + .value_type_id = 16, /* struct pprint_mapv */ + .max_entries = 128 * 1024, +}, + +{ + /* this type will have the same layout as the + * first .raw_types definition. The struct type will + * be encoded with kind_flag set, bitfield members + * are added typedef/const/volatile, and bitfield members + * will have both int and enum types. + */ + .raw_types = { + /* unsighed char */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1), + /* unsigned short */ /* [2] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 16, 2), + /* unsigned int */ /* [3] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), + /* int */ /* [4] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), + /* unsigned long long */ /* [5] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8), + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [6] */ + BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [7] */ + /* uint8_t[8] */ /* [8] */ + BTF_TYPE_ARRAY_ENC(9, 1, 8), + /* typedef unsigned char uint8_t */ /* [9] */ + BTF_TYPEDEF_ENC(NAME_TBD, 1), + /* typedef unsigned short uint16_t */ /* [10] */ + BTF_TYPEDEF_ENC(NAME_TBD, 2), + /* typedef unsigned int uint32_t */ /* [11] */ + BTF_TYPEDEF_ENC(NAME_TBD, 3), + /* typedef int int32_t */ /* [12] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), + /* typedef unsigned long long uint64_t *//* [13] */ + BTF_TYPEDEF_ENC(NAME_TBD, 5), + /* union (anon) */ /* [14] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 0, 2), 8), + BTF_MEMBER_ENC(NAME_TBD, 13, 0),/* uint64_t ui64; */ + BTF_MEMBER_ENC(NAME_TBD, 8, 0), /* uint8_t ui8a[8]; */ + /* enum (anon) */ /* [15] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 4), 4), + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + BTF_ENUM_ENC(NAME_TBD, 2), + BTF_ENUM_ENC(NAME_TBD, 3), + /* struct pprint_mapv */ /* [16] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 10), 40), + BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 0)), /* uint32_t ui32 */ + BTF_MEMBER_ENC(NAME_TBD, 10, BTF_MEMBER_OFFSET(0, 32)), /* uint16_t ui16 */ + BTF_MEMBER_ENC(NAME_TBD, 12, BTF_MEMBER_OFFSET(0, 64)), /* int32_t si32 */ + BTF_MEMBER_ENC(NAME_TBD, 17, BTF_MEMBER_OFFSET(2, 96)), /* unused_bits2a */ + BTF_MEMBER_ENC(NAME_TBD, 7, BTF_MEMBER_OFFSET(28, 98)), /* bits28 */ + BTF_MEMBER_ENC(NAME_TBD, 19, BTF_MEMBER_OFFSET(2, 126)),/* unused_bits2b */ + BTF_MEMBER_ENC(0, 14, BTF_MEMBER_OFFSET(0, 128)), /* union (anon) */ + BTF_MEMBER_ENC(NAME_TBD, 15, BTF_MEMBER_OFFSET(0, 192)), /* aenum */ + BTF_MEMBER_ENC(NAME_TBD, 11, BTF_MEMBER_OFFSET(0, 224)), /* uint32_t ui32b */ + BTF_MEMBER_ENC(NAME_TBD, 17, BTF_MEMBER_OFFSET(2, 256)), /* bits2c */ + /* typedef unsigned int ___int */ /* [17] */ + BTF_TYPEDEF_ENC(NAME_TBD, 18), + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), 6), /* [18] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 15), /* [19] */ BTF_END_RAW, }, - .str_sec = "\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum", - .str_sec_size = sizeof("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum"), + BTF_STR_SEC("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum\0ui32b\0bits2c\0___int"), .key_size = sizeof(unsigned int), .value_size = sizeof(struct pprint_mapv), .key_type_id = 3, /* unsigned int */ .value_type_id = 16, /* struct pprint_mapv */ .max_entries = 128 * 1024, +}, + }; static struct btf_pprint_test_meta { @@ -2157,6 +3726,7 @@ static struct btf_pprint_test_meta { const char *map_name; bool ordered_map; bool lossless_map; + bool percpu_map; } pprint_tests_meta[] = { { .descr = "BTF pretty print array", @@ -2164,6 +3734,7 @@ static struct btf_pprint_test_meta { .map_name = "pprint_test_array", .ordered_map = true, .lossless_map = true, + .percpu_map = false, }, { @@ -2172,6 +3743,7 @@ static struct btf_pprint_test_meta { .map_name = "pprint_test_hash", .ordered_map = false, .lossless_map = true, + .percpu_map = false, }, { @@ -2180,30 +3752,85 @@ static struct btf_pprint_test_meta { .map_name = "pprint_test_lru_hash", .ordered_map = false, .lossless_map = false, + .percpu_map = false, +}, + +{ + .descr = "BTF pretty print percpu array", + .map_type = BPF_MAP_TYPE_PERCPU_ARRAY, + .map_name = "pprint_test_percpu_array", + .ordered_map = true, + .lossless_map = true, + .percpu_map = true, +}, + +{ + .descr = "BTF pretty print percpu hash", + .map_type = BPF_MAP_TYPE_PERCPU_HASH, + .map_name = "pprint_test_percpu_hash", + .ordered_map = false, + .lossless_map = true, + .percpu_map = true, +}, + +{ + .descr = "BTF pretty print lru percpu hash", + .map_type = BPF_MAP_TYPE_LRU_PERCPU_HASH, + .map_name = "pprint_test_lru_percpu_hash", + .ordered_map = false, + .lossless_map = false, + .percpu_map = true, }, }; -static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i) +static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i, + int num_cpus, int rounded_value_size) +{ + int cpu; + + for (cpu = 0; cpu < num_cpus; cpu++) { + v->ui32 = i + cpu; + v->si32 = -i; + v->unused_bits2a = 3; + v->bits28 = i; + v->unused_bits2b = 3; + v->ui64 = i; + v->aenum = i & 0x03; + v->ui32b = 4; + v->bits2c = 1; + v = (void *)v + rounded_value_size; + } +} + +static int check_line(const char *expected_line, int nexpected_line, + int expected_line_len, const char *line) { - v->ui32 = i; - v->si32 = -i; - v->unused_bits2a = 3; - v->bits28 = i; - v->unused_bits2b = 3; - v->ui64 = i; - v->aenum = i & 0x03; + if (CHECK(nexpected_line == expected_line_len, + "expected_line is too long")) + return -1; + + if (strcmp(expected_line, line)) { + fprintf(stderr, "unexpected pprint output\n"); + fprintf(stderr, "expected: %s", expected_line); + fprintf(stderr, " read: %s", line); + return -1; + } + + return 0; } -static int do_test_pprint(void) + +static int do_test_pprint(int test_num) { - const struct btf_raw_test *test = &pprint_test_template; + const struct btf_raw_test *test = &pprint_test_template[test_num]; struct bpf_create_map_attr create_attr = {}; + bool ordered_map, lossless_map, percpu_map; + int err, ret, num_cpus, rounded_value_size; + struct pprint_mapv *mapv = NULL; unsigned int key, nr_read_elems; - bool ordered_map, lossless_map; int map_fd = -1, btf_fd = -1; - struct pprint_mapv mapv = {}; unsigned int raw_btf_size; char expected_line[255]; FILE *pin_file = NULL; @@ -2212,12 +3839,11 @@ static int do_test_pprint(void) char *line = NULL; uint8_t *raw_btf; ssize_t nread; - int err, ret; - fprintf(stderr, "%s......", test->descr); + fprintf(stderr, "%s(#%d)......", test->descr, test_num); raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, test->str_sec, test->str_sec_size, - &raw_btf_size); + &raw_btf_size, NULL); if (!raw_btf) return -1; @@ -2261,9 +3887,18 @@ static int do_test_pprint(void) if (CHECK(err, "bpf_obj_pin(%s): errno:%d.", pin_path, errno)) goto done; + percpu_map = test->percpu_map; + num_cpus = percpu_map ? bpf_num_possible_cpus() : 1; + rounded_value_size = round_up(sizeof(struct pprint_mapv), 8); + mapv = calloc(num_cpus, rounded_value_size); + if (CHECK(!mapv, "mapv allocation failure")) { + err = -1; + goto done; + } + for (key = 0; key < test->max_entries; key++) { - set_pprint_mapv(&mapv, key); - bpf_map_update_elem(map_fd, &key, &mapv, 0); + set_pprint_mapv(mapv, key, num_cpus, rounded_value_size); + bpf_map_update_elem(map_fd, &key, mapv, 0); } pin_file = fopen(pin_path, "r"); @@ -2286,33 +3921,77 @@ static int do_test_pprint(void) ordered_map = test->ordered_map; lossless_map = test->lossless_map; do { + struct pprint_mapv *cmapv; ssize_t nexpected_line; unsigned int next_key; + int cpu; next_key = ordered_map ? nr_read_elems : atoi(line); - set_pprint_mapv(&mapv, next_key); - nexpected_line = snprintf(expected_line, sizeof(expected_line), - "%u: {%u,0,%d,0x%x,0x%x,0x%x,{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s}\n", - next_key, - mapv.ui32, mapv.si32, - mapv.unused_bits2a, mapv.bits28, mapv.unused_bits2b, - mapv.ui64, - mapv.ui8a[0], mapv.ui8a[1], mapv.ui8a[2], mapv.ui8a[3], - mapv.ui8a[4], mapv.ui8a[5], mapv.ui8a[6], mapv.ui8a[7], - pprint_enum_str[mapv.aenum]); - - if (CHECK(nexpected_line == sizeof(expected_line), - "expected_line is too long")) { - err = -1; - goto done; + set_pprint_mapv(mapv, next_key, num_cpus, rounded_value_size); + cmapv = mapv; + + for (cpu = 0; cpu < num_cpus; cpu++) { + if (percpu_map) { + /* for percpu map, the format looks like: + * <key>: { + * cpu0: <value_on_cpu0> + * cpu1: <value_on_cpu1> + * ... + * cpun: <value_on_cpun> + * } + * + * let us verify the line containing the key here. + */ + if (cpu == 0) { + nexpected_line = snprintf(expected_line, + sizeof(expected_line), + "%u: {\n", + next_key); + + err = check_line(expected_line, nexpected_line, + sizeof(expected_line), line); + if (err == -1) + goto done; + } + + /* read value@cpu */ + nread = getline(&line, &line_len, pin_file); + if (nread < 0) + break; + } + + nexpected_line = snprintf(expected_line, sizeof(expected_line), + "%s%u: {%u,0,%d,0x%x,0x%x,0x%x," + "{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s," + "%u,0x%x}\n", + percpu_map ? "\tcpu" : "", + percpu_map ? cpu : next_key, + cmapv->ui32, cmapv->si32, + cmapv->unused_bits2a, + cmapv->bits28, + cmapv->unused_bits2b, + cmapv->ui64, + cmapv->ui8a[0], cmapv->ui8a[1], + cmapv->ui8a[2], cmapv->ui8a[3], + cmapv->ui8a[4], cmapv->ui8a[5], + cmapv->ui8a[6], cmapv->ui8a[7], + pprint_enum_str[cmapv->aenum], + cmapv->ui32b, + cmapv->bits2c); + + err = check_line(expected_line, nexpected_line, + sizeof(expected_line), line); + if (err == -1) + goto done; + + cmapv = (void *)cmapv + rounded_value_size; } - if (strcmp(expected_line, line)) { - err = -1; - fprintf(stderr, "unexpected pprint output\n"); - fprintf(stderr, "expected: %s", expected_line); - fprintf(stderr, " read: %s", line); - goto done; + if (percpu_map) { + /* skip the last bracket for the percpu map */ + nread = getline(&line, &line_len, pin_file); + if (nread < 0) + break; } nread = getline(&line, &line_len, pin_file); @@ -2334,6 +4013,8 @@ static int do_test_pprint(void) err = 0; done: + if (mapv) + free(mapv); if (!err) fprintf(stderr, "OK"); if (*btf_log_buf && (err || args.always_log)) @@ -2355,29 +4036,940 @@ static int test_pprint(void) unsigned int i; int err = 0; + /* test various maps with the first test template */ for (i = 0; i < ARRAY_SIZE(pprint_tests_meta); i++) { - pprint_test_template.descr = pprint_tests_meta[i].descr; - pprint_test_template.map_type = pprint_tests_meta[i].map_type; - pprint_test_template.map_name = pprint_tests_meta[i].map_name; - pprint_test_template.ordered_map = pprint_tests_meta[i].ordered_map; - pprint_test_template.lossless_map = pprint_tests_meta[i].lossless_map; + pprint_test_template[0].descr = pprint_tests_meta[i].descr; + pprint_test_template[0].map_type = pprint_tests_meta[i].map_type; + pprint_test_template[0].map_name = pprint_tests_meta[i].map_name; + pprint_test_template[0].ordered_map = pprint_tests_meta[i].ordered_map; + pprint_test_template[0].lossless_map = pprint_tests_meta[i].lossless_map; + pprint_test_template[0].percpu_map = pprint_tests_meta[i].percpu_map; + + err |= count_result(do_test_pprint(0)); + } - err |= count_result(do_test_pprint()); + /* test rest test templates with the first map */ + for (i = 1; i < ARRAY_SIZE(pprint_test_template); i++) { + pprint_test_template[i].descr = pprint_tests_meta[0].descr; + pprint_test_template[i].map_type = pprint_tests_meta[0].map_type; + pprint_test_template[i].map_name = pprint_tests_meta[0].map_name; + pprint_test_template[i].ordered_map = pprint_tests_meta[0].ordered_map; + pprint_test_template[i].lossless_map = pprint_tests_meta[0].lossless_map; + pprint_test_template[i].percpu_map = pprint_tests_meta[0].percpu_map; + err |= count_result(do_test_pprint(i)); } return err; } +#define BPF_LINE_INFO_ENC(insn_off, file_off, line_off, line_num, line_col) \ + (insn_off), (file_off), (line_off), ((line_num) << 10 | ((line_col) & 0x3ff)) + +static struct prog_info_raw_test { + const char *descr; + const char *str_sec; + const char *err_str; + __u32 raw_types[MAX_NR_RAW_U32]; + __u32 str_sec_size; + struct bpf_insn insns[MAX_INSNS]; + __u32 prog_type; + __u32 func_info[MAX_SUBPROGS][2]; + __u32 func_info_rec_size; + __u32 func_info_cnt; + __u32 line_info[MAX_NR_RAW_U32]; + __u32 line_info_rec_size; + __u32 nr_jited_ksyms; + bool expected_prog_load_failure; +} info_raw_tests[] = { +{ + .descr = "func_type (main func + one sub)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), /* [2] */ + BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ENC(1, 2), /* [4] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 3), /* [5] */ + BTF_FUNC_ENC(NAME_TBD, 4), /* [6] */ + BTF_END_RAW, + }, + .str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB", + .str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"), + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info = { {0, 5}, {3, 6} }, + .func_info_rec_size = 8, + .func_info_cnt = 2, + .line_info = { BTF_END_RAW }, +}, + +{ + .descr = "func_type (Incorrect func_info_rec_size)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), /* [2] */ + BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ENC(1, 2), /* [4] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 3), /* [5] */ + BTF_FUNC_ENC(NAME_TBD, 4), /* [6] */ + BTF_END_RAW, + }, + .str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB", + .str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"), + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info = { {0, 5}, {3, 6} }, + .func_info_rec_size = 4, + .func_info_cnt = 2, + .line_info = { BTF_END_RAW }, + .expected_prog_load_failure = true, +}, + +{ + .descr = "func_type (Incorrect func_info_cnt)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), /* [2] */ + BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ENC(1, 2), /* [4] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 3), /* [5] */ + BTF_FUNC_ENC(NAME_TBD, 4), /* [6] */ + BTF_END_RAW, + }, + .str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB", + .str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"), + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info = { {0, 5}, {3, 6} }, + .func_info_rec_size = 8, + .func_info_cnt = 1, + .line_info = { BTF_END_RAW }, + .expected_prog_load_failure = true, +}, + +{ + .descr = "func_type (Incorrect bpf_func_info.insn_off)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), /* [2] */ + BTF_FUNC_PROTO_ENC(1, 2), /* [3] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ENC(1, 2), /* [4] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 2), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 3), /* [5] */ + BTF_FUNC_ENC(NAME_TBD, 4), /* [6] */ + BTF_END_RAW, + }, + .str_sec = "\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB", + .str_sec_size = sizeof("\0int\0unsigned int\0a\0b\0c\0d\0funcA\0funcB"), + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info = { {0, 5}, {2, 6} }, + .func_info_rec_size = 8, + .func_info_cnt = 2, + .line_info = { BTF_END_RAW }, + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (No subprog)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 1, +}, + +{ + .descr = "line_info (No subprog. insn_off >= prog->len)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), + BPF_LINE_INFO_ENC(4, 0, 0, 5, 6), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 1, + .err_str = "line_info[4].insn_off", + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (Zero bpf insn code)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8), /* [2] */ + BTF_TYPEDEF_ENC(NAME_TBD, 2), /* [3] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0unsigned long\0u64\0u64 a=1;\0return a;"), + .insns = { + BPF_LD_IMM64(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(1, 0, 0, 2, 9), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 1, + .err_str = "Invalid insn code at line_info[1]", + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (No subprog. zero tailing line_info", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), 0, + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), 0, + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), 0, + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), 0, + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info) + sizeof(__u32), + .nr_jited_ksyms = 1, +}, + +{ + .descr = "line_info (No subprog. nonzero tailing line_info)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), 0, + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), 0, + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), 0, + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), 1, + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info) + sizeof(__u32), + .nr_jited_ksyms = 1, + .err_str = "nonzero tailing record in line_info", + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (subprog)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, +}, + +{ + .descr = "line_info (subprog + func_info)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0x\0sub\0main\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 2, + .func_info_rec_size = 8, + .func_info = { {0, 4}, {5, 3} }, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, +}, + +{ + .descr = "line_info (subprog. missing 1st func line info)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .err_str = "missing bpf_line_info for func#0", + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (subprog. missing 2nd func line info)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .err_str = "missing bpf_line_info for func#1", + .expected_prog_load_failure = true, +}, + +{ + .descr = "line_info (subprog. unordered insn offset)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1+1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .err_str = "Invalid line_info[2].insn_off", + .expected_prog_load_failure = true, +}, + +}; + +static size_t probe_prog_length(const struct bpf_insn *fp) +{ + size_t len; + + for (len = MAX_INSNS - 1; len > 0; --len) + if (fp[len].code != 0 || fp[len].imm != 0) + break; + return len + 1; +} + +static __u32 *patch_name_tbd(const __u32 *raw_u32, + const char *str, __u32 str_off, + unsigned int str_sec_size, + unsigned int *ret_size) +{ + int i, raw_u32_size = get_raw_sec_size(raw_u32); + const char *end_str = str + str_sec_size; + const char *next_str = str + str_off; + __u32 *new_u32 = NULL; + + if (raw_u32_size == -1) + return ERR_PTR(-EINVAL); + + if (!raw_u32_size) { + *ret_size = 0; + return NULL; + } + + new_u32 = malloc(raw_u32_size); + if (!new_u32) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < raw_u32_size / sizeof(raw_u32[0]); i++) { + if (raw_u32[i] == NAME_TBD) { + next_str = get_next_str(next_str, end_str); + if (CHECK(!next_str, "Error in getting next_str\n")) { + free(new_u32); + return ERR_PTR(-EINVAL); + } + new_u32[i] = next_str - str; + next_str += strlen(next_str); + } else { + new_u32[i] = raw_u32[i]; + } + } + + *ret_size = raw_u32_size; + return new_u32; +} + +static int test_get_finfo(const struct prog_info_raw_test *test, + int prog_fd) +{ + struct bpf_prog_info info = {}; + struct bpf_func_info *finfo; + __u32 info_len, rec_size, i; + void *func_info = NULL; + int err; + + /* get necessary lens */ + info_len = sizeof(struct bpf_prog_info); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) { + fprintf(stderr, "%s\n", btf_log_buf); + return -1; + } + if (CHECK(info.nr_func_info != test->func_info_cnt, + "incorrect info.nr_func_info (1st) %d", + info.nr_func_info)) { + return -1; + } + + rec_size = info.func_info_rec_size; + if (CHECK(rec_size != sizeof(struct bpf_func_info), + "incorrect info.func_info_rec_size (1st) %d", rec_size)) { + return -1; + } + + if (!info.nr_func_info) + return 0; + + func_info = malloc(info.nr_func_info * rec_size); + if (CHECK(!func_info, "out of memory")) + return -1; + + /* reset info to only retrieve func_info related data */ + memset(&info, 0, sizeof(info)); + info.nr_func_info = test->func_info_cnt; + info.func_info_rec_size = rec_size; + info.func_info = ptr_to_u64(func_info); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) { + fprintf(stderr, "%s\n", btf_log_buf); + err = -1; + goto done; + } + if (CHECK(info.nr_func_info != test->func_info_cnt, + "incorrect info.nr_func_info (2nd) %d", + info.nr_func_info)) { + err = -1; + goto done; + } + if (CHECK(info.func_info_rec_size != rec_size, + "incorrect info.func_info_rec_size (2nd) %d", + info.func_info_rec_size)) { + err = -1; + goto done; + } + + finfo = func_info; + for (i = 0; i < test->func_info_cnt; i++) { + if (CHECK(finfo->type_id != test->func_info[i][1], + "incorrect func_type %u expected %u", + finfo->type_id, test->func_info[i][1])) { + err = -1; + goto done; + } + finfo = (void *)finfo + rec_size; + } + + err = 0; + +done: + free(func_info); + return err; +} + +static int test_get_linfo(const struct prog_info_raw_test *test, + const void *patched_linfo, + __u32 cnt, int prog_fd) +{ + __u32 i, info_len, nr_jited_ksyms, nr_jited_func_lens; + __u64 *jited_linfo = NULL, *jited_ksyms = NULL; + __u32 rec_size, jited_rec_size, jited_cnt; + struct bpf_line_info *linfo = NULL; + __u32 cur_func_len, ksyms_found; + struct bpf_prog_info info = {}; + __u32 *jited_func_lens = NULL; + __u64 cur_func_ksyms; + int err; + + jited_cnt = cnt; + rec_size = sizeof(*linfo); + jited_rec_size = sizeof(*jited_linfo); + if (test->nr_jited_ksyms) + nr_jited_ksyms = test->nr_jited_ksyms; + else + nr_jited_ksyms = test->func_info_cnt; + nr_jited_func_lens = nr_jited_ksyms; + + info_len = sizeof(struct bpf_prog_info); + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (CHECK(err == -1, "err:%d errno:%d", err, errno)) { + err = -1; + goto done; + } + + if (!info.jited_prog_len) { + /* prog is not jited */ + jited_cnt = 0; + nr_jited_ksyms = 1; + nr_jited_func_lens = 1; + } + + if (CHECK(info.nr_line_info != cnt || + info.nr_jited_line_info != jited_cnt || + info.nr_jited_ksyms != nr_jited_ksyms || + info.nr_jited_func_lens != nr_jited_func_lens || + (!info.nr_line_info && info.nr_jited_line_info), + "info: nr_line_info:%u(expected:%u) nr_jited_line_info:%u(expected:%u) nr_jited_ksyms:%u(expected:%u) nr_jited_func_lens:%u(expected:%u)", + info.nr_line_info, cnt, + info.nr_jited_line_info, jited_cnt, + info.nr_jited_ksyms, nr_jited_ksyms, + info.nr_jited_func_lens, nr_jited_func_lens)) { + err = -1; + goto done; + } + + if (CHECK(info.line_info_rec_size != sizeof(struct bpf_line_info) || + info.jited_line_info_rec_size != sizeof(__u64), + "info: line_info_rec_size:%u(userspace expected:%u) jited_line_info_rec_size:%u(userspace expected:%u)", + info.line_info_rec_size, rec_size, + info.jited_line_info_rec_size, jited_rec_size)) { + err = -1; + goto done; + } + + if (!cnt) + return 0; + + rec_size = info.line_info_rec_size; + jited_rec_size = info.jited_line_info_rec_size; + + memset(&info, 0, sizeof(info)); + + linfo = calloc(cnt, rec_size); + if (CHECK(!linfo, "!linfo")) { + err = -1; + goto done; + } + info.nr_line_info = cnt; + info.line_info_rec_size = rec_size; + info.line_info = ptr_to_u64(linfo); + + if (jited_cnt) { + jited_linfo = calloc(jited_cnt, jited_rec_size); + jited_ksyms = calloc(nr_jited_ksyms, sizeof(*jited_ksyms)); + jited_func_lens = calloc(nr_jited_func_lens, + sizeof(*jited_func_lens)); + if (CHECK(!jited_linfo || !jited_ksyms || !jited_func_lens, + "jited_linfo:%p jited_ksyms:%p jited_func_lens:%p", + jited_linfo, jited_ksyms, jited_func_lens)) { + err = -1; + goto done; + } + + info.nr_jited_line_info = jited_cnt; + info.jited_line_info_rec_size = jited_rec_size; + info.jited_line_info = ptr_to_u64(jited_linfo); + info.nr_jited_ksyms = nr_jited_ksyms; + info.jited_ksyms = ptr_to_u64(jited_ksyms); + info.nr_jited_func_lens = nr_jited_func_lens; + info.jited_func_lens = ptr_to_u64(jited_func_lens); + } + + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + + /* + * Only recheck the info.*line_info* fields. + * Other fields are not the concern of this test. + */ + if (CHECK(err == -1 || + info.nr_line_info != cnt || + (jited_cnt && !info.jited_line_info) || + info.nr_jited_line_info != jited_cnt || + info.line_info_rec_size != rec_size || + info.jited_line_info_rec_size != jited_rec_size, + "err:%d errno:%d info: nr_line_info:%u(expected:%u) nr_jited_line_info:%u(expected:%u) line_info_rec_size:%u(expected:%u) jited_linfo_rec_size:%u(expected:%u) line_info:%p jited_line_info:%p", + err, errno, + info.nr_line_info, cnt, + info.nr_jited_line_info, jited_cnt, + info.line_info_rec_size, rec_size, + info.jited_line_info_rec_size, jited_rec_size, + (void *)(long)info.line_info, + (void *)(long)info.jited_line_info)) { + err = -1; + goto done; + } + + CHECK(linfo[0].insn_off, "linfo[0].insn_off:%u", + linfo[0].insn_off); + for (i = 1; i < cnt; i++) { + const struct bpf_line_info *expected_linfo; + + expected_linfo = patched_linfo + (i * test->line_info_rec_size); + if (CHECK(linfo[i].insn_off <= linfo[i - 1].insn_off, + "linfo[%u].insn_off:%u <= linfo[%u].insn_off:%u", + i, linfo[i].insn_off, + i - 1, linfo[i - 1].insn_off)) { + err = -1; + goto done; + } + if (CHECK(linfo[i].file_name_off != expected_linfo->file_name_off || + linfo[i].line_off != expected_linfo->line_off || + linfo[i].line_col != expected_linfo->line_col, + "linfo[%u] (%u, %u, %u) != (%u, %u, %u)", i, + linfo[i].file_name_off, + linfo[i].line_off, + linfo[i].line_col, + expected_linfo->file_name_off, + expected_linfo->line_off, + expected_linfo->line_col)) { + err = -1; + goto done; + } + } + + if (!jited_cnt) { + fprintf(stderr, "not jited. skipping jited_line_info check. "); + err = 0; + goto done; + } + + if (CHECK(jited_linfo[0] != jited_ksyms[0], + "jited_linfo[0]:%lx != jited_ksyms[0]:%lx", + (long)(jited_linfo[0]), (long)(jited_ksyms[0]))) { + err = -1; + goto done; + } + + ksyms_found = 1; + cur_func_len = jited_func_lens[0]; + cur_func_ksyms = jited_ksyms[0]; + for (i = 1; i < jited_cnt; i++) { + if (ksyms_found < nr_jited_ksyms && + jited_linfo[i] == jited_ksyms[ksyms_found]) { + cur_func_ksyms = jited_ksyms[ksyms_found]; + cur_func_len = jited_ksyms[ksyms_found]; + ksyms_found++; + continue; + } + + if (CHECK(jited_linfo[i] <= jited_linfo[i - 1], + "jited_linfo[%u]:%lx <= jited_linfo[%u]:%lx", + i, (long)jited_linfo[i], + i - 1, (long)(jited_linfo[i - 1]))) { + err = -1; + goto done; + } + + if (CHECK(jited_linfo[i] - cur_func_ksyms > cur_func_len, + "jited_linfo[%u]:%lx - %lx > %u", + i, (long)jited_linfo[i], (long)cur_func_ksyms, + cur_func_len)) { + err = -1; + goto done; + } + } + + if (CHECK(ksyms_found != nr_jited_ksyms, + "ksyms_found:%u != nr_jited_ksyms:%u", + ksyms_found, nr_jited_ksyms)) { + err = -1; + goto done; + } + + err = 0; + +done: + free(linfo); + free(jited_linfo); + free(jited_ksyms); + free(jited_func_lens); + return err; +} + +static int do_test_info_raw(unsigned int test_num) +{ + const struct prog_info_raw_test *test = &info_raw_tests[test_num - 1]; + unsigned int raw_btf_size, linfo_str_off, linfo_size; + int btf_fd = -1, prog_fd = -1, err = 0; + void *raw_btf, *patched_linfo = NULL; + const char *ret_next_str; + union bpf_attr attr = {}; + + fprintf(stderr, "BTF prog info raw test[%u] (%s): ", test_num, test->descr); + raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, + test->str_sec, test->str_sec_size, + &raw_btf_size, &ret_next_str); + + if (!raw_btf) + return -1; + + *btf_log_buf = '\0'; + btf_fd = bpf_load_btf(raw_btf, raw_btf_size, + btf_log_buf, BTF_LOG_BUF_SIZE, + args.always_log); + free(raw_btf); + + if (CHECK(btf_fd == -1, "invalid btf_fd errno:%d", errno)) { + err = -1; + goto done; + } + + if (*btf_log_buf && args.always_log) + fprintf(stderr, "\n%s", btf_log_buf); + *btf_log_buf = '\0'; + + linfo_str_off = ret_next_str - test->str_sec; + patched_linfo = patch_name_tbd(test->line_info, + test->str_sec, linfo_str_off, + test->str_sec_size, &linfo_size); + if (IS_ERR(patched_linfo)) { + fprintf(stderr, "error in creating raw bpf_line_info"); + err = -1; + goto done; + } + + attr.prog_type = test->prog_type; + attr.insns = ptr_to_u64(test->insns); + attr.insn_cnt = probe_prog_length(test->insns); + attr.license = ptr_to_u64("GPL"); + attr.prog_btf_fd = btf_fd; + attr.func_info_rec_size = test->func_info_rec_size; + attr.func_info_cnt = test->func_info_cnt; + attr.func_info = ptr_to_u64(test->func_info); + attr.log_buf = ptr_to_u64(btf_log_buf); + attr.log_size = BTF_LOG_BUF_SIZE; + attr.log_level = 1; + if (linfo_size) { + attr.line_info_rec_size = test->line_info_rec_size; + attr.line_info = ptr_to_u64(patched_linfo); + attr.line_info_cnt = linfo_size / attr.line_info_rec_size; + } + + prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); + err = ((prog_fd == -1) != test->expected_prog_load_failure); + if (CHECK(err, "prog_fd:%d expected_prog_load_failure:%u errno:%d", + prog_fd, test->expected_prog_load_failure, errno) || + CHECK(test->err_str && !strstr(btf_log_buf, test->err_str), + "expected err_str:%s", test->err_str)) { + err = -1; + goto done; + } + + if (prog_fd == -1) + goto done; + + err = test_get_finfo(test, prog_fd); + if (err) + goto done; + + err = test_get_linfo(test, patched_linfo, attr.line_info_cnt, prog_fd); + if (err) + goto done; + +done: + if (!err) + fprintf(stderr, "OK"); + + if (*btf_log_buf && (err || args.always_log)) + fprintf(stderr, "\n%s", btf_log_buf); + + if (btf_fd != -1) + close(btf_fd); + if (prog_fd != -1) + close(prog_fd); + + if (!IS_ERR(patched_linfo)) + free(patched_linfo); + + return err; +} + +static int test_info_raw(void) +{ + unsigned int i; + int err = 0; + + if (args.info_raw_test_num) + return count_result(do_test_info_raw(args.info_raw_test_num)); + + for (i = 1; i <= ARRAY_SIZE(info_raw_tests); i++) + err |= count_result(do_test_info_raw(i)); + + return err; +} + static void usage(const char *cmd) { - fprintf(stderr, "Usage: %s [-l] [[-r test_num (1 - %zu)] | [-g test_num (1 - %zu)] | [-f test_num (1 - %zu)] | [-p]]\n", + fprintf(stderr, "Usage: %s [-l] [[-r btf_raw_test_num (1 - %zu)] |\n" + "\t[-g btf_get_info_test_num (1 - %zu)] |\n" + "\t[-f btf_file_test_num (1 - %zu)] |\n" + "\t[-k btf_prog_info_raw_test_num (1 - %zu)] |\n" + "\t[-p (pretty print test)]]\n", cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests), - ARRAY_SIZE(file_tests)); + ARRAY_SIZE(file_tests), ARRAY_SIZE(info_raw_tests)); } static int parse_args(int argc, char **argv) { - const char *optstr = "lpf:r:g:"; + const char *optstr = "lpk:f:r:g:"; int opt; while ((opt = getopt(argc, argv, optstr)) != -1) { @@ -2400,6 +4992,10 @@ static int parse_args(int argc, char **argv) case 'p': args.pprint_test = true; break; + case 'k': + args.info_raw_test_num = atoi(optarg); + args.info_raw_test = true; + break; case 'h': usage(argv[0]); exit(0); @@ -2433,6 +5029,14 @@ static int parse_args(int argc, char **argv) return -1; } + if (args.info_raw_test_num && + (args.info_raw_test_num < 1 || + args.info_raw_test_num > ARRAY_SIZE(info_raw_tests))) { + fprintf(stderr, "BTF prog info raw test number must be [1 - %zu]\n", + ARRAY_SIZE(info_raw_tests)); + return -1; + } + return 0; } @@ -2465,13 +5069,17 @@ int main(int argc, char **argv) if (args.pprint_test) err |= test_pprint(); + if (args.info_raw_test) + err |= test_info_raw(); + if (args.raw_test || args.get_info_test || args.file_test || - args.pprint_test) + args.pprint_test || args.info_raw_test) goto done; err |= test_raw(); err |= test_get_info(); err |= test_file(); + err |= test_info_raw(); done: print_summary(); diff --git a/tools/testing/selftests/bpf/test_btf_haskv.c b/tools/testing/selftests/bpf/test_btf_haskv.c index b21b876f475d..e5c79fe0ffdb 100644 --- a/tools/testing/selftests/bpf/test_btf_haskv.c +++ b/tools/testing/selftests/bpf/test_btf_haskv.c @@ -24,8 +24,8 @@ struct dummy_tracepoint_args { struct sock *sock; }; -SEC("dummy_tracepoint") -int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +__attribute__((noinline)) +static int test_long_fname_2(struct dummy_tracepoint_args *arg) { struct ipv_counts *counts; int key = 0; @@ -42,4 +42,16 @@ int _dummy_tracepoint(struct dummy_tracepoint_args *arg) return 0; } +__attribute__((noinline)) +static int test_long_fname_1(struct dummy_tracepoint_args *arg) +{ + return test_long_fname_2(arg); +} + +SEC("dummy_tracepoint") +int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +{ + return test_long_fname_1(arg); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_btf_nokv.c b/tools/testing/selftests/bpf/test_btf_nokv.c index 0ed8e088eebf..434188c37774 100644 --- a/tools/testing/selftests/bpf/test_btf_nokv.c +++ b/tools/testing/selftests/bpf/test_btf_nokv.c @@ -22,8 +22,8 @@ struct dummy_tracepoint_args { struct sock *sock; }; -SEC("dummy_tracepoint") -int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +__attribute__((noinline)) +static int test_long_fname_2(struct dummy_tracepoint_args *arg) { struct ipv_counts *counts; int key = 0; @@ -40,4 +40,16 @@ int _dummy_tracepoint(struct dummy_tracepoint_args *arg) return 0; } +__attribute__((noinline)) +static int test_long_fname_1(struct dummy_tracepoint_args *arg) +{ + return test_long_fname_2(arg); +} + +SEC("dummy_tracepoint") +int _dummy_tracepoint(struct dummy_tracepoint_args *arg) +{ + return test_long_fname_1(arg); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c index 4e196e3bfecf..2fc4625c1a15 100644 --- a/tools/testing/selftests/bpf/test_cgroup_storage.c +++ b/tools/testing/selftests/bpf/test_cgroup_storage.c @@ -4,6 +4,7 @@ #include <linux/filter.h> #include <stdio.h> #include <stdlib.h> +#include <sys/sysinfo.h> #include "bpf_rlimit.h" #include "cgroup_helpers.h" @@ -15,6 +16,14 @@ char bpf_log_buf[BPF_LOG_BUF_SIZE]; int main(int argc, char **argv) { struct bpf_insn prog[] = { + BPF_LD_MAP_FD(BPF_REG_1, 0), /* percpu map fd */ + BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1), + BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), /* map fd */ BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, @@ -28,9 +37,18 @@ int main(int argc, char **argv) }; size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); int error = EXIT_FAILURE; - int map_fd, prog_fd, cgroup_fd; + int map_fd, percpu_map_fd, prog_fd, cgroup_fd; struct bpf_cgroup_storage_key key; unsigned long long value; + unsigned long long *percpu_value; + int cpu, nproc; + + nproc = get_nprocs_conf(); + percpu_value = malloc(sizeof(*percpu_value) * nproc); + if (!percpu_value) { + printf("Not enough memory for per-cpu area (%d cpus)\n", nproc); + goto err; + } map_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, sizeof(key), sizeof(value), 0, 0); @@ -39,7 +57,15 @@ int main(int argc, char **argv) goto out; } - prog[0].imm = map_fd; + percpu_map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + sizeof(key), sizeof(value), 0, 0); + if (percpu_map_fd < 0) { + printf("Failed to create map: %s\n", strerror(errno)); + goto out; + } + + prog[0].imm = percpu_map_fd; + prog[7].imm = map_fd; prog_fd = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, prog, insns_cnt, "GPL", 0, bpf_log_buf, BPF_LOG_BUF_SIZE); @@ -55,7 +81,7 @@ int main(int argc, char **argv) /* Create a cgroup, get fd, and join it */ cgroup_fd = create_and_get_cgroup(TEST_CGROUP); - if (!cgroup_fd) { + if (cgroup_fd < 0) { printf("Failed to create test cgroup\n"); goto err; } @@ -77,7 +103,15 @@ int main(int argc, char **argv) } if (bpf_map_lookup_elem(map_fd, &key, &value)) { - printf("Failed to lookup cgroup storage\n"); + printf("Failed to lookup cgroup storage 0\n"); + goto err; + } + + for (cpu = 0; cpu < nproc; cpu++) + percpu_value[cpu] = 1000; + + if (bpf_map_update_elem(percpu_map_fd, &key, percpu_value, 0)) { + printf("Failed to update the data in the cgroup storage\n"); goto err; } @@ -120,11 +154,31 @@ int main(int argc, char **argv) goto err; } + /* Check the final value of the counter in the percpu local storage */ + + for (cpu = 0; cpu < nproc; cpu++) + percpu_value[cpu] = 0; + + if (bpf_map_lookup_elem(percpu_map_fd, &key, percpu_value)) { + printf("Failed to lookup the per-cpu cgroup storage\n"); + goto err; + } + + value = 0; + for (cpu = 0; cpu < nproc; cpu++) + value += percpu_value[cpu]; + + if (value != nproc * 1000 + 6) { + printf("Unexpected data in the per-cpu cgroup storage\n"); + goto err; + } + error = 0; printf("test_cgroup_storage:PASS\n"); err: cleanup_cgroup_environment(); + free(percpu_value); out: return error; diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c index 9c8b50bac7e0..76e4993b7c16 100644 --- a/tools/testing/selftests/bpf/test_dev_cgroup.c +++ b/tools/testing/selftests/bpf/test_dev_cgroup.c @@ -43,7 +43,7 @@ int main(int argc, char **argv) /* Create a cgroup, get fd, and join it */ cgroup_fd = create_and_get_cgroup(TEST_CGROUP); - if (!cgroup_fd) { + if (cgroup_fd < 0) { printf("Failed to create test cgroup\n"); goto err; } diff --git a/tools/testing/selftests/bpf/test_flow_dissector.c b/tools/testing/selftests/bpf/test_flow_dissector.c new file mode 100644 index 000000000000..12b784afba31 --- /dev/null +++ b/tools/testing/selftests/bpf/test_flow_dissector.c @@ -0,0 +1,782 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Inject packets with all sorts of encapsulation into the kernel. + * + * IPv4/IPv6 outer layer 3 + * GRE/GUE/BARE outer layer 4, where bare is IPIP/SIT/IPv4-in-IPv6/.. + * IPv4/IPv6 inner layer 3 + */ + +#define _GNU_SOURCE + +#include <stddef.h> +#include <arpa/inet.h> +#include <asm/byteorder.h> +#include <error.h> +#include <errno.h> +#include <linux/if_packet.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ipv6.h> +#include <netinet/ip.h> +#include <netinet/in.h> +#include <netinet/udp.h> +#include <poll.h> +#include <stdbool.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +#define CFG_PORT_INNER 8000 + +/* Add some protocol definitions that do not exist in userspace */ + +struct grehdr { + uint16_t unused; + uint16_t protocol; +} __attribute__((packed)); + +struct guehdr { + union { + struct { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 hlen:5, + control:1, + version:2; +#elif defined (__BIG_ENDIAN_BITFIELD) + __u8 version:2, + control:1, + hlen:5; +#else +#error "Please fix <asm/byteorder.h>" +#endif + __u8 proto_ctype; + __be16 flags; + }; + __be32 word; + }; +}; + +static uint8_t cfg_dsfield_inner; +static uint8_t cfg_dsfield_outer; +static uint8_t cfg_encap_proto; +static bool cfg_expect_failure = false; +static int cfg_l3_extra = AF_UNSPEC; /* optional SIT prefix */ +static int cfg_l3_inner = AF_UNSPEC; +static int cfg_l3_outer = AF_UNSPEC; +static int cfg_num_pkt = 10; +static int cfg_num_secs = 0; +static char cfg_payload_char = 'a'; +static int cfg_payload_len = 100; +static int cfg_port_gue = 6080; +static bool cfg_only_rx; +static bool cfg_only_tx; +static int cfg_src_port = 9; + +static char buf[ETH_DATA_LEN]; + +#define INIT_ADDR4(name, addr4, port) \ + static struct sockaddr_in name = { \ + .sin_family = AF_INET, \ + .sin_port = __constant_htons(port), \ + .sin_addr.s_addr = __constant_htonl(addr4), \ + }; + +#define INIT_ADDR6(name, addr6, port) \ + static struct sockaddr_in6 name = { \ + .sin6_family = AF_INET6, \ + .sin6_port = __constant_htons(port), \ + .sin6_addr = addr6, \ + }; + +INIT_ADDR4(in_daddr4, INADDR_LOOPBACK, CFG_PORT_INNER) +INIT_ADDR4(in_saddr4, INADDR_LOOPBACK + 2, 0) +INIT_ADDR4(out_daddr4, INADDR_LOOPBACK, 0) +INIT_ADDR4(out_saddr4, INADDR_LOOPBACK + 1, 0) +INIT_ADDR4(extra_daddr4, INADDR_LOOPBACK, 0) +INIT_ADDR4(extra_saddr4, INADDR_LOOPBACK + 1, 0) + +INIT_ADDR6(in_daddr6, IN6ADDR_LOOPBACK_INIT, CFG_PORT_INNER) +INIT_ADDR6(in_saddr6, IN6ADDR_LOOPBACK_INIT, 0) +INIT_ADDR6(out_daddr6, IN6ADDR_LOOPBACK_INIT, 0) +INIT_ADDR6(out_saddr6, IN6ADDR_LOOPBACK_INIT, 0) +INIT_ADDR6(extra_daddr6, IN6ADDR_LOOPBACK_INIT, 0) +INIT_ADDR6(extra_saddr6, IN6ADDR_LOOPBACK_INIT, 0) + +static unsigned long util_gettime(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); +} + +static void util_printaddr(const char *msg, struct sockaddr *addr) +{ + unsigned long off = 0; + char nbuf[INET6_ADDRSTRLEN]; + + switch (addr->sa_family) { + case PF_INET: + off = __builtin_offsetof(struct sockaddr_in, sin_addr); + break; + case PF_INET6: + off = __builtin_offsetof(struct sockaddr_in6, sin6_addr); + break; + default: + error(1, 0, "printaddr: unsupported family %u\n", + addr->sa_family); + } + + if (!inet_ntop(addr->sa_family, ((void *) addr) + off, nbuf, + sizeof(nbuf))) + error(1, errno, "inet_ntop"); + + fprintf(stderr, "%s: %s\n", msg, nbuf); +} + +static unsigned long add_csum_hword(const uint16_t *start, int num_u16) +{ + unsigned long sum = 0; + int i; + + for (i = 0; i < num_u16; i++) + sum += start[i]; + + return sum; +} + +static uint16_t build_ip_csum(const uint16_t *start, int num_u16, + unsigned long sum) +{ + sum += add_csum_hword(start, num_u16); + + while (sum >> 16) + sum = (sum & 0xffff) + (sum >> 16); + + return ~sum; +} + +static void build_ipv4_header(void *header, uint8_t proto, + uint32_t src, uint32_t dst, + int payload_len, uint8_t tos) +{ + struct iphdr *iph = header; + + iph->ihl = 5; + iph->version = 4; + iph->tos = tos; + iph->ttl = 8; + iph->tot_len = htons(sizeof(*iph) + payload_len); + iph->id = htons(1337); + iph->protocol = proto; + iph->saddr = src; + iph->daddr = dst; + iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0); +} + +static void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield) +{ + uint16_t val, *ptr = (uint16_t *)ip6h; + + val = ntohs(*ptr); + val &= 0xF00F; + val |= ((uint16_t) dsfield) << 4; + *ptr = htons(val); +} + +static void build_ipv6_header(void *header, uint8_t proto, + struct sockaddr_in6 *src, + struct sockaddr_in6 *dst, + int payload_len, uint8_t dsfield) +{ + struct ipv6hdr *ip6h = header; + + ip6h->version = 6; + ip6h->payload_len = htons(payload_len); + ip6h->nexthdr = proto; + ip6h->hop_limit = 8; + ipv6_set_dsfield(ip6h, dsfield); + + memcpy(&ip6h->saddr, &src->sin6_addr, sizeof(ip6h->saddr)); + memcpy(&ip6h->daddr, &dst->sin6_addr, sizeof(ip6h->daddr)); +} + +static uint16_t build_udp_v4_csum(const struct iphdr *iph, + const struct udphdr *udph, + int num_words) +{ + unsigned long pseudo_sum; + int num_u16 = sizeof(iph->saddr); /* halfwords: twice byte len */ + + pseudo_sum = add_csum_hword((void *) &iph->saddr, num_u16); + pseudo_sum += htons(IPPROTO_UDP); + pseudo_sum += udph->len; + return build_ip_csum((void *) udph, num_words, pseudo_sum); +} + +static uint16_t build_udp_v6_csum(const struct ipv6hdr *ip6h, + const struct udphdr *udph, + int num_words) +{ + unsigned long pseudo_sum; + int num_u16 = sizeof(ip6h->saddr); /* halfwords: twice byte len */ + + pseudo_sum = add_csum_hword((void *) &ip6h->saddr, num_u16); + pseudo_sum += htons(ip6h->nexthdr); + pseudo_sum += ip6h->payload_len; + return build_ip_csum((void *) udph, num_words, pseudo_sum); +} + +static void build_udp_header(void *header, int payload_len, + uint16_t dport, int family) +{ + struct udphdr *udph = header; + int len = sizeof(*udph) + payload_len; + + udph->source = htons(cfg_src_port); + udph->dest = htons(dport); + udph->len = htons(len); + udph->check = 0; + if (family == AF_INET) + udph->check = build_udp_v4_csum(header - sizeof(struct iphdr), + udph, len >> 1); + else + udph->check = build_udp_v6_csum(header - sizeof(struct ipv6hdr), + udph, len >> 1); +} + +static void build_gue_header(void *header, uint8_t proto) +{ + struct guehdr *gueh = header; + + gueh->proto_ctype = proto; +} + +static void build_gre_header(void *header, uint16_t proto) +{ + struct grehdr *greh = header; + + greh->protocol = htons(proto); +} + +static int l3_length(int family) +{ + if (family == AF_INET) + return sizeof(struct iphdr); + else + return sizeof(struct ipv6hdr); +} + +static int build_packet(void) +{ + int ol3_len = 0, ol4_len = 0, il3_len = 0, il4_len = 0; + int el3_len = 0; + + if (cfg_l3_extra) + el3_len = l3_length(cfg_l3_extra); + + /* calculate header offsets */ + if (cfg_encap_proto) { + ol3_len = l3_length(cfg_l3_outer); + + if (cfg_encap_proto == IPPROTO_GRE) + ol4_len = sizeof(struct grehdr); + else if (cfg_encap_proto == IPPROTO_UDP) + ol4_len = sizeof(struct udphdr) + sizeof(struct guehdr); + } + + il3_len = l3_length(cfg_l3_inner); + il4_len = sizeof(struct udphdr); + + if (el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len >= + sizeof(buf)) + error(1, 0, "packet too large\n"); + + /* + * Fill packet from inside out, to calculate correct checksums. + * But create ip before udp headers, as udp uses ip for pseudo-sum. + */ + memset(buf + el3_len + ol3_len + ol4_len + il3_len + il4_len, + cfg_payload_char, cfg_payload_len); + + /* add zero byte for udp csum padding */ + buf[el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len] = 0; + + switch (cfg_l3_inner) { + case PF_INET: + build_ipv4_header(buf + el3_len + ol3_len + ol4_len, + IPPROTO_UDP, + in_saddr4.sin_addr.s_addr, + in_daddr4.sin_addr.s_addr, + il4_len + cfg_payload_len, + cfg_dsfield_inner); + break; + case PF_INET6: + build_ipv6_header(buf + el3_len + ol3_len + ol4_len, + IPPROTO_UDP, + &in_saddr6, &in_daddr6, + il4_len + cfg_payload_len, + cfg_dsfield_inner); + break; + } + + build_udp_header(buf + el3_len + ol3_len + ol4_len + il3_len, + cfg_payload_len, CFG_PORT_INNER, cfg_l3_inner); + + if (!cfg_encap_proto) + return il3_len + il4_len + cfg_payload_len; + + switch (cfg_l3_outer) { + case PF_INET: + build_ipv4_header(buf + el3_len, cfg_encap_proto, + out_saddr4.sin_addr.s_addr, + out_daddr4.sin_addr.s_addr, + ol4_len + il3_len + il4_len + cfg_payload_len, + cfg_dsfield_outer); + break; + case PF_INET6: + build_ipv6_header(buf + el3_len, cfg_encap_proto, + &out_saddr6, &out_daddr6, + ol4_len + il3_len + il4_len + cfg_payload_len, + cfg_dsfield_outer); + break; + } + + switch (cfg_encap_proto) { + case IPPROTO_UDP: + build_gue_header(buf + el3_len + ol3_len + ol4_len - + sizeof(struct guehdr), + cfg_l3_inner == PF_INET ? IPPROTO_IPIP + : IPPROTO_IPV6); + build_udp_header(buf + el3_len + ol3_len, + sizeof(struct guehdr) + il3_len + il4_len + + cfg_payload_len, + cfg_port_gue, cfg_l3_outer); + break; + case IPPROTO_GRE: + build_gre_header(buf + el3_len + ol3_len, + cfg_l3_inner == PF_INET ? ETH_P_IP + : ETH_P_IPV6); + break; + } + + switch (cfg_l3_extra) { + case PF_INET: + build_ipv4_header(buf, + cfg_l3_outer == PF_INET ? IPPROTO_IPIP + : IPPROTO_IPV6, + extra_saddr4.sin_addr.s_addr, + extra_daddr4.sin_addr.s_addr, + ol3_len + ol4_len + il3_len + il4_len + + cfg_payload_len, 0); + break; + case PF_INET6: + build_ipv6_header(buf, + cfg_l3_outer == PF_INET ? IPPROTO_IPIP + : IPPROTO_IPV6, + &extra_saddr6, &extra_daddr6, + ol3_len + ol4_len + il3_len + il4_len + + cfg_payload_len, 0); + break; + } + + return el3_len + ol3_len + ol4_len + il3_len + il4_len + + cfg_payload_len; +} + +/* sender transmits encapsulated over RAW or unencap'd over UDP */ +static int setup_tx(void) +{ + int family, fd, ret; + + if (cfg_l3_extra) + family = cfg_l3_extra; + else if (cfg_l3_outer) + family = cfg_l3_outer; + else + family = cfg_l3_inner; + + fd = socket(family, SOCK_RAW, IPPROTO_RAW); + if (fd == -1) + error(1, errno, "socket tx"); + + if (cfg_l3_extra) { + if (cfg_l3_extra == PF_INET) + ret = connect(fd, (void *) &extra_daddr4, + sizeof(extra_daddr4)); + else + ret = connect(fd, (void *) &extra_daddr6, + sizeof(extra_daddr6)); + if (ret) + error(1, errno, "connect tx"); + } else if (cfg_l3_outer) { + /* connect to destination if not encapsulated */ + if (cfg_l3_outer == PF_INET) + ret = connect(fd, (void *) &out_daddr4, + sizeof(out_daddr4)); + else + ret = connect(fd, (void *) &out_daddr6, + sizeof(out_daddr6)); + if (ret) + error(1, errno, "connect tx"); + } else { + /* otherwise using loopback */ + if (cfg_l3_inner == PF_INET) + ret = connect(fd, (void *) &in_daddr4, + sizeof(in_daddr4)); + else + ret = connect(fd, (void *) &in_daddr6, + sizeof(in_daddr6)); + if (ret) + error(1, errno, "connect tx"); + } + + return fd; +} + +/* receiver reads unencapsulated UDP */ +static int setup_rx(void) +{ + int fd, ret; + + fd = socket(cfg_l3_inner, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket rx"); + + if (cfg_l3_inner == PF_INET) + ret = bind(fd, (void *) &in_daddr4, sizeof(in_daddr4)); + else + ret = bind(fd, (void *) &in_daddr6, sizeof(in_daddr6)); + if (ret) + error(1, errno, "bind rx"); + + return fd; +} + +static int do_tx(int fd, const char *pkt, int len) +{ + int ret; + + ret = write(fd, pkt, len); + if (ret == -1) + error(1, errno, "send"); + if (ret != len) + error(1, errno, "send: len (%d < %d)\n", ret, len); + + return 1; +} + +static int do_poll(int fd, short events, int timeout) +{ + struct pollfd pfd; + int ret; + + pfd.fd = fd; + pfd.events = events; + + ret = poll(&pfd, 1, timeout); + if (ret == -1) + error(1, errno, "poll"); + if (ret && !(pfd.revents & POLLIN)) + error(1, errno, "poll: unexpected event 0x%x\n", pfd.revents); + + return ret; +} + +static int do_rx(int fd) +{ + char rbuf; + int ret, num = 0; + + while (1) { + ret = recv(fd, &rbuf, 1, MSG_DONTWAIT); + if (ret == -1 && errno == EAGAIN) + break; + if (ret == -1) + error(1, errno, "recv"); + if (rbuf != cfg_payload_char) + error(1, 0, "recv: payload mismatch"); + num++; + }; + + return num; +} + +static int do_main(void) +{ + unsigned long tstop, treport, tcur; + int fdt = -1, fdr = -1, len, tx = 0, rx = 0; + + if (!cfg_only_tx) + fdr = setup_rx(); + if (!cfg_only_rx) + fdt = setup_tx(); + + len = build_packet(); + + tcur = util_gettime(); + treport = tcur + 1000; + tstop = tcur + (cfg_num_secs * 1000); + + while (1) { + if (!cfg_only_rx) + tx += do_tx(fdt, buf, len); + + if (!cfg_only_tx) + rx += do_rx(fdr); + + if (cfg_num_secs) { + tcur = util_gettime(); + if (tcur >= tstop) + break; + if (tcur >= treport) { + fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx); + tx = 0; + rx = 0; + treport = tcur + 1000; + } + } else { + if (tx == cfg_num_pkt) + break; + } + } + + /* read straggler packets, if any */ + if (rx < tx) { + tstop = util_gettime() + 100; + while (rx < tx) { + tcur = util_gettime(); + if (tcur >= tstop) + break; + + do_poll(fdr, POLLIN, tstop - tcur); + rx += do_rx(fdr); + } + } + + fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx); + + if (fdr != -1 && close(fdr)) + error(1, errno, "close rx"); + if (fdt != -1 && close(fdt)) + error(1, errno, "close tx"); + + /* + * success (== 0) only if received all packets + * unless failure is expected, in which case none must arrive. + */ + if (cfg_expect_failure) + return rx != 0; + else + return rx != tx; +} + + +static void __attribute__((noreturn)) usage(const char *filepath) +{ + fprintf(stderr, "Usage: %s [-e gre|gue|bare|none] [-i 4|6] [-l len] " + "[-O 4|6] [-o 4|6] [-n num] [-t secs] [-R] [-T] " + "[-s <osrc> [-d <odst>] [-S <isrc>] [-D <idst>] " + "[-x <otos>] [-X <itos>] [-f <isport>] [-F]\n", + filepath); + exit(1); +} + +static void parse_addr(int family, void *addr, const char *optarg) +{ + int ret; + + ret = inet_pton(family, optarg, addr); + if (ret == -1) + error(1, errno, "inet_pton"); + if (ret == 0) + error(1, 0, "inet_pton: bad string"); +} + +static void parse_addr4(struct sockaddr_in *addr, const char *optarg) +{ + parse_addr(AF_INET, &addr->sin_addr, optarg); +} + +static void parse_addr6(struct sockaddr_in6 *addr, const char *optarg) +{ + parse_addr(AF_INET6, &addr->sin6_addr, optarg); +} + +static int parse_protocol_family(const char *filepath, const char *optarg) +{ + if (!strcmp(optarg, "4")) + return PF_INET; + if (!strcmp(optarg, "6")) + return PF_INET6; + + usage(filepath); +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "d:D:e:f:Fhi:l:n:o:O:Rs:S:t:Tx:X:")) != -1) { + switch (c) { + case 'd': + if (cfg_l3_outer == AF_UNSPEC) + error(1, 0, "-d must be preceded by -o"); + if (cfg_l3_outer == AF_INET) + parse_addr4(&out_daddr4, optarg); + else + parse_addr6(&out_daddr6, optarg); + break; + case 'D': + if (cfg_l3_inner == AF_UNSPEC) + error(1, 0, "-D must be preceded by -i"); + if (cfg_l3_inner == AF_INET) + parse_addr4(&in_daddr4, optarg); + else + parse_addr6(&in_daddr6, optarg); + break; + case 'e': + if (!strcmp(optarg, "gre")) + cfg_encap_proto = IPPROTO_GRE; + else if (!strcmp(optarg, "gue")) + cfg_encap_proto = IPPROTO_UDP; + else if (!strcmp(optarg, "bare")) + cfg_encap_proto = IPPROTO_IPIP; + else if (!strcmp(optarg, "none")) + cfg_encap_proto = IPPROTO_IP; /* == 0 */ + else + usage(argv[0]); + break; + case 'f': + cfg_src_port = strtol(optarg, NULL, 0); + break; + case 'F': + cfg_expect_failure = true; + break; + case 'h': + usage(argv[0]); + break; + case 'i': + if (!strcmp(optarg, "4")) + cfg_l3_inner = PF_INET; + else if (!strcmp(optarg, "6")) + cfg_l3_inner = PF_INET6; + else + usage(argv[0]); + break; + case 'l': + cfg_payload_len = strtol(optarg, NULL, 0); + break; + case 'n': + cfg_num_pkt = strtol(optarg, NULL, 0); + break; + case 'o': + cfg_l3_outer = parse_protocol_family(argv[0], optarg); + break; + case 'O': + cfg_l3_extra = parse_protocol_family(argv[0], optarg); + break; + case 'R': + cfg_only_rx = true; + break; + case 's': + if (cfg_l3_outer == AF_INET) + parse_addr4(&out_saddr4, optarg); + else + parse_addr6(&out_saddr6, optarg); + break; + case 'S': + if (cfg_l3_inner == AF_INET) + parse_addr4(&in_saddr4, optarg); + else + parse_addr6(&in_saddr6, optarg); + break; + case 't': + cfg_num_secs = strtol(optarg, NULL, 0); + break; + case 'T': + cfg_only_tx = true; + break; + case 'x': + cfg_dsfield_outer = strtol(optarg, NULL, 0); + break; + case 'X': + cfg_dsfield_inner = strtol(optarg, NULL, 0); + break; + } + } + + if (cfg_only_rx && cfg_only_tx) + error(1, 0, "options: cannot combine rx-only and tx-only"); + + if (cfg_encap_proto && cfg_l3_outer == AF_UNSPEC) + error(1, 0, "options: must specify outer with encap"); + else if ((!cfg_encap_proto) && cfg_l3_outer != AF_UNSPEC) + error(1, 0, "options: cannot combine no-encap and outer"); + else if ((!cfg_encap_proto) && cfg_l3_extra != AF_UNSPEC) + error(1, 0, "options: cannot combine no-encap and extra"); + + if (cfg_l3_inner == AF_UNSPEC) + cfg_l3_inner = AF_INET6; + if (cfg_l3_inner == AF_INET6 && cfg_encap_proto == IPPROTO_IPIP) + cfg_encap_proto = IPPROTO_IPV6; + + /* RFC 6040 4.2: + * on decap, if outer encountered congestion (CE == 0x3), + * but inner cannot encode ECN (NoECT == 0x0), then drop packet. + */ + if (((cfg_dsfield_outer & 0x3) == 0x3) && + ((cfg_dsfield_inner & 0x3) == 0x0)) + cfg_expect_failure = true; +} + +static void print_opts(void) +{ + if (cfg_l3_inner == PF_INET6) { + util_printaddr("inner.dest6", (void *) &in_daddr6); + util_printaddr("inner.source6", (void *) &in_saddr6); + } else { + util_printaddr("inner.dest4", (void *) &in_daddr4); + util_printaddr("inner.source4", (void *) &in_saddr4); + } + + if (!cfg_l3_outer) + return; + + fprintf(stderr, "encap proto: %u\n", cfg_encap_proto); + + if (cfg_l3_outer == PF_INET6) { + util_printaddr("outer.dest6", (void *) &out_daddr6); + util_printaddr("outer.source6", (void *) &out_saddr6); + } else { + util_printaddr("outer.dest4", (void *) &out_daddr4); + util_printaddr("outer.source4", (void *) &out_saddr4); + } + + if (!cfg_l3_extra) + return; + + if (cfg_l3_outer == PF_INET6) { + util_printaddr("extra.dest6", (void *) &extra_daddr6); + util_printaddr("extra.source6", (void *) &extra_saddr6); + } else { + util_printaddr("extra.dest4", (void *) &extra_daddr4); + util_printaddr("extra.source4", (void *) &extra_saddr4); + } + +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + print_opts(); + return do_main(); +} diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh new file mode 100755 index 000000000000..d23d4da66b83 --- /dev/null +++ b/tools/testing/selftests/bpf/test_flow_dissector.sh @@ -0,0 +1,115 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Load BPF flow dissector and verify it correctly dissects traffic +export TESTNAME=test_flow_dissector +unmount=0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +msg="skip all tests:" +if [ $UID != 0 ]; then + echo $msg please run this as root >&2 + exit $ksft_skip +fi + +# This test needs to be run in a network namespace with in_netns.sh. Check if +# this is the case and run it with in_netns.sh if it is being run in the root +# namespace. +if [[ -z $(ip netns identify $$) ]]; then + ../net/in_netns.sh "$0" "$@" + exit $? +fi + +# Determine selftest success via shell exit code +exit_handler() +{ + if (( $? == 0 )); then + echo "selftests: $TESTNAME [PASS]"; + else + echo "selftests: $TESTNAME [FAILED]"; + fi + + set +e + + # Cleanup + tc filter del dev lo ingress pref 1337 2> /dev/null + tc qdisc del dev lo ingress 2> /dev/null + ./flow_dissector_load -d 2> /dev/null + if [ $unmount -ne 0 ]; then + umount bpffs 2> /dev/null + fi +} + +# Exit script immediately (well catched by trap handler) if any +# program/thing exits with a non-zero status. +set -e + +# (Use 'trap -l' to list meaning of numbers) +trap exit_handler 0 2 3 6 9 + +# Mount BPF file system +if /bin/mount | grep /sys/fs/bpf > /dev/null; then + echo "bpffs already mounted" +else + echo "bpffs not mounted. Mounting..." + unmount=1 + /bin/mount bpffs /sys/fs/bpf -t bpf +fi + +# Attach BPF program +./flow_dissector_load -p bpf_flow.o -s flow_dissector + +# Setup +tc qdisc add dev lo ingress + +echo "Testing IPv4..." +# Drops all IP/UDP packets coming from port 9 +tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \ + udp src_port 9 action drop + +# Send 10 IPv4/UDP packets from port 8. Filter should not drop any. +./test_flow_dissector -i 4 -f 8 +# Send 10 IPv4/UDP packets from port 9. Filter should drop all. +./test_flow_dissector -i 4 -f 9 -F +# Send 10 IPv4/UDP packets from port 10. Filter should not drop any. +./test_flow_dissector -i 4 -f 10 + +echo "Testing IPIP..." +# Send 10 IPv4/IPv4/UDP packets from port 8. Filter should not drop any. +./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \ + -D 192.168.0.1 -S 1.1.1.1 -f 8 +# Send 10 IPv4/IPv4/UDP packets from port 9. Filter should drop all. +./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \ + -D 192.168.0.1 -S 1.1.1.1 -f 9 -F +# Send 10 IPv4/IPv4/UDP packets from port 10. Filter should not drop any. +./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \ + -D 192.168.0.1 -S 1.1.1.1 -f 10 + +echo "Testing IPv4 + GRE..." +# Send 10 IPv4/GRE/IPv4/UDP packets from port 8. Filter should not drop any. +./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \ + -D 192.168.0.1 -S 1.1.1.1 -f 8 +# Send 10 IPv4/GRE/IPv4/UDP packets from port 9. Filter should drop all. +./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \ + -D 192.168.0.1 -S 1.1.1.1 -f 9 -F +# Send 10 IPv4/GRE/IPv4/UDP packets from port 10. Filter should not drop any. +./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \ + -D 192.168.0.1 -S 1.1.1.1 -f 10 + +tc filter del dev lo ingress pref 1337 + +echo "Testing IPv6..." +# Drops all IPv6/UDP packets coming from port 9 +tc filter add dev lo parent ffff: protocol ipv6 pref 1337 flower ip_proto \ + udp src_port 9 action drop + +# Send 10 IPv6/UDP packets from port 8. Filter should not drop any. +./test_flow_dissector -i 6 -f 8 +# Send 10 IPv6/UDP packets from port 9. Filter should drop all. +./test_flow_dissector -i 6 -f 9 -F +# Send 10 IPv6/UDP packets from port 10. Filter should not drop any. +./test_flow_dissector -i 6 -f 10 + +exit 0 diff --git a/tools/testing/selftests/bpf/test_libbpf.sh b/tools/testing/selftests/bpf/test_libbpf.sh index d97dc914cd49..2989b2e2d856 100755 --- a/tools/testing/selftests/bpf/test_libbpf.sh +++ b/tools/testing/selftests/bpf/test_libbpf.sh @@ -6,7 +6,7 @@ export TESTNAME=test_libbpf # Determine selftest success via shell exit code exit_handler() { - if (( $? == 0 )); then + if [ $? -eq 0 ]; then echo "selftests: $TESTNAME [PASS]"; else echo "$TESTNAME: failed at file $LAST_LOADED" 1>&2 @@ -33,17 +33,11 @@ trap exit_handler 0 2 3 6 9 libbpf_open_file test_l4lb.o -# TODO: fix libbpf to load noinline functions -# [warning] libbpf: incorrect bpf_call opcode -#libbpf_open_file test_l4lb_noinline.o +# Load a program with BPF-to-BPF calls +libbpf_open_file test_l4lb_noinline.o -# TODO: fix test_xdp_meta.c to load with libbpf -# [warning] libbpf: test_xdp_meta.o doesn't provide kernel version -#libbpf_open_file test_xdp_meta.o - -# TODO: fix libbpf to handle .eh_frame -# [warning] libbpf: relocation failed: no section(10) -#libbpf_open_file ../../../../samples/bpf/tracex3_kern.o +# Load a program compiled without the "-target bpf" flag +libbpf_open_file test_xdp.o # Success exit 0 diff --git a/tools/testing/selftests/bpf/test_lirc_mode2.sh b/tools/testing/selftests/bpf/test_lirc_mode2.sh index 677686198df3..ec4e15948e40 100755 --- a/tools/testing/selftests/bpf/test_lirc_mode2.sh +++ b/tools/testing/selftests/bpf/test_lirc_mode2.sh @@ -21,13 +21,14 @@ do if grep -q DRV_NAME=rc-loopback $i/uevent then LIRCDEV=$(grep DEVNAME= $i/lirc*/uevent | sed sQDEVNAME=Q/dev/Q) + INPUTDEV=$(grep DEVNAME= $i/input*/event*/uevent | sed sQDEVNAME=Q/dev/Q) fi done if [ -n $LIRCDEV ]; then TYPE=lirc_mode2 - ./test_lirc_mode2_user $LIRCDEV + ./test_lirc_mode2_user $LIRCDEV $INPUTDEV ret=$? if [ $ret -ne 0 ]; then echo -e ${RED}"FAIL: $TYPE"${NC} diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_kern.c b/tools/testing/selftests/bpf/test_lirc_mode2_kern.c index ba26855563a5..4147130cc3b7 100644 --- a/tools/testing/selftests/bpf/test_lirc_mode2_kern.c +++ b/tools/testing/selftests/bpf/test_lirc_mode2_kern.c @@ -15,6 +15,9 @@ int bpf_decoder(unsigned int *sample) if (duration & 0x10000) bpf_rc_keydown(sample, 0x40, duration & 0xffff, 0); + if (duration & 0x20000) + bpf_rc_pointer_rel(sample, (duration >> 8) & 0xff, + duration & 0xff); } return 0; diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c index d470d63c33db..fb5fd6841ef3 100644 --- a/tools/testing/selftests/bpf/test_lirc_mode2_user.c +++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c @@ -29,6 +29,7 @@ #include <linux/bpf.h> #include <linux/lirc.h> +#include <linux/input.h> #include <errno.h> #include <stdio.h> #include <stdlib.h> @@ -47,12 +48,13 @@ int main(int argc, char **argv) { struct bpf_object *obj; - int ret, lircfd, progfd, mode; - int testir = 0x1dead; + int ret, lircfd, progfd, inputfd; + int testir1 = 0x1dead; + int testir2 = 0x20101; u32 prog_ids[10], prog_flags[10], prog_cnt; - if (argc != 2) { - printf("Usage: %s /dev/lircN\n", argv[0]); + if (argc != 3) { + printf("Usage: %s /dev/lircN /dev/input/eventM\n", argv[0]); return 2; } @@ -76,9 +78,9 @@ int main(int argc, char **argv) return 1; } - mode = LIRC_MODE_SCANCODE; - if (ioctl(lircfd, LIRC_SET_REC_MODE, &mode)) { - printf("failed to set rec mode: %m\n"); + inputfd = open(argv[2], O_RDONLY | O_NONBLOCK); + if (inputfd == -1) { + printf("failed to open input device %s: %m\n", argv[1]); return 1; } @@ -102,29 +104,54 @@ int main(int argc, char **argv) } /* Write raw IR */ - ret = write(lircfd, &testir, sizeof(testir)); - if (ret != sizeof(testir)) { + ret = write(lircfd, &testir1, sizeof(testir1)); + if (ret != sizeof(testir1)) { printf("Failed to send test IR message: %m\n"); return 1; } - struct pollfd pfd = { .fd = lircfd, .events = POLLIN }; - struct lirc_scancode lsc; + struct pollfd pfd = { .fd = inputfd, .events = POLLIN }; + struct input_event event; - poll(&pfd, 1, 100); + for (;;) { + poll(&pfd, 1, 100); - /* Read decoded IR */ - ret = read(lircfd, &lsc, sizeof(lsc)); - if (ret != sizeof(lsc)) { - printf("Failed to read decoded IR: %m\n"); - return 1; + /* Read decoded IR */ + ret = read(inputfd, &event, sizeof(event)); + if (ret != sizeof(event)) { + printf("Failed to read decoded IR: %m\n"); + return 1; + } + + if (event.type == EV_MSC && event.code == MSC_SCAN && + event.value == 0xdead) { + break; + } } - if (lsc.scancode != 0xdead || lsc.rc_proto != 64) { - printf("Incorrect scancode decoded\n"); + /* Write raw IR */ + ret = write(lircfd, &testir2, sizeof(testir2)); + if (ret != sizeof(testir2)) { + printf("Failed to send test IR message: %m\n"); return 1; } + for (;;) { + poll(&pfd, 1, 100); + + /* Read decoded IR */ + ret = read(inputfd, &event, sizeof(event)); + if (ret != sizeof(event)) { + printf("Failed to read decoded IR: %m\n"); + return 1; + } + + if (event.type == EV_REL && event.code == REL_Y && + event.value == 1 ) { + break; + } + } + prog_cnt = 10; ret = bpf_prog_query(lircfd, BPF_LIRC_MODE2, 0, prog_flags, prog_ids, &prog_cnt); diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c index 147e34cfceb7..02d7c871862a 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/test_lpm_map.c @@ -474,6 +474,16 @@ static void test_lpm_delete(void) assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 && errno == ENOENT); + key->prefixlen = 30; // unused prefix so far + inet_pton(AF_INET, "192.255.0.0", key->data); + assert(bpf_map_delete_elem(map_fd, key) == -1 && + errno == ENOENT); + + key->prefixlen = 16; // same prefix as the root node + inet_pton(AF_INET, "192.255.0.0", key->data); + assert(bpf_map_delete_elem(map_fd, key) == -1 && + errno == ENOENT); + /* assert initial lookup */ key->prefixlen = 32; inet_pton(AF_INET, "192.168.0.1", key->data); diff --git a/tools/testing/selftests/bpf/test_map_in_map.c b/tools/testing/selftests/bpf/test_map_in_map.c new file mode 100644 index 000000000000..ce923e67e08e --- /dev/null +++ b/tools/testing/selftests/bpf/test_map_in_map.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2018 Facebook */ +#include <stddef.h> +#include <linux/bpf.h> +#include <linux/types.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") mim_array = { + .type = BPF_MAP_TYPE_ARRAY_OF_MAPS, + .key_size = sizeof(int), + /* must be sizeof(__u32) for map in map */ + .value_size = sizeof(__u32), + .max_entries = 1, + .map_flags = 0, +}; + +struct bpf_map_def SEC("maps") mim_hash = { + .type = BPF_MAP_TYPE_HASH_OF_MAPS, + .key_size = sizeof(int), + /* must be sizeof(__u32) for map in map */ + .value_size = sizeof(__u32), + .max_entries = 1, + .map_flags = 0, +}; + +SEC("xdp_mimtest") +int xdp_mimtest0(struct xdp_md *ctx) +{ + int value = 123; + int key = 0; + void *map; + + map = bpf_map_lookup_elem(&mim_array, &key); + if (!map) + return XDP_DROP; + + bpf_map_update_elem(map, &key, &value, 0); + + map = bpf_map_lookup_elem(&mim_hash, &key); + if (!map) + return XDP_DROP; + + bpf_map_update_elem(map, &key, &value, 0); + + return XDP_PASS; +} + +int _version SEC("version") = 1; +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 9b552c0fc47d..e2b9eee37187 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -15,6 +15,7 @@ #include <string.h> #include <assert.h> #include <stdlib.h> +#include <time.h> #include <sys/wait.h> #include <sys/socket.h> @@ -257,24 +258,36 @@ static void test_hashmap_percpu(int task, void *data) close(fd); } -static void test_hashmap_walk(int task, void *data) +static int helper_fill_hashmap(int max_entries) { - int fd, i, max_entries = 1000; - long long key, value, next_key; - bool next_key_valid = true; + int i, fd, ret; + long long key, value; fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), max_entries, map_flags); - if (fd < 0) { - printf("Failed to create hashmap '%s'!\n", strerror(errno)); - exit(1); - } + CHECK(fd < 0, + "failed to create hashmap", + "err: %s, flags: 0x%x\n", strerror(errno), map_flags); for (i = 0; i < max_entries; i++) { key = i; value = key; - assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == 0); + ret = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST); + CHECK(ret != 0, + "can't update hashmap", + "err: %s\n", strerror(ret)); } + return fd; +} + +static void test_hashmap_walk(int task, void *data) +{ + int fd, i, max_entries = 1000; + long long key, value, next_key; + bool next_key_valid = true; + + fd = helper_fill_hashmap(max_entries); + for (i = 0; bpf_map_get_next_key(fd, !i ? NULL : &key, &next_key) == 0; i++) { key = next_key; @@ -305,6 +318,39 @@ static void test_hashmap_walk(int task, void *data) close(fd); } +static void test_hashmap_zero_seed(void) +{ + int i, first, second, old_flags; + long long key, next_first, next_second; + + old_flags = map_flags; + map_flags |= BPF_F_ZERO_SEED; + + first = helper_fill_hashmap(3); + second = helper_fill_hashmap(3); + + for (i = 0; ; i++) { + void *key_ptr = !i ? NULL : &key; + + if (bpf_map_get_next_key(first, key_ptr, &next_first) != 0) + break; + + CHECK(bpf_map_get_next_key(second, key_ptr, &next_second) != 0, + "next_key for second map must succeed", + "key_ptr: %p", key_ptr); + CHECK(next_first != next_second, + "keys must match", + "i: %d first: %lld second: %lld\n", i, + next_first, next_second); + + key = next_first; + } + + map_flags = old_flags; + close(first); + close(second); +} + static void test_arraymap(int task, void *data) { int key, next_key, fd; @@ -464,13 +510,129 @@ static void test_devmap(int task, void *data) fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP, sizeof(key), sizeof(value), 2, 0); if (fd < 0) { - printf("Failed to create arraymap '%s'!\n", strerror(errno)); + printf("Failed to create devmap '%s'!\n", strerror(errno)); exit(1); } close(fd); } +static void test_queuemap(int task, void *data) +{ + const int MAP_SIZE = 32; + __u32 vals[MAP_SIZE + MAP_SIZE/2], val; + int fd, i; + + /* Fill test values to be used */ + for (i = 0; i < MAP_SIZE + MAP_SIZE/2; i++) + vals[i] = rand(); + + /* Invalid key size */ + fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 4, sizeof(val), MAP_SIZE, + map_flags); + assert(fd < 0 && errno == EINVAL); + + fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 0, sizeof(val), MAP_SIZE, + map_flags); + /* Queue map does not support BPF_F_NO_PREALLOC */ + if (map_flags & BPF_F_NO_PREALLOC) { + assert(fd < 0 && errno == EINVAL); + return; + } + if (fd < 0) { + printf("Failed to create queuemap '%s'!\n", strerror(errno)); + exit(1); + } + + /* Push MAP_SIZE elements */ + for (i = 0; i < MAP_SIZE; i++) + assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0); + + /* Check that element cannot be pushed due to max_entries limit */ + assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 && + errno == E2BIG); + + /* Peek element */ + assert(bpf_map_lookup_elem(fd, NULL, &val) == 0 && val == vals[0]); + + /* Replace half elements */ + for (i = MAP_SIZE; i < MAP_SIZE + MAP_SIZE/2; i++) + assert(bpf_map_update_elem(fd, NULL, &vals[i], BPF_EXIST) == 0); + + /* Pop all elements */ + for (i = MAP_SIZE/2; i < MAP_SIZE + MAP_SIZE/2; i++) + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == 0 && + val == vals[i]); + + /* Check that there are not elements left */ + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 && + errno == ENOENT); + + /* Check that non supported functions set errno to EINVAL */ + assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL); + assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL); + + close(fd); +} + +static void test_stackmap(int task, void *data) +{ + const int MAP_SIZE = 32; + __u32 vals[MAP_SIZE + MAP_SIZE/2], val; + int fd, i; + + /* Fill test values to be used */ + for (i = 0; i < MAP_SIZE + MAP_SIZE/2; i++) + vals[i] = rand(); + + /* Invalid key size */ + fd = bpf_create_map(BPF_MAP_TYPE_STACK, 4, sizeof(val), MAP_SIZE, + map_flags); + assert(fd < 0 && errno == EINVAL); + + fd = bpf_create_map(BPF_MAP_TYPE_STACK, 0, sizeof(val), MAP_SIZE, + map_flags); + /* Stack map does not support BPF_F_NO_PREALLOC */ + if (map_flags & BPF_F_NO_PREALLOC) { + assert(fd < 0 && errno == EINVAL); + return; + } + if (fd < 0) { + printf("Failed to create stackmap '%s'!\n", strerror(errno)); + exit(1); + } + + /* Push MAP_SIZE elements */ + for (i = 0; i < MAP_SIZE; i++) + assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0); + + /* Check that element cannot be pushed due to max_entries limit */ + assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 && + errno == E2BIG); + + /* Peek element */ + assert(bpf_map_lookup_elem(fd, NULL, &val) == 0 && val == vals[i - 1]); + + /* Replace half elements */ + for (i = MAP_SIZE; i < MAP_SIZE + MAP_SIZE/2; i++) + assert(bpf_map_update_elem(fd, NULL, &vals[i], BPF_EXIST) == 0); + + /* Pop all elements */ + for (i = MAP_SIZE + MAP_SIZE/2 - 1; i >= MAP_SIZE/2; i--) + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == 0 && + val == vals[i]); + + /* Check that there are not elements left */ + assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 && + errno == ENOENT); + + /* Check that non supported functions set errno to EINVAL */ + assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL); + assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL); + + close(fd); +} + #include <sys/socket.h> #include <sys/ioctl.h> #include <arpa/inet.h> @@ -963,6 +1125,94 @@ out_sockmap: exit(1); } +#define MAPINMAP_PROG "./test_map_in_map.o" +static void test_map_in_map(void) +{ + struct bpf_program *prog; + struct bpf_object *obj; + struct bpf_map *map; + int mim_fd, fd, err; + int pos = 0; + + obj = bpf_object__open(MAPINMAP_PROG); + + fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int), sizeof(int), + 2, 0); + if (fd < 0) { + printf("Failed to create hashmap '%s'!\n", strerror(errno)); + exit(1); + } + + map = bpf_object__find_map_by_name(obj, "mim_array"); + if (IS_ERR(map)) { + printf("Failed to load array of maps from test prog\n"); + goto out_map_in_map; + } + err = bpf_map__set_inner_map_fd(map, fd); + if (err) { + printf("Failed to set inner_map_fd for array of maps\n"); + goto out_map_in_map; + } + + map = bpf_object__find_map_by_name(obj, "mim_hash"); + if (IS_ERR(map)) { + printf("Failed to load hash of maps from test prog\n"); + goto out_map_in_map; + } + err = bpf_map__set_inner_map_fd(map, fd); + if (err) { + printf("Failed to set inner_map_fd for hash of maps\n"); + goto out_map_in_map; + } + + bpf_object__for_each_program(prog, obj) { + bpf_program__set_xdp(prog); + } + bpf_object__load(obj); + + map = bpf_object__find_map_by_name(obj, "mim_array"); + if (IS_ERR(map)) { + printf("Failed to load array of maps from test prog\n"); + goto out_map_in_map; + } + mim_fd = bpf_map__fd(map); + if (mim_fd < 0) { + printf("Failed to get descriptor for array of maps\n"); + goto out_map_in_map; + } + + err = bpf_map_update_elem(mim_fd, &pos, &fd, 0); + if (err) { + printf("Failed to update array of maps\n"); + goto out_map_in_map; + } + + map = bpf_object__find_map_by_name(obj, "mim_hash"); + if (IS_ERR(map)) { + printf("Failed to load hash of maps from test prog\n"); + goto out_map_in_map; + } + mim_fd = bpf_map__fd(map); + if (mim_fd < 0) { + printf("Failed to get descriptor for hash of maps\n"); + goto out_map_in_map; + } + + err = bpf_map_update_elem(mim_fd, &pos, &fd, 0); + if (err) { + printf("Failed to update hash of maps\n"); + goto out_map_in_map; + } + + close(fd); + bpf_object__close(obj); + return; + +out_map_in_map: + close(fd); + exit(1); +} + #define MAP_SIZE (32 * 1024) static void test_map_large(void) @@ -1417,6 +1667,7 @@ static void run_all_tests(void) test_hashmap(0, NULL); test_hashmap_percpu(0, NULL); test_hashmap_walk(0, NULL); + test_hashmap_zero_seed(); test_arraymap(0, NULL); test_arraymap_percpu(0, NULL); @@ -1434,10 +1685,17 @@ static void run_all_tests(void) test_map_wronly(); test_reuseport_array(); + + test_queuemap(0, NULL); + test_stackmap(0, NULL); + + test_map_in_map(); } int main(void) { + srand(time(NULL)); + map_flags = 0; run_all_tests(); diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c new file mode 100644 index 000000000000..c1da5404454a --- /dev/null +++ b/tools/testing/selftests/bpf/test_netcnt.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <assert.h> +#include <sys/sysinfo.h> +#include <sys/time.h> + +#include <linux/bpf.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "cgroup_helpers.h" +#include "bpf_rlimit.h" +#include "netcnt_common.h" + +#define BPF_PROG "./netcnt_prog.o" +#define TEST_CGROUP "/test-network-counters/" + +static int bpf_find_map(const char *test, struct bpf_object *obj, + const char *name) +{ + struct bpf_map *map; + + map = bpf_object__find_map_by_name(obj, name); + if (!map) { + printf("%s:FAIL:map '%s' not found\n", test, name); + return -1; + } + return bpf_map__fd(map); +} + +int main(int argc, char **argv) +{ + struct percpu_net_cnt *percpu_netcnt; + struct bpf_cgroup_storage_key key; + int map_fd, percpu_map_fd; + int error = EXIT_FAILURE; + struct net_cnt netcnt; + struct bpf_object *obj; + int prog_fd, cgroup_fd; + unsigned long packets; + unsigned long bytes; + int cpu, nproc; + __u32 prog_cnt; + + nproc = get_nprocs_conf(); + percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc); + if (!percpu_netcnt) { + printf("Not enough memory for per-cpu area (%d cpus)\n", nproc); + goto err; + } + + if (bpf_prog_load(BPF_PROG, BPF_PROG_TYPE_CGROUP_SKB, + &obj, &prog_fd)) { + printf("Failed to load bpf program\n"); + goto out; + } + + if (setup_cgroup_environment()) { + printf("Failed to load bpf program\n"); + goto err; + } + + /* Create a cgroup, get fd, and join it */ + cgroup_fd = create_and_get_cgroup(TEST_CGROUP); + if (cgroup_fd < 0) { + printf("Failed to create test cgroup\n"); + goto err; + } + + if (join_cgroup(TEST_CGROUP)) { + printf("Failed to join cgroup\n"); + goto err; + } + + /* Attach bpf program */ + if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) { + printf("Failed to attach bpf program"); + goto err; + } + + if (system("which ping6 &>/dev/null") == 0) + assert(!system("ping6 localhost -c 10000 -f -q > /dev/null")); + else + assert(!system("ping -6 localhost -c 10000 -f -q > /dev/null")); + + if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL, + &prog_cnt)) { + printf("Failed to query attached programs"); + goto err; + } + + map_fd = bpf_find_map(__func__, obj, "netcnt"); + if (map_fd < 0) { + printf("Failed to find bpf map with net counters"); + goto err; + } + + percpu_map_fd = bpf_find_map(__func__, obj, "percpu_netcnt"); + if (percpu_map_fd < 0) { + printf("Failed to find bpf map with percpu net counters"); + goto err; + } + + if (bpf_map_get_next_key(map_fd, NULL, &key)) { + printf("Failed to get key in cgroup storage\n"); + goto err; + } + + if (bpf_map_lookup_elem(map_fd, &key, &netcnt)) { + printf("Failed to lookup cgroup storage\n"); + goto err; + } + + if (bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0])) { + printf("Failed to lookup percpu cgroup storage\n"); + goto err; + } + + /* Some packets can be still in per-cpu cache, but not more than + * MAX_PERCPU_PACKETS. + */ + packets = netcnt.packets; + bytes = netcnt.bytes; + for (cpu = 0; cpu < nproc; cpu++) { + if (percpu_netcnt[cpu].packets > MAX_PERCPU_PACKETS) { + printf("Unexpected percpu value: %llu\n", + percpu_netcnt[cpu].packets); + goto err; + } + + packets += percpu_netcnt[cpu].packets; + bytes += percpu_netcnt[cpu].bytes; + } + + /* No packets should be lost */ + if (packets != 10000) { + printf("Unexpected packet count: %lu\n", packets); + goto err; + } + + /* Let's check that bytes counter matches the number of packets + * multiplied by the size of ipv6 ICMP packet. + */ + if (bytes != packets * 104) { + printf("Unexpected bytes count: %lu\n", bytes); + goto err; + } + + error = 0; + printf("test_netcnt:PASS\n"); + +err: + cleanup_cgroup_environment(); + free(percpu_netcnt); + +out: + return error; +} diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 0ef68204c84b..25f0083a9b2e 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -51,10 +51,10 @@ static struct { struct iphdr iph; struct tcphdr tcp; } __packed pkt_v4 = { - .eth.h_proto = bpf_htons(ETH_P_IP), + .eth.h_proto = __bpf_constant_htons(ETH_P_IP), .iph.ihl = 5, .iph.protocol = 6, - .iph.tot_len = bpf_htons(MAGIC_BYTES), + .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), .tcp.urg_ptr = 123, }; @@ -64,13 +64,13 @@ static struct { struct ipv6hdr iph; struct tcphdr tcp; } __packed pkt_v6 = { - .eth.h_proto = bpf_htons(ETH_P_IPV6), + .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), .iph.nexthdr = 6, - .iph.payload_len = bpf_htons(MAGIC_BYTES), + .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), .tcp.urg_ptr = 123, }; -#define CHECK(condition, tag, format...) ({ \ +#define _CHECK(condition, tag, duration, format...) ({ \ int __ret = !!(condition); \ if (__ret) { \ error_cnt++; \ @@ -83,6 +83,11 @@ static struct { __ret; \ }) +#define CHECK(condition, tag, format...) \ + _CHECK(condition, tag, duration, format) +#define CHECK_ATTR(condition, tag, format...) \ + _CHECK(condition, tag, tattr.duration, format) + static int bpf_find_map(const char *test, struct bpf_object *obj, const char *name) { @@ -112,18 +117,65 @@ static void test_pkt_access(void) err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4), NULL, NULL, &retval, &duration); - CHECK(err || errno || retval, "ipv4", + CHECK(err || retval, "ipv4", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration); err = bpf_prog_test_run(prog_fd, 100000, &pkt_v6, sizeof(pkt_v6), NULL, NULL, &retval, &duration); - CHECK(err || errno || retval, "ipv6", + CHECK(err || retval, "ipv6", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration); bpf_object__close(obj); } +static void test_prog_run_xattr(void) +{ + const char *file = "./test_pkt_access.o"; + struct bpf_object *obj; + char buf[10]; + int err; + struct bpf_prog_test_run_attr tattr = { + .repeat = 1, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .data_out = buf, + .data_size_out = 5, + }; + + err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, + &tattr.prog_fd); + if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno)) + return; + + memset(buf, 0, sizeof(buf)); + + err = bpf_prog_test_run_xattr(&tattr); + CHECK_ATTR(err != -1 || errno != ENOSPC || tattr.retval, "run", + "err %d errno %d retval %d\n", err, errno, tattr.retval); + + CHECK_ATTR(tattr.data_size_out != sizeof(pkt_v4), "data_size_out", + "incorrect output size, want %lu have %u\n", + sizeof(pkt_v4), tattr.data_size_out); + + CHECK_ATTR(buf[5] != 0, "overflow", + "BPF_PROG_TEST_RUN ignored size hint\n"); + + tattr.data_out = NULL; + tattr.data_size_out = 0; + errno = 0; + + err = bpf_prog_test_run_xattr(&tattr); + CHECK_ATTR(err || errno || tattr.retval, "run_no_output", + "err %d errno %d retval %d\n", err, errno, tattr.retval); + + tattr.data_size_out = 1; + err = bpf_prog_test_run_xattr(&tattr); + CHECK_ATTR(err != -EINVAL, "run_wrong_size_out", "err %d\n", err); + + bpf_object__close(obj); +} + static void test_xdp(void) { struct vip key4 = {.protocol = 6, .family = AF_INET}; @@ -153,14 +205,14 @@ static void test_xdp(void) err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); - CHECK(err || errno || retval != XDP_TX || size != 74 || + CHECK(err || retval != XDP_TX || size != 74 || iph->protocol != IPPROTO_IPIP, "ipv4", "err %d errno %d retval %d size %d\n", err, errno, retval, size); err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6), buf, &size, &retval, &duration); - CHECK(err || errno || retval != XDP_TX || size != 114 || + CHECK(err || retval != XDP_TX || size != 114 || iph6->nexthdr != IPPROTO_IPV6, "ipv6", "err %d errno %d retval %d size %d\n", err, errno, retval, size); @@ -185,13 +237,13 @@ static void test_xdp_adjust_tail(void) err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); - CHECK(err || errno || retval != XDP_DROP, + CHECK(err || retval != XDP_DROP, "ipv4", "err %d errno %d retval %d size %d\n", err, errno, retval, size); err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6), buf, &size, &retval, &duration); - CHECK(err || errno || retval != XDP_TX || size != 54, + CHECK(err || retval != XDP_TX || size != 54, "ipv6", "err %d errno %d retval %d size %d\n", err, errno, retval, size); bpf_object__close(obj); @@ -254,14 +306,14 @@ static void test_l4lb(const char *file) err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); - CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 || + CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 || *magic != MAGIC_VAL, "ipv4", "err %d errno %d retval %d size %d magic %x\n", err, errno, retval, size, *magic); err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6), buf, &size, &retval, &duration); - CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 || + CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 || *magic != MAGIC_VAL, "ipv6", "err %d errno %d retval %d size %d magic %x\n", err, errno, retval, size, *magic); @@ -343,14 +395,14 @@ static void test_xdp_noinline(void) err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); - CHECK(err || errno || retval != 1 || size != 54 || + CHECK(err || retval != 1 || size != 54 || *magic != MAGIC_VAL, "ipv4", "err %d errno %d retval %d size %d magic %x\n", err, errno, retval, size, *magic); err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6), buf, &size, &retval, &duration); - CHECK(err || errno || retval != 1 || size != 74 || + CHECK(err || retval != 1 || size != 74 || *magic != MAGIC_VAL, "ipv6", "err %d errno %d retval %d size %d magic %x\n", err, errno, retval, size, *magic); @@ -524,7 +576,7 @@ static void test_bpf_obj_id(void) load_time < now - 60 || load_time > now + 60 || prog_infos[i].created_by_uid != my_uid || prog_infos[i].nr_map_ids != 1 || - *(int *)prog_infos[i].map_ids != map_infos[i].id || + *(int *)(long)prog_infos[i].map_ids != map_infos[i].id || strcmp((char *)prog_infos[i].name, expected_prog_name), "get-prog-info(fd)", "err %d errno %d i %d type %d(%d) info_len %u(%Zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n", @@ -539,7 +591,7 @@ static void test_bpf_obj_id(void) load_time, now, prog_infos[i].created_by_uid, my_uid, prog_infos[i].nr_map_ids, 1, - *(int *)prog_infos[i].map_ids, map_infos[i].id, + *(int *)(long)prog_infos[i].map_ids, map_infos[i].id, prog_infos[i].name, expected_prog_name)) goto done; } @@ -585,7 +637,7 @@ static void test_bpf_obj_id(void) bzero(&prog_info, sizeof(prog_info)); info_len = sizeof(prog_info); - saved_map_id = *(int *)(prog_infos[i].map_ids); + saved_map_id = *(int *)((long)prog_infos[i].map_ids); prog_info.map_ids = prog_infos[i].map_ids; prog_info.nr_map_ids = 2; err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len); @@ -593,12 +645,12 @@ static void test_bpf_obj_id(void) prog_infos[i].xlated_prog_insns = 0; CHECK(err || info_len != sizeof(struct bpf_prog_info) || memcmp(&prog_info, &prog_infos[i], info_len) || - *(int *)prog_info.map_ids != saved_map_id, + *(int *)(long)prog_info.map_ids != saved_map_id, "get-prog-info(next_id->fd)", "err %d errno %d info_len %u(%Zu) memcmp %d map_id %u(%u)\n", err, errno, info_len, sizeof(struct bpf_prog_info), memcmp(&prog_info, &prog_infos[i], info_len), - *(int *)prog_info.map_ids, saved_map_id); + *(int *)(long)prog_info.map_ids, saved_map_id); close(prog_fd); } CHECK(nr_id_found != nr_iters, @@ -1136,7 +1188,9 @@ static void test_stacktrace_build_id(void) int i, j; struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH]; int build_id_matches = 0; + int retry = 1; +retry: err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) goto out; @@ -1249,6 +1303,19 @@ static void test_stacktrace_build_id(void) previous_key = key; } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0); + /* stack_map_get_build_id_offset() is racy and sometimes can return + * BPF_STACK_BUILD_ID_IP instead of BPF_STACK_BUILD_ID_VALID; + * try it one more time. + */ + if (build_id_matches < 1 && retry--) { + ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); + close(pmu_fd); + bpf_object__close(obj); + printf("%s:WARN:Didn't find expected build ID from the map, retrying\n", + __func__); + goto retry; + } + if (CHECK(build_id_matches < 1, "build id match", "Didn't find expected build ID from the map\n")) goto disable_pmu; @@ -1289,7 +1356,9 @@ static void test_stacktrace_build_id_nmi(void) int i, j; struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH]; int build_id_matches = 0; + int retry = 1; +retry: err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd); if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) return; @@ -1384,6 +1453,19 @@ static void test_stacktrace_build_id_nmi(void) previous_key = key; } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0); + /* stack_map_get_build_id_offset() is racy and sometimes can return + * BPF_STACK_BUILD_ID_IP instead of BPF_STACK_BUILD_ID_VALID; + * try it one more time. + */ + if (build_id_matches < 1 && retry--) { + ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); + close(pmu_fd); + bpf_object__close(obj); + printf("%s:WARN:Didn't find expected build ID from the map, retrying\n", + __func__); + goto retry; + } + if (CHECK(build_id_matches < 1, "build id match", "Didn't find expected build ID from the map\n")) goto disable_pmu; @@ -1698,11 +1780,146 @@ static void test_task_fd_query_tp(void) "sys_enter_read"); } +static void test_reference_tracking() +{ + const char *file = "./test_sk_lookup_kern.o"; + struct bpf_object *obj; + struct bpf_program *prog; + __u32 duration = 0; + int err = 0; + + obj = bpf_object__open(file); + if (IS_ERR(obj)) { + error_cnt++; + return; + } + + bpf_object__for_each_program(prog, obj) { + const char *title; + + /* Ignore .text sections */ + title = bpf_program__title(prog, false); + if (strstr(title, ".text") != NULL) + continue; + + bpf_program__set_type(prog, BPF_PROG_TYPE_SCHED_CLS); + + /* Expect verifier failure if test name has 'fail' */ + if (strstr(title, "fail") != NULL) { + libbpf_set_print(NULL, NULL, NULL); + err = !bpf_program__load(prog, "GPL", 0); + libbpf_set_print(printf, printf, NULL); + } else { + err = bpf_program__load(prog, "GPL", 0); + } + CHECK(err, title, "\n"); + } + bpf_object__close(obj); +} + +enum { + QUEUE, + STACK, +}; + +static void test_queue_stack_map(int type) +{ + const int MAP_SIZE = 32; + __u32 vals[MAP_SIZE], duration, retval, size, val; + int i, err, prog_fd, map_in_fd, map_out_fd; + char file[32], buf[128]; + struct bpf_object *obj; + struct iphdr *iph = (void *)buf + sizeof(struct ethhdr); + + /* Fill test values to be used */ + for (i = 0; i < MAP_SIZE; i++) + vals[i] = rand(); + + if (type == QUEUE) + strncpy(file, "./test_queue_map.o", sizeof(file)); + else if (type == STACK) + strncpy(file, "./test_stack_map.o", sizeof(file)); + else + return; + + err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); + if (err) { + error_cnt++; + return; + } + + map_in_fd = bpf_find_map(__func__, obj, "map_in"); + if (map_in_fd < 0) + goto out; + + map_out_fd = bpf_find_map(__func__, obj, "map_out"); + if (map_out_fd < 0) + goto out; + + /* Push 32 elements to the input map */ + for (i = 0; i < MAP_SIZE; i++) { + err = bpf_map_update_elem(map_in_fd, NULL, &vals[i], 0); + if (err) { + error_cnt++; + goto out; + } + } + + /* The eBPF program pushes iph.saddr in the output map, + * pops the input map and saves this value in iph.daddr + */ + for (i = 0; i < MAP_SIZE; i++) { + if (type == QUEUE) { + val = vals[i]; + pkt_v4.iph.saddr = vals[i] * 5; + } else if (type == STACK) { + val = vals[MAP_SIZE - 1 - i]; + pkt_v4.iph.saddr = vals[MAP_SIZE - 1 - i] * 5; + } + + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + buf, &size, &retval, &duration); + if (err || retval || size != sizeof(pkt_v4) || + iph->daddr != val) + break; + } + + CHECK(err || retval || size != sizeof(pkt_v4) || iph->daddr != val, + "bpf_map_pop_elem", + "err %d errno %d retval %d size %d iph->daddr %u\n", + err, errno, retval, size, iph->daddr); + + /* Queue is empty, program should return TC_ACT_SHOT */ + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), + buf, &size, &retval, &duration); + CHECK(err || retval != 2 /* TC_ACT_SHOT */|| size != sizeof(pkt_v4), + "check-queue-stack-map-empty", + "err %d errno %d retval %d size %d\n", + err, errno, retval, size); + + /* Check that the program pushed elements correctly */ + for (i = 0; i < MAP_SIZE; i++) { + err = bpf_map_lookup_and_delete_elem(map_out_fd, NULL, &val); + if (err || val != vals[i] * 5) + break; + } + + CHECK(i != MAP_SIZE && (err || val != vals[i] * 5), + "bpf_map_push_elem", "err %d value %u\n", err, val); + +out: + pkt_v4.iph.saddr = 0; + bpf_object__close(obj); +} + int main(void) { + srand(time(NULL)); + jit_enabled = is_jit_enabled(); test_pkt_access(); + test_prog_run_xattr(); test_xdp(); test_xdp_adjust_tail(); test_l4lb_all(); @@ -1719,6 +1936,9 @@ int main(void) test_get_stack_raw_tp(); test_task_fd_query_rawtp(); test_task_fd_query_tp(); + test_reference_tracking(); + test_queue_stack_map(QUEUE); + test_queue_stack_map(STACK); printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; diff --git a/tools/testing/selftests/bpf/test_queue_map.c b/tools/testing/selftests/bpf/test_queue_map.c new file mode 100644 index 000000000000..87db1f9da33d --- /dev/null +++ b/tools/testing/selftests/bpf/test_queue_map.c @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Politecnico di Torino +#define MAP_TYPE BPF_MAP_TYPE_QUEUE +#include "test_queue_stack_map.h" diff --git a/tools/testing/selftests/bpf/test_queue_stack_map.h b/tools/testing/selftests/bpf/test_queue_stack_map.h new file mode 100644 index 000000000000..295b9b3bc5c7 --- /dev/null +++ b/tools/testing/selftests/bpf/test_queue_stack_map.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (c) 2018 Politecnico di Torino +#include <stddef.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/pkt_cls.h> +#include "bpf_helpers.h" + +int _version SEC("version") = 1; + +struct bpf_map_def __attribute__ ((section("maps"), used)) map_in = { + .type = MAP_TYPE, + .key_size = 0, + .value_size = sizeof(__u32), + .max_entries = 32, + .map_flags = 0, +}; + +struct bpf_map_def __attribute__ ((section("maps"), used)) map_out = { + .type = MAP_TYPE, + .key_size = 0, + .value_size = sizeof(__u32), + .max_entries = 32, + .map_flags = 0, +}; + +SEC("test") +int _test(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct ethhdr *eth = (struct ethhdr *)(data); + __u32 value; + int err; + + if (eth + 1 > data_end) + return TC_ACT_SHOT; + + struct iphdr *iph = (struct iphdr *)(eth + 1); + + if (iph + 1 > data_end) + return TC_ACT_SHOT; + + err = bpf_map_pop_elem(&map_in, &value); + if (err) + return TC_ACT_SHOT; + + iph->daddr = value; + + err = bpf_map_push_elem(&map_out, &iph->saddr, 0); + if (err) + return TC_ACT_SHOT; + + return TC_ACT_OK; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_section_names.c b/tools/testing/selftests/bpf/test_section_names.c new file mode 100644 index 000000000000..7c4f41572b1c --- /dev/null +++ b/tools/testing/selftests/bpf/test_section_names.c @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include <err.h> +#include <bpf/libbpf.h> + +#include "bpf_util.h" + +struct sec_name_test { + const char sec_name[32]; + struct { + int rc; + enum bpf_prog_type prog_type; + enum bpf_attach_type expected_attach_type; + } expected_load; + struct { + int rc; + enum bpf_attach_type attach_type; + } expected_attach; +}; + +static struct sec_name_test tests[] = { + {"InvAliD", {-EINVAL, 0, 0}, {-EINVAL, 0} }, + {"cgroup", {-EINVAL, 0, 0}, {-EINVAL, 0} }, + {"socket", {0, BPF_PROG_TYPE_SOCKET_FILTER, 0}, {-EINVAL, 0} }, + {"kprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} }, + {"kretprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} }, + {"classifier", {0, BPF_PROG_TYPE_SCHED_CLS, 0}, {-EINVAL, 0} }, + {"action", {0, BPF_PROG_TYPE_SCHED_ACT, 0}, {-EINVAL, 0} }, + {"tracepoint/", {0, BPF_PROG_TYPE_TRACEPOINT, 0}, {-EINVAL, 0} }, + { + "raw_tracepoint/", + {0, BPF_PROG_TYPE_RAW_TRACEPOINT, 0}, + {-EINVAL, 0}, + }, + {"xdp", {0, BPF_PROG_TYPE_XDP, 0}, {-EINVAL, 0} }, + {"perf_event", {0, BPF_PROG_TYPE_PERF_EVENT, 0}, {-EINVAL, 0} }, + {"lwt_in", {0, BPF_PROG_TYPE_LWT_IN, 0}, {-EINVAL, 0} }, + {"lwt_out", {0, BPF_PROG_TYPE_LWT_OUT, 0}, {-EINVAL, 0} }, + {"lwt_xmit", {0, BPF_PROG_TYPE_LWT_XMIT, 0}, {-EINVAL, 0} }, + {"lwt_seg6local", {0, BPF_PROG_TYPE_LWT_SEG6LOCAL, 0}, {-EINVAL, 0} }, + { + "cgroup_skb/ingress", + {0, BPF_PROG_TYPE_CGROUP_SKB, 0}, + {0, BPF_CGROUP_INET_INGRESS}, + }, + { + "cgroup_skb/egress", + {0, BPF_PROG_TYPE_CGROUP_SKB, 0}, + {0, BPF_CGROUP_INET_EGRESS}, + }, + {"cgroup/skb", {0, BPF_PROG_TYPE_CGROUP_SKB, 0}, {-EINVAL, 0} }, + { + "cgroup/sock", + {0, BPF_PROG_TYPE_CGROUP_SOCK, 0}, + {0, BPF_CGROUP_INET_SOCK_CREATE}, + }, + { + "cgroup/post_bind4", + {0, BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND}, + {0, BPF_CGROUP_INET4_POST_BIND}, + }, + { + "cgroup/post_bind6", + {0, BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND}, + {0, BPF_CGROUP_INET6_POST_BIND}, + }, + { + "cgroup/dev", + {0, BPF_PROG_TYPE_CGROUP_DEVICE, 0}, + {0, BPF_CGROUP_DEVICE}, + }, + {"sockops", {0, BPF_PROG_TYPE_SOCK_OPS, 0}, {0, BPF_CGROUP_SOCK_OPS} }, + { + "sk_skb/stream_parser", + {0, BPF_PROG_TYPE_SK_SKB, 0}, + {0, BPF_SK_SKB_STREAM_PARSER}, + }, + { + "sk_skb/stream_verdict", + {0, BPF_PROG_TYPE_SK_SKB, 0}, + {0, BPF_SK_SKB_STREAM_VERDICT}, + }, + {"sk_skb", {0, BPF_PROG_TYPE_SK_SKB, 0}, {-EINVAL, 0} }, + {"sk_msg", {0, BPF_PROG_TYPE_SK_MSG, 0}, {0, BPF_SK_MSG_VERDICT} }, + {"lirc_mode2", {0, BPF_PROG_TYPE_LIRC_MODE2, 0}, {0, BPF_LIRC_MODE2} }, + { + "flow_dissector", + {0, BPF_PROG_TYPE_FLOW_DISSECTOR, 0}, + {0, BPF_FLOW_DISSECTOR}, + }, + { + "cgroup/bind4", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND}, + {0, BPF_CGROUP_INET4_BIND}, + }, + { + "cgroup/bind6", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND}, + {0, BPF_CGROUP_INET6_BIND}, + }, + { + "cgroup/connect4", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT}, + {0, BPF_CGROUP_INET4_CONNECT}, + }, + { + "cgroup/connect6", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT}, + {0, BPF_CGROUP_INET6_CONNECT}, + }, + { + "cgroup/sendmsg4", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG}, + {0, BPF_CGROUP_UDP4_SENDMSG}, + }, + { + "cgroup/sendmsg6", + {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG}, + {0, BPF_CGROUP_UDP6_SENDMSG}, + }, +}; + +static int test_prog_type_by_name(const struct sec_name_test *test) +{ + enum bpf_attach_type expected_attach_type; + enum bpf_prog_type prog_type; + int rc; + + rc = libbpf_prog_type_by_name(test->sec_name, &prog_type, + &expected_attach_type); + + if (rc != test->expected_load.rc) { + warnx("prog: unexpected rc=%d for %s", rc, test->sec_name); + return -1; + } + + if (rc) + return 0; + + if (prog_type != test->expected_load.prog_type) { + warnx("prog: unexpected prog_type=%d for %s", prog_type, + test->sec_name); + return -1; + } + + if (expected_attach_type != test->expected_load.expected_attach_type) { + warnx("prog: unexpected expected_attach_type=%d for %s", + expected_attach_type, test->sec_name); + return -1; + } + + return 0; +} + +static int test_attach_type_by_name(const struct sec_name_test *test) +{ + enum bpf_attach_type attach_type; + int rc; + + rc = libbpf_attach_type_by_name(test->sec_name, &attach_type); + + if (rc != test->expected_attach.rc) { + warnx("attach: unexpected rc=%d for %s", rc, test->sec_name); + return -1; + } + + if (rc) + return 0; + + if (attach_type != test->expected_attach.attach_type) { + warnx("attach: unexpected attach_type=%d for %s", attach_type, + test->sec_name); + return -1; + } + + return 0; +} + +static int run_test_case(const struct sec_name_test *test) +{ + if (test_prog_type_by_name(test)) + return -1; + if (test_attach_type_by_name(test)) + return -1; + return 0; +} + +static int run_tests(void) +{ + int passes = 0; + int fails = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(tests); ++i) { + if (run_test_case(&tests[i])) + ++fails; + else + ++passes; + } + printf("Summary: %d PASSED, %d FAILED\n", passes, fails); + return fails ? -1 : 0; +} + +int main(int argc, char **argv) +{ + return run_tests(); +} diff --git a/tools/testing/selftests/bpf/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/test_sk_lookup_kern.c new file mode 100644 index 000000000000..e21cd736c196 --- /dev/null +++ b/tools/testing/selftests/bpf/test_sk_lookup_kern.c @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io + +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/pkt_cls.h> +#include <linux/tcp.h> +#include <sys/socket.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" + +int _version SEC("version") = 1; +char _license[] SEC("license") = "GPL"; + +/* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */ +static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off, + void *data_end, __u16 eth_proto, + bool *ipv4) +{ + struct bpf_sock_tuple *result; + __u8 proto = 0; + __u64 ihl_len; + + if (eth_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = (struct iphdr *)(data + nh_off); + + if (iph + 1 > data_end) + return NULL; + ihl_len = iph->ihl * 4; + proto = iph->protocol; + *ipv4 = true; + result = (struct bpf_sock_tuple *)&iph->saddr; + } else if (eth_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + nh_off); + + if (ip6h + 1 > data_end) + return NULL; + ihl_len = sizeof(*ip6h); + proto = ip6h->nexthdr; + *ipv4 = true; + result = (struct bpf_sock_tuple *)&ip6h->saddr; + } + + if (data + nh_off + ihl_len > data_end || proto != IPPROTO_TCP) + return NULL; + + return result; +} + +SEC("sk_lookup_success") +int bpf_sk_lookup_test0(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct ethhdr *eth = (struct ethhdr *)(data); + struct bpf_sock_tuple *tuple; + struct bpf_sock *sk; + size_t tuple_len; + bool ipv4; + + if (eth + 1 > data_end) + return TC_ACT_SHOT; + + tuple = get_tuple(data, sizeof(*eth), data_end, eth->h_proto, &ipv4); + if (!tuple || tuple + sizeof *tuple > data_end) + return TC_ACT_SHOT; + + tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6); + sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0); + if (sk) + bpf_sk_release(sk); + return sk ? TC_ACT_OK : TC_ACT_UNSPEC; +} + +SEC("sk_lookup_success_simple") +int bpf_sk_lookup_test1(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + struct bpf_sock *sk; + + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); + if (sk) + bpf_sk_release(sk); + return 0; +} + +SEC("fail_use_after_free") +int bpf_sk_lookup_uaf(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + struct bpf_sock *sk; + __u32 family = 0; + + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); + if (sk) { + bpf_sk_release(sk); + family = sk->family; + } + return family; +} + +SEC("fail_modify_sk_pointer") +int bpf_sk_lookup_modptr(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + struct bpf_sock *sk; + __u32 family; + + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); + if (sk) { + sk += 1; + bpf_sk_release(sk); + } + return 0; +} + +SEC("fail_modify_sk_or_null_pointer") +int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + struct bpf_sock *sk; + __u32 family; + + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); + sk += 1; + if (sk) + bpf_sk_release(sk); + return 0; +} + +SEC("fail_no_release") +int bpf_sk_lookup_test2(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + + bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); + return 0; +} + +SEC("fail_release_twice") +int bpf_sk_lookup_test3(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + struct bpf_sock *sk; + + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); + bpf_sk_release(sk); + bpf_sk_release(sk); + return 0; +} + +SEC("fail_release_unchecked") +int bpf_sk_lookup_test4(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + struct bpf_sock *sk; + + sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); + bpf_sk_release(sk); + return 0; +} + +void lookup_no_release(struct __sk_buff *skb) +{ + struct bpf_sock_tuple tuple = {}; + bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); +} + +SEC("fail_no_release_subcall") +int bpf_sk_lookup_test5(struct __sk_buff *skb) +{ + lookup_no_release(skb); + return 0; +} diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id.sh b/tools/testing/selftests/bpf/test_skb_cgroup_id.sh index 42544a969abc..a9bc6f82abc1 100755 --- a/tools/testing/selftests/bpf/test_skb_cgroup_id.sh +++ b/tools/testing/selftests/bpf/test_skb_cgroup_id.sh @@ -10,7 +10,7 @@ wait_for_ip() echo -n "Wait for testing link-local IP to become available " for _i in $(seq ${MAX_PING_TRIES}); do echo -n "." - if ping -6 -q -c 1 -W 1 ff02::1%${TEST_IF} >/dev/null 2>&1; then + if $PING6 -c 1 -W 1 ff02::1%${TEST_IF} >/dev/null 2>&1; then echo " OK" return fi @@ -58,5 +58,6 @@ BPF_PROG_OBJ="${DIR}/test_skb_cgroup_id_kern.o" BPF_PROG_SECTION="cgroup_id_logger" BPF_PROG_ID=0 PROG="${DIR}/test_skb_cgroup_id_user" +type ping6 >/dev/null 2>&1 && PING6="ping6" || PING6="ping -6" main diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c index c121cc59f314..9220747c069d 100644 --- a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c +++ b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c @@ -164,7 +164,7 @@ int main(int argc, char **argv) goto err; cgfd = create_and_get_cgroup(CGROUP_PATH); - if (!cgfd) + if (cgfd < 0) goto err; if (join_cgroup(CGROUP_PATH)) diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c index b8ebe2f58074..561ffb6d6433 100644 --- a/tools/testing/selftests/bpf/test_sock.c +++ b/tools/testing/selftests/bpf/test_sock.c @@ -458,7 +458,7 @@ int main(int argc, char **argv) goto err; cgfd = create_and_get_cgroup(CG_PATH); - if (!cgfd) + if (cgfd < 0) goto err; if (join_cgroup(CG_PATH)) diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index aeeb76a54d63..3f110eaaf29c 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -44,6 +44,7 @@ #define SERV6_V4MAPPED_IP "::ffff:192.168.0.4" #define SRC6_IP "::1" #define SRC6_REWRITE_IP "::6" +#define WILDCARD6_IP "::" #define SERV6_PORT 6060 #define SERV6_REWRITE_PORT 6666 @@ -85,12 +86,14 @@ static int bind4_prog_load(const struct sock_addr_test *test); static int bind6_prog_load(const struct sock_addr_test *test); static int connect4_prog_load(const struct sock_addr_test *test); static int connect6_prog_load(const struct sock_addr_test *test); +static int sendmsg_allow_prog_load(const struct sock_addr_test *test); static int sendmsg_deny_prog_load(const struct sock_addr_test *test); static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test); static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test); static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test); static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test); static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test); +static int sendmsg6_rw_wildcard_prog_load(const struct sock_addr_test *test); static struct sock_addr_test tests[] = { /* bind */ @@ -463,6 +466,34 @@ static struct sock_addr_test tests[] = { SYSCALL_ENOTSUPP, }, { + "sendmsg6: set dst IP = [::] (BSD'ism)", + sendmsg6_rw_wildcard_prog_load, + BPF_CGROUP_UDP6_SENDMSG, + BPF_CGROUP_UDP6_SENDMSG, + AF_INET6, + SOCK_DGRAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SRC6_REWRITE_IP, + SUCCESS, + }, + { + "sendmsg6: preserve dst IP = [::] (BSD'ism)", + sendmsg_allow_prog_load, + BPF_CGROUP_UDP6_SENDMSG, + BPF_CGROUP_UDP6_SENDMSG, + AF_INET6, + SOCK_DGRAM, + WILDCARD6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_PORT, + SRC6_IP, + SUCCESS, + }, + { "sendmsg6: deny call", sendmsg_deny_prog_load, BPF_CGROUP_UDP6_SENDMSG, @@ -574,24 +605,44 @@ static int bind4_prog_load(const struct sock_addr_test *test) /* if (sk.family == AF_INET && */ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock_addr, family)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 16), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 24), /* (sk.type == SOCK_DGRAM || sk.type == SOCK_STREAM) && */ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock_addr, type)), BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 1), BPF_JMP_A(1), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 12), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 20), /* 1st_byte_of_user_ip4 == expected && */ BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock_addr, user_ip4)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 10), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 18), + + /* 2nd_byte_of_user_ip4 == expected && */ + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, user_ip4) + 1), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[1], 16), + + /* 3rd_byte_of_user_ip4 == expected && */ + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, user_ip4) + 2), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[2], 14), + + /* 4th_byte_of_user_ip4 == expected && */ + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, user_ip4) + 3), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[3], 12), /* 1st_half_of_user_ip4 == expected && */ BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock_addr, user_ip4)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 8), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 10), + + /* 2nd_half_of_user_ip4 == expected && */ + BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6, + offsetof(struct bpf_sock_addr, user_ip4) + 2), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[1], 8), /* whole_user_ip4 == expected) { */ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, @@ -714,16 +765,27 @@ static int connect6_prog_load(const struct sock_addr_test *test) return load_path(test, CONNECT6_PROG_PATH); } -static int sendmsg_deny_prog_load(const struct sock_addr_test *test) +static int sendmsg_ret_only_prog_load(const struct sock_addr_test *test, + int32_t rc) { struct bpf_insn insns[] = { - /* return 0 */ - BPF_MOV64_IMM(BPF_REG_0, 0), + /* return rc */ + BPF_MOV64_IMM(BPF_REG_0, rc), BPF_EXIT_INSN(), }; return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn)); } +static int sendmsg_allow_prog_load(const struct sock_addr_test *test) +{ + return sendmsg_ret_only_prog_load(test, /*rc*/ 1); +} + +static int sendmsg_deny_prog_load(const struct sock_addr_test *test) +{ + return sendmsg_ret_only_prog_load(test, /*rc*/ 0); +} + static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test) { struct sockaddr_in dst4_rw_addr; @@ -844,6 +906,11 @@ static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test) return sendmsg6_rw_dst_asm_prog_load(test, SERV6_V4MAPPED_IP); } +static int sendmsg6_rw_wildcard_prog_load(const struct sock_addr_test *test) +{ + return sendmsg6_rw_dst_asm_prog_load(test, WILDCARD6_IP); +} + static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test) { return load_path(test, SENDMSG6_PROG_PATH); @@ -1375,7 +1442,7 @@ int main(int argc, char **argv) goto err; cgfd = create_and_get_cgroup(CG_PATH); - if (!cgfd) + if (cgfd < 0) goto err; if (join_cgroup(CG_PATH)) diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh index 9832a875a828..3b9fdb8094aa 100755 --- a/tools/testing/selftests/bpf/test_sock_addr.sh +++ b/tools/testing/selftests/bpf/test_sock_addr.sh @@ -4,7 +4,8 @@ set -eu ping_once() { - ping -${1} -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1 + type ping${1} >/dev/null 2>&1 && PING="ping${1}" || PING="ping -${1}" + $PING -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1 } wait_for_ip() diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c index 68e108e4687a..fc7832ee566b 100644 --- a/tools/testing/selftests/bpf/test_socket_cookie.c +++ b/tools/testing/selftests/bpf/test_socket_cookie.c @@ -158,11 +158,7 @@ static int run_test(int cgfd) bpf_object__for_each_program(prog, pobj) { prog_name = bpf_program__title(prog, /*needs_copy*/ false); - if (strcmp(prog_name, "cgroup/connect6") == 0) { - attach_type = BPF_CGROUP_INET6_CONNECT; - } else if (strcmp(prog_name, "sockops") == 0) { - attach_type = BPF_CGROUP_SOCK_OPS; - } else { + if (libbpf_attach_type_by_name(prog_name, &attach_type)) { log_err("Unexpected prog: %s", prog_name); goto err; } @@ -206,7 +202,7 @@ int main(int argc, char **argv) goto err; cgfd = create_and_get_cgroup(CG_PATH); - if (!cgfd) + if (cgfd < 0) goto err; if (join_cgroup(CG_PATH)) diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 0c7d9e556b47..e85a771f607b 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -28,6 +28,7 @@ #include <linux/sock_diag.h> #include <linux/bpf.h> #include <linux/if_link.h> +#include <linux/tls.h> #include <assert.h> #include <libgen.h> @@ -43,6 +44,13 @@ int running; static void running_handler(int a); +#ifndef TCP_ULP +# define TCP_ULP 31 +#endif +#ifndef SOL_TLS +# define SOL_TLS 282 +#endif + /* randomly selected ports for testing on lo */ #define S1_PORT 10000 #define S2_PORT 10001 @@ -69,8 +77,14 @@ int txmsg_apply; int txmsg_cork; int txmsg_start; int txmsg_end; +int txmsg_start_push; +int txmsg_end_push; +int txmsg_start_pop; +int txmsg_pop; int txmsg_ingress; int txmsg_skb; +int ktls; +int peek_flag; static const struct option long_options[] = { {"help", no_argument, NULL, 'h' }, @@ -90,8 +104,14 @@ static const struct option long_options[] = { {"txmsg_cork", required_argument, NULL, 'k'}, {"txmsg_start", required_argument, NULL, 's'}, {"txmsg_end", required_argument, NULL, 'e'}, + {"txmsg_start_push", required_argument, NULL, 'p'}, + {"txmsg_end_push", required_argument, NULL, 'q'}, + {"txmsg_start_pop", required_argument, NULL, 'w'}, + {"txmsg_pop", required_argument, NULL, 'x'}, {"txmsg_ingress", no_argument, &txmsg_ingress, 1 }, {"txmsg_skb", no_argument, &txmsg_skb, 1 }, + {"ktls", no_argument, &ktls, 1 }, + {"peek", no_argument, &peek_flag, 1 }, {0, 0, NULL, 0 } }; @@ -112,6 +132,71 @@ static void usage(char *argv[]) printf("\n"); } +char *sock_to_string(int s) +{ + if (s == c1) + return "client1"; + else if (s == c2) + return "client2"; + else if (s == s1) + return "server1"; + else if (s == s2) + return "server2"; + else if (s == p1) + return "peer1"; + else if (s == p2) + return "peer2"; + else + return "unknown"; +} + +static int sockmap_init_ktls(int verbose, int s) +{ + struct tls12_crypto_info_aes_gcm_128 tls_tx = { + .info = { + .version = TLS_1_2_VERSION, + .cipher_type = TLS_CIPHER_AES_GCM_128, + }, + }; + struct tls12_crypto_info_aes_gcm_128 tls_rx = { + .info = { + .version = TLS_1_2_VERSION, + .cipher_type = TLS_CIPHER_AES_GCM_128, + }, + }; + int so_buf = 6553500; + int err; + + err = setsockopt(s, 6, TCP_ULP, "tls", sizeof("tls")); + if (err) { + fprintf(stderr, "setsockopt: TCP_ULP(%s) failed with error %i\n", sock_to_string(s), err); + return -EINVAL; + } + err = setsockopt(s, SOL_TLS, TLS_TX, (void *)&tls_tx, sizeof(tls_tx)); + if (err) { + fprintf(stderr, "setsockopt: TLS_TX(%s) failed with error %i\n", sock_to_string(s), err); + return -EINVAL; + } + err = setsockopt(s, SOL_TLS, TLS_RX, (void *)&tls_rx, sizeof(tls_rx)); + if (err) { + fprintf(stderr, "setsockopt: TLS_RX(%s) failed with error %i\n", sock_to_string(s), err); + return -EINVAL; + } + err = setsockopt(s, SOL_SOCKET, SO_SNDBUF, &so_buf, sizeof(so_buf)); + if (err) { + fprintf(stderr, "setsockopt: (%s) failed sndbuf with error %i\n", sock_to_string(s), err); + return -EINVAL; + } + err = setsockopt(s, SOL_SOCKET, SO_RCVBUF, &so_buf, sizeof(so_buf)); + if (err) { + fprintf(stderr, "setsockopt: (%s) failed rcvbuf with error %i\n", sock_to_string(s), err); + return -EINVAL; + } + + if (verbose) + fprintf(stdout, "socket(%s) kTLS enabled\n", sock_to_string(s)); + return 0; +} static int sockmap_init_sockets(int verbose) { int i, err, one = 1; @@ -277,33 +362,40 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt, return 0; } -static int msg_loop(int fd, int iov_count, int iov_length, int cnt, - struct msg_stats *s, bool tx, - struct sockmap_options *opt) +static void msg_free_iov(struct msghdr *msg) { - struct msghdr msg = {0}; - int err, i, flags = MSG_NOSIGNAL; + int i; + + for (i = 0; i < msg->msg_iovlen; i++) + free(msg->msg_iov[i].iov_base); + free(msg->msg_iov); + msg->msg_iov = NULL; + msg->msg_iovlen = 0; +} + +static int msg_alloc_iov(struct msghdr *msg, + int iov_count, int iov_length, + bool data, bool xmit) +{ + unsigned char k = 0; struct iovec *iov; - unsigned char k; - bool data_test = opt->data_test; - bool drop = opt->drop_expected; + int i; iov = calloc(iov_count, sizeof(struct iovec)); if (!iov) return errno; - k = 0; for (i = 0; i < iov_count; i++) { unsigned char *d = calloc(iov_length, sizeof(char)); if (!d) { fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count); - goto out_errno; + goto unwind_iov; } iov[i].iov_base = d; iov[i].iov_len = iov_length; - if (data_test && tx) { + if (data && xmit) { int j; for (j = 0; j < iov_length; j++) @@ -311,9 +403,60 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, } } - msg.msg_iov = iov; - msg.msg_iovlen = iov_count; - k = 0; + msg->msg_iov = iov; + msg->msg_iovlen = iov_count; + + return 0; +unwind_iov: + for (i--; i >= 0 ; i--) + free(msg->msg_iov[i].iov_base); + return -ENOMEM; +} + +static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz) +{ + int i, j, bytes_cnt = 0; + unsigned char k = 0; + + for (i = 0; i < msg->msg_iovlen; i++) { + unsigned char *d = msg->msg_iov[i].iov_base; + + for (j = 0; + j < msg->msg_iov[i].iov_len && size; j++) { + if (d[j] != k++) { + fprintf(stderr, + "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n", + i, j, d[j], k - 1, d[j+1], k); + return -EIO; + } + bytes_cnt++; + if (bytes_cnt == chunk_sz) { + k = 0; + bytes_cnt = 0; + } + size--; + } + } + return 0; +} + +static int msg_loop(int fd, int iov_count, int iov_length, int cnt, + struct msg_stats *s, bool tx, + struct sockmap_options *opt) +{ + struct msghdr msg = {0}, msg_peek = {0}; + int err, i, flags = MSG_NOSIGNAL; + bool drop = opt->drop_expected; + bool data = opt->data_test; + + err = msg_alloc_iov(&msg, iov_count, iov_length, data, tx); + if (err) + goto out_errno; + if (peek_flag) { + err = msg_alloc_iov(&msg_peek, iov_count, iov_length, data, tx); + if (err) + goto out_errno; + } if (tx) { clock_gettime(CLOCK_MONOTONIC, &s->start); @@ -333,21 +476,28 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, } clock_gettime(CLOCK_MONOTONIC, &s->end); } else { - int slct, recv, max_fd = fd; + int slct, recvp = 0, recv, max_fd = fd; + float total_bytes, txmsg_pop_total; int fd_flags = O_NONBLOCK; struct timeval timeout; - float total_bytes; - int bytes_cnt = 0; - int chunk_sz; fd_set w; - if (opt->sendpage) - chunk_sz = iov_length * cnt; - else - chunk_sz = iov_length * iov_count; - fcntl(fd, fd_flags); + /* Account for pop bytes noting each iteration of apply will + * call msg_pop_data helper so we need to account for this + * by calculating the number of apply iterations. Note user + * of the tool can create cases where no data is sent by + * manipulating pop/push/pull/etc. For example txmsg_apply 1 + * with txmsg_pop 1 will try to apply 1B at a time but each + * iteration will then pop 1B so no data will ever be sent. + * This is really only useful for testing edge cases in code + * paths. + */ total_bytes = (float)iov_count * (float)iov_length * (float)cnt; + txmsg_pop_total = txmsg_pop; + if (txmsg_apply) + txmsg_pop_total *= (total_bytes / txmsg_apply); + total_bytes -= txmsg_pop_total; err = clock_gettime(CLOCK_MONOTONIC, &s->start); if (err < 0) perror("recv start time: "); @@ -356,7 +506,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, timeout.tv_sec = 0; timeout.tv_usec = 300000; } else { - timeout.tv_sec = 1; + timeout.tv_sec = 3; timeout.tv_usec = 0; } @@ -371,12 +521,25 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, goto out_errno; } else if (!slct) { if (opt->verbose) - fprintf(stderr, "unexpected timeout\n"); + fprintf(stderr, "unexpected timeout: recved %zu/%f pop_total %f\n", s->bytes_recvd, total_bytes, txmsg_pop_total); errno = -EIO; clock_gettime(CLOCK_MONOTONIC, &s->end); goto out_errno; } + errno = 0; + if (peek_flag) { + flags |= MSG_PEEK; + recvp = recvmsg(fd, &msg_peek, flags); + if (recvp < 0) { + if (errno != EWOULDBLOCK) { + clock_gettime(CLOCK_MONOTONIC, &s->end); + goto out_errno; + } + } + flags = 0; + } + recv = recvmsg(fd, &msg, flags); if (recv < 0) { if (errno != EWOULDBLOCK) { @@ -388,27 +551,23 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, s->bytes_recvd += recv; - if (data_test) { - int j; - - for (i = 0; i < msg.msg_iovlen; i++) { - unsigned char *d = iov[i].iov_base; - - for (j = 0; - j < iov[i].iov_len && recv; j++) { - if (d[j] != k++) { - errno = -EIO; - fprintf(stderr, - "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n", - i, j, d[j], k - 1, d[j+1], k); - goto out_errno; - } - bytes_cnt++; - if (bytes_cnt == chunk_sz) { - k = 0; - bytes_cnt = 0; - } - recv--; + if (data) { + int chunk_sz = opt->sendpage ? + iov_length * cnt : + iov_length * iov_count; + + errno = msg_verify_data(&msg, recv, chunk_sz); + if (errno) { + perror("data verify msg failed\n"); + goto out_errno; + } + if (recvp) { + errno = msg_verify_data(&msg_peek, + recvp, + chunk_sz); + if (errno) { + perror("data verify msg_peek failed\n"); + goto out_errno; } } } @@ -416,14 +575,12 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, clock_gettime(CLOCK_MONOTONIC, &s->end); } - for (i = 0; i < iov_count; i++) - free(iov[i].iov_base); - free(iov); - return 0; + msg_free_iov(&msg); + msg_free_iov(&msg_peek); + return err; out_errno: - for (i = 0; i < iov_count; i++) - free(iov[i].iov_base); - free(iov); + msg_free_iov(&msg); + msg_free_iov(&msg_peek); return errno; } @@ -456,6 +613,21 @@ static int sendmsg_test(struct sockmap_options *opt) else rx_fd = p2; + if (ktls) { + /* Redirecting into non-TLS socket which sends into a TLS + * socket is not a valid test. So in this case lets not + * enable kTLS but still run the test. + */ + if (!txmsg_redir || (txmsg_redir && txmsg_ingress)) { + err = sockmap_init_ktls(opt->verbose, rx_fd); + if (err) + return err; + } + err = sockmap_init_ktls(opt->verbose, c1); + if (err) + return err; + } + rxpid = fork(); if (rxpid == 0) { if (opt->drop_expected) @@ -465,21 +637,20 @@ static int sendmsg_test(struct sockmap_options *opt) iov_count = 1; err = msg_loop(rx_fd, iov_count, iov_buf, cnt, &s, false, opt); - if (err && opt->verbose) + if (opt->verbose) fprintf(stderr, "msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n", iov_count, iov_buf, cnt, err); - shutdown(p2, SHUT_RDWR); - shutdown(p1, SHUT_RDWR); if (s.end.tv_sec - s.start.tv_sec) { sent_Bps = sentBps(s); recvd_Bps = recvdBps(s); } if (opt->verbose) fprintf(stdout, - "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n", + "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s %s\n", s.bytes_sent, sent_Bps, sent_Bps/giga, - s.bytes_recvd, recvd_Bps, recvd_Bps/giga); + s.bytes_recvd, recvd_Bps, recvd_Bps/giga, + peek_flag ? "(peek_msg)" : ""); if (err && txmsg_cork) err = 0; exit(err ? 1 : 0); @@ -500,7 +671,6 @@ static int sendmsg_test(struct sockmap_options *opt) fprintf(stderr, "msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n", iov_count, iov_buf, cnt, err); - shutdown(c1, SHUT_RDWR); if (s.end.tv_sec - s.start.tv_sec) { sent_Bps = sentBps(s); recvd_Bps = recvdBps(s); @@ -755,6 +925,63 @@ run: } } + if (txmsg_start_push) { + i = 2; + err = bpf_map_update_elem(map_fd[5], + &i, &txmsg_start_push, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (txmsg_start_push): %d (%s)\n", + err, strerror(errno)); + goto out; + } + } + + if (txmsg_end_push) { + i = 3; + err = bpf_map_update_elem(map_fd[5], + &i, &txmsg_end_push, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem %i@%i (txmsg_end_push): %d (%s)\n", + txmsg_end_push, i, err, strerror(errno)); + goto out; + } + } + + if (txmsg_start_pop) { + i = 4; + err = bpf_map_update_elem(map_fd[5], + &i, &txmsg_start_pop, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem %i@%i (txmsg_start_pop): %d (%s)\n", + txmsg_start_pop, i, err, strerror(errno)); + goto out; + } + } else { + i = 4; + bpf_map_update_elem(map_fd[5], + &i, &txmsg_start_pop, BPF_ANY); + } + + if (txmsg_pop) { + i = 5; + err = bpf_map_update_elem(map_fd[5], + &i, &txmsg_pop, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem %i@%i (txmsg_pop): %d (%s)\n", + txmsg_pop, i, err, strerror(errno)); + goto out; + } + } else { + i = 5; + bpf_map_update_elem(map_fd[5], + &i, &txmsg_pop, BPF_ANY); + + } + if (txmsg_ingress) { int in = BPF_F_INGRESS; @@ -906,10 +1133,19 @@ static void test_options(char *options) snprintf(tstr, OPTSTRING, "end %d,", txmsg_end); strncat(options, tstr, OPTSTRING); } + if (txmsg_start_pop) { + snprintf(tstr, OPTSTRING, "pop (%d,%d),", + txmsg_start_pop, txmsg_start_pop + txmsg_pop); + strncat(options, tstr, OPTSTRING); + } if (txmsg_ingress) strncat(options, "ingress,", OPTSTRING); if (txmsg_skb) strncat(options, "skb,", OPTSTRING); + if (ktls) + strncat(options, "ktls,", OPTSTRING); + if (peek_flag) + strncat(options, "peek,", OPTSTRING); } static int __test_exec(int cgrp, int test, struct sockmap_options *opt) @@ -1083,6 +1319,9 @@ static int test_mixed(int cgrp) txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0; txmsg_apply = txmsg_cork = 0; txmsg_start = txmsg_end = 0; + txmsg_start_push = txmsg_end_push = 0; + txmsg_start_pop = txmsg_pop = 0; + /* Test small and large iov_count values with pass/redir/apply/cork */ txmsg_pass = 1; txmsg_redir = 0; @@ -1199,6 +1438,21 @@ static int test_start_end(int cgrp) /* Test basic start/end with lots of iov_count and iov_lengths */ txmsg_start = 1; txmsg_end = 2; + txmsg_start_push = 1; + txmsg_end_push = 2; + txmsg_start_pop = 1; + txmsg_pop = 1; + err = test_txmsg(cgrp); + if (err) + goto out; + + /* Cut a byte of pushed data but leave reamining in place */ + txmsg_start = 1; + txmsg_end = 2; + txmsg_start_push = 1; + txmsg_end_push = 3; + txmsg_start_pop = 1; + txmsg_pop = 1; err = test_txmsg(cgrp); if (err) goto out; @@ -1209,18 +1463,36 @@ static int test_start_end(int cgrp) opt.iov_length = 100; txmsg_cork = 1600; + txmsg_start_pop = 0; + txmsg_pop = 0; + for (i = 99; i <= 1600; i += 500) { txmsg_start = 0; txmsg_end = i; + txmsg_start_push = 0; + txmsg_end_push = i; err = test_exec(cgrp, &opt); if (err) goto out; } + /* Test pop data in middle of cork */ + for (i = 99; i <= 1600; i += 500) { + txmsg_start_pop = 10; + txmsg_pop = i; + err = test_exec(cgrp, &opt); + if (err) + goto out; + } + txmsg_start_pop = 0; + txmsg_pop = 0; + /* Test start/end with cork but pull data in middle */ for (i = 199; i <= 1600; i += 500) { txmsg_start = 100; txmsg_end = i; + txmsg_start_push = 100; + txmsg_end_push = i; err = test_exec(cgrp, &opt); if (err) goto out; @@ -1229,13 +1501,33 @@ static int test_start_end(int cgrp) /* Test start/end with cork pulling last sg entry */ txmsg_start = 1500; txmsg_end = 1600; + txmsg_start_push = 1500; + txmsg_end_push = 1600; + err = test_exec(cgrp, &opt); + if (err) + goto out; + + /* Test pop with cork pulling last sg entry */ + txmsg_start_pop = 1500; + txmsg_pop = 1600; err = test_exec(cgrp, &opt); if (err) goto out; + txmsg_start_pop = 0; + txmsg_pop = 0; /* Test start/end pull of single byte in last page */ txmsg_start = 1111; txmsg_end = 1112; + txmsg_start_push = 1111; + txmsg_end_push = 1112; + err = test_exec(cgrp, &opt); + if (err) + goto out; + + /* Test pop of single byte in last page */ + txmsg_start_pop = 1111; + txmsg_pop = 1112; err = test_exec(cgrp, &opt); if (err) goto out; @@ -1243,6 +1535,8 @@ static int test_start_end(int cgrp) /* Test start/end with end < start */ txmsg_start = 1111; txmsg_end = 0; + txmsg_start_push = 1111; + txmsg_end_push = 0; err = test_exec(cgrp, &opt); if (err) goto out; @@ -1250,6 +1544,8 @@ static int test_start_end(int cgrp) /* Test start/end with end > data */ txmsg_start = 0; txmsg_end = 1601; + txmsg_start_push = 0; + txmsg_end_push = 1601; err = test_exec(cgrp, &opt); if (err) goto out; @@ -1257,8 +1553,23 @@ static int test_start_end(int cgrp) /* Test start/end with start > data */ txmsg_start = 1601; txmsg_end = 1600; + txmsg_start_push = 1601; + txmsg_end_push = 1600; + err = test_exec(cgrp, &opt); + if (err) + goto out; + + /* Test pop with start > data */ + txmsg_start_pop = 1601; + txmsg_pop = 1; err = test_exec(cgrp, &opt); + if (err) + goto out; + /* Test pop with pop range > data */ + txmsg_start_pop = 1599; + txmsg_pop = 10; + err = test_exec(cgrp, &opt); out: txmsg_start = 0; txmsg_end = 0; @@ -1272,7 +1583,7 @@ char *map_names[] = { "sock_map_redir", "sock_apply_bytes", "sock_cork_bytes", - "sock_pull_bytes", + "sock_bytes", "sock_redir_flags", "sock_skb_opts", }; @@ -1348,9 +1659,9 @@ static int populate_progs(char *bpf_file) return 0; } -static int __test_suite(char *bpf_file) +static int __test_suite(int cg_fd, char *bpf_file) { - int cg_fd, err; + int err, cleanup = cg_fd; err = populate_progs(bpf_file); if (err < 0) { @@ -1358,26 +1669,28 @@ static int __test_suite(char *bpf_file) return err; } - if (setup_cgroup_environment()) { - fprintf(stderr, "ERROR: cgroup env failed\n"); - return -EINVAL; - } - - cg_fd = create_and_get_cgroup(CG_PATH); if (cg_fd < 0) { - fprintf(stderr, - "ERROR: (%i) open cg path failed: %s\n", - cg_fd, optarg); - return cg_fd; - } + if (setup_cgroup_environment()) { + fprintf(stderr, "ERROR: cgroup env failed\n"); + return -EINVAL; + } - if (join_cgroup(CG_PATH)) { - fprintf(stderr, "ERROR: failed to join cgroup\n"); - return -EINVAL; + cg_fd = create_and_get_cgroup(CG_PATH); + if (cg_fd < 0) { + fprintf(stderr, + "ERROR: (%i) open cg path failed: %s\n", + cg_fd, optarg); + return cg_fd; + } + + if (join_cgroup(CG_PATH)) { + fprintf(stderr, "ERROR: failed to join cgroup\n"); + return -EINVAL; + } } /* Tests basic commands and APIs with range of iov values */ - txmsg_start = txmsg_end = 0; + txmsg_start = txmsg_end = txmsg_start_push = txmsg_end_push = 0; err = test_txmsg(cg_fd); if (err) goto out; @@ -1394,20 +1707,24 @@ static int __test_suite(char *bpf_file) out: printf("Summary: %i PASSED %i FAILED\n", passed, failed); - cleanup_cgroup_environment(); - close(cg_fd); + if (cleanup < 0) { + cleanup_cgroup_environment(); + close(cg_fd); + } return err; } -static int test_suite(void) +static int test_suite(int cg_fd) { int err; - err = __test_suite(BPF_SOCKMAP_FILENAME); + err = __test_suite(cg_fd, BPF_SOCKMAP_FILENAME); if (err) goto out; - err = __test_suite(BPF_SOCKHASH_FILENAME); + err = __test_suite(cg_fd, BPF_SOCKHASH_FILENAME); out: + if (cg_fd > -1) + close(cg_fd); return err; } @@ -1420,9 +1737,9 @@ int main(int argc, char **argv) int test = PING_PONG; if (argc < 2) - return test_suite(); + return test_suite(-1); - while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:", + while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:p:q:", long_options, &longindex)) != -1) { switch (opt) { case 's': @@ -1431,6 +1748,18 @@ int main(int argc, char **argv) case 'e': txmsg_end = atoi(optarg); break; + case 'p': + txmsg_start_push = atoi(optarg); + break; + case 'q': + txmsg_end_push = atoi(optarg); + break; + case 'w': + txmsg_start_pop = atoi(optarg); + break; + case 'x': + txmsg_pop = atoi(optarg); + break; case 'a': txmsg_apply = atoi(optarg); break; @@ -1486,6 +1815,9 @@ int main(int argc, char **argv) } } + if (argc <= 3 && cg_fd) + return test_suite(cg_fd); + if (!cg_fd) { fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n", argv[0]); diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h index 8e8e41780bb9..e7639f66a941 100644 --- a/tools/testing/selftests/bpf/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/test_sockmap_kern.h @@ -70,11 +70,11 @@ struct bpf_map_def SEC("maps") sock_cork_bytes = { .max_entries = 1 }; -struct bpf_map_def SEC("maps") sock_pull_bytes = { +struct bpf_map_def SEC("maps") sock_bytes = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), - .max_entries = 2 + .max_entries = 6 }; struct bpf_map_def SEC("maps") sock_redir_flags = { @@ -181,8 +181,8 @@ int bpf_sockmap(struct bpf_sock_ops *skops) SEC("sk_msg1") int bpf_prog4(struct sk_msg_md *msg) { - int *bytes, zero = 0, one = 1; - int *start, *end; + int *bytes, zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; + int *start, *end, *start_push, *end_push, *start_pop, *pop; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); if (bytes) @@ -190,18 +190,28 @@ int bpf_prog4(struct sk_msg_md *msg) bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); if (bytes) bpf_msg_cork_bytes(msg, *bytes); - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); + start = bpf_map_lookup_elem(&sock_bytes, &zero); + end = bpf_map_lookup_elem(&sock_bytes, &one); if (start && end) bpf_msg_pull_data(msg, *start, *end, 0); + start_push = bpf_map_lookup_elem(&sock_bytes, &two); + end_push = bpf_map_lookup_elem(&sock_bytes, &three); + if (start_push && end_push) + bpf_msg_push_data(msg, *start_push, *end_push, 0); + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) + bpf_msg_pop_data(msg, *start_pop, *pop, 0); return SK_PASS; } SEC("sk_msg2") int bpf_prog5(struct sk_msg_md *msg) { - int err1 = -1, err2 = -1, zero = 0, one = 1; - int *bytes, *start, *end, len1, len2; + int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; + int *start, *end, *start_push, *end_push, *start_pop, *pop; + int *bytes, len1, len2 = 0, len3, len4; + int err1 = -1, err2 = -1; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); if (bytes) @@ -210,8 +220,8 @@ int bpf_prog5(struct sk_msg_md *msg) if (bytes) err2 = bpf_msg_cork_bytes(msg, *bytes); len1 = (__u64)msg->data_end - (__u64)msg->data; - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); + start = bpf_map_lookup_elem(&sock_bytes, &zero); + end = bpf_map_lookup_elem(&sock_bytes, &one); if (start && end) { int err; @@ -225,6 +235,37 @@ int bpf_prog5(struct sk_msg_md *msg) bpf_printk("sk_msg2: length update %i->%i\n", len1, len2); } + + start_push = bpf_map_lookup_elem(&sock_bytes, &two); + end_push = bpf_map_lookup_elem(&sock_bytes, &three); + if (start_push && end_push) { + int err; + + bpf_printk("sk_msg2: push(%i:%i)\n", + start_push ? *start_push : 0, + end_push ? *end_push : 0); + err = bpf_msg_push_data(msg, *start_push, *end_push, 0); + if (err) + bpf_printk("sk_msg2: push_data err %i\n", err); + len3 = (__u64)msg->data_end - (__u64)msg->data; + bpf_printk("sk_msg2: length push_update %i->%i\n", + len2 ? len2 : len1, len3); + } + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) { + int err; + + bpf_printk("sk_msg2: pop(%i@%i)\n", + start_pop, pop); + err = bpf_msg_pop_data(msg, *start_pop, *pop, 0); + if (err) + bpf_printk("sk_msg2: pop_data err %i\n", err); + len4 = (__u64)msg->data_end - (__u64)msg->data; + bpf_printk("sk_msg2: length pop_data %i->%i\n", + len1 ? len1 : 0, len4); + } + bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n", len1, err1, err2); return SK_PASS; @@ -233,8 +274,8 @@ int bpf_prog5(struct sk_msg_md *msg) SEC("sk_msg3") int bpf_prog6(struct sk_msg_md *msg) { - int *bytes, zero = 0, one = 1, key = 0; - int *start, *end, *f; + int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, key = 0; + int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f; __u64 flags = 0; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); @@ -243,10 +284,22 @@ int bpf_prog6(struct sk_msg_md *msg) bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); if (bytes) bpf_msg_cork_bytes(msg, *bytes); - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); + + start = bpf_map_lookup_elem(&sock_bytes, &zero); + end = bpf_map_lookup_elem(&sock_bytes, &one); if (start && end) bpf_msg_pull_data(msg, *start, *end, 0); + + start_push = bpf_map_lookup_elem(&sock_bytes, &two); + end_push = bpf_map_lookup_elem(&sock_bytes, &three); + if (start_push && end_push) + bpf_msg_push_data(msg, *start_push, *end_push, 0); + + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) + bpf_msg_pop_data(msg, *start_pop, *pop, 0); + f = bpf_map_lookup_elem(&sock_redir_flags, &zero); if (f && *f) { key = 2; @@ -262,8 +315,10 @@ int bpf_prog6(struct sk_msg_md *msg) SEC("sk_msg4") int bpf_prog7(struct sk_msg_md *msg) { - int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0; - int *f, *bytes, *start, *end, len1, len2; + int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f; + int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; + int len1, len2 = 0, len3, len4; + int err1 = 0, err2 = 0, key = 0; __u64 flags = 0; int err; @@ -274,10 +329,10 @@ int bpf_prog7(struct sk_msg_md *msg) if (bytes) err2 = bpf_msg_cork_bytes(msg, *bytes); len1 = (__u64)msg->data_end - (__u64)msg->data; - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); - if (start && end) { + start = bpf_map_lookup_elem(&sock_bytes, &zero); + end = bpf_map_lookup_elem(&sock_bytes, &one); + if (start && end) { bpf_printk("sk_msg2: pull(%i:%i)\n", start ? *start : 0, end ? *end : 0); err = bpf_msg_pull_data(msg, *start, *end, 0); @@ -288,6 +343,38 @@ int bpf_prog7(struct sk_msg_md *msg) bpf_printk("sk_msg2: length update %i->%i\n", len1, len2); } + + start_push = bpf_map_lookup_elem(&sock_bytes, &two); + end_push = bpf_map_lookup_elem(&sock_bytes, &three); + if (start_push && end_push) { + bpf_printk("sk_msg4: push(%i:%i)\n", + start_push ? *start_push : 0, + end_push ? *end_push : 0); + err = bpf_msg_push_data(msg, *start_push, *end_push, 0); + if (err) + bpf_printk("sk_msg4: push_data err %i\n", + err); + len3 = (__u64)msg->data_end - (__u64)msg->data; + bpf_printk("sk_msg4: length push_update %i->%i\n", + len2 ? len2 : len1, len3); + } + + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) { + int err; + + bpf_printk("sk_msg4: pop(%i@%i)\n", + start_pop, pop); + err = bpf_msg_pop_data(msg, *start_pop, *pop, 0); + if (err) + bpf_printk("sk_msg4: pop_data err %i\n", err); + len4 = (__u64)msg->data_end - (__u64)msg->data; + bpf_printk("sk_msg4: length pop_data %i->%i\n", + len1 ? len1 : 0, len4); + } + + f = bpf_map_lookup_elem(&sock_redir_flags, &zero); if (f && *f) { key = 2; @@ -342,8 +429,8 @@ int bpf_prog9(struct sk_msg_md *msg) SEC("sk_msg7") int bpf_prog10(struct sk_msg_md *msg) { - int *bytes, zero = 0, one = 1; - int *start, *end; + int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop; + int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); if (bytes) @@ -351,11 +438,19 @@ int bpf_prog10(struct sk_msg_md *msg) bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero); if (bytes) bpf_msg_cork_bytes(msg, *bytes); - start = bpf_map_lookup_elem(&sock_pull_bytes, &zero); - end = bpf_map_lookup_elem(&sock_pull_bytes, &one); + start = bpf_map_lookup_elem(&sock_bytes, &zero); + end = bpf_map_lookup_elem(&sock_bytes, &one); if (start && end) bpf_msg_pull_data(msg, *start, *end, 0); - + start_push = bpf_map_lookup_elem(&sock_bytes, &two); + end_push = bpf_map_lookup_elem(&sock_bytes, &three); + if (start_push && end_push) + bpf_msg_push_data(msg, *start_push, *end_push, 0); + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) + bpf_msg_pop_data(msg, *start_pop, *pop, 0); + bpf_printk("return sk drop\n"); return SK_DROP; } diff --git a/tools/testing/selftests/bpf/test_stack_map.c b/tools/testing/selftests/bpf/test_stack_map.c new file mode 100644 index 000000000000..31c3880e6da0 --- /dev/null +++ b/tools/testing/selftests/bpf/test_stack_map.c @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Politecnico di Torino +#define MAP_TYPE BPF_MAP_TYPE_STACK +#include "test_queue_stack_map.h" diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/test_tcpbpf_kern.c index 4b7fd540cea9..74f73b33a7b0 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c @@ -5,6 +5,7 @@ #include <linux/if_ether.h> #include <linux/if_packet.h> #include <linux/ip.h> +#include <linux/ipv6.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/tcp.h> @@ -17,6 +18,13 @@ struct bpf_map_def SEC("maps") global_map = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(__u32), .value_size = sizeof(struct tcpbpf_globals), + .max_entries = 4, +}; + +struct bpf_map_def SEC("maps") sockopt_results = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(int), .max_entries = 2, }; @@ -45,11 +53,14 @@ int _version SEC("version") = 1; SEC("sockops") int bpf_testcb(struct bpf_sock_ops *skops) { - int rv = -1; - int bad_call_rv = 0; + char header[sizeof(struct ipv6hdr) + sizeof(struct tcphdr)]; + struct tcphdr *thdr; int good_call_rv = 0; - int op; + int bad_call_rv = 0; + int save_syn = 1; + int rv = -1; int v = 0; + int op; op = (int) skops->op; @@ -82,6 +93,21 @@ int bpf_testcb(struct bpf_sock_ops *skops) v = 0xff; rv = bpf_setsockopt(skops, SOL_IPV6, IPV6_TCLASS, &v, sizeof(v)); + if (skops->family == AF_INET6) { + v = bpf_getsockopt(skops, IPPROTO_TCP, TCP_SAVED_SYN, + header, (sizeof(struct ipv6hdr) + + sizeof(struct tcphdr))); + if (!v) { + int offset = sizeof(struct ipv6hdr); + + thdr = (struct tcphdr *)(header + offset); + v = thdr->syn; + __u32 key = 1; + + bpf_map_update_elem(&sockopt_results, &key, &v, + BPF_ANY); + } + } break; case BPF_SOCK_OPS_RTO_CB: break; @@ -111,6 +137,12 @@ int bpf_testcb(struct bpf_sock_ops *skops) break; case BPF_SOCK_OPS_TCP_LISTEN_CB: bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG); + v = bpf_setsockopt(skops, IPPROTO_TCP, TCP_SAVE_SYN, + &save_syn, sizeof(save_syn)); + /* Update global map w/ result of setsock opt */ + __u32 key = 0; + + bpf_map_update_elem(&sockopt_results, &key, &v, BPF_ANY); break; default: rv = -1; diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c index a275c2971376..716b4e3be581 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf_user.c +++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c @@ -54,6 +54,26 @@ err: return -1; } +int verify_sockopt_result(int sock_map_fd) +{ + __u32 key = 0; + int res; + int rv; + + /* check setsockopt for SAVE_SYN */ + rv = bpf_map_lookup_elem(sock_map_fd, &key, &res); + EXPECT_EQ(0, rv, "d"); + EXPECT_EQ(0, res, "d"); + key = 1; + /* check getsockopt for SAVED_SYN */ + rv = bpf_map_lookup_elem(sock_map_fd, &key, &res); + EXPECT_EQ(0, rv, "d"); + EXPECT_EQ(1, res, "d"); + return 0; +err: + return -1; +} + static int bpf_find_map(const char *test, struct bpf_object *obj, const char *name) { @@ -70,11 +90,11 @@ static int bpf_find_map(const char *test, struct bpf_object *obj, int main(int argc, char **argv) { const char *file = "test_tcpbpf_kern.o"; + int prog_fd, map_fd, sock_map_fd; struct tcpbpf_globals g = {0}; const char *cg_path = "/foo"; int error = EXIT_FAILURE; struct bpf_object *obj; - int prog_fd, map_fd; int cg_fd = -1; __u32 key = 0; int rv; @@ -83,7 +103,7 @@ int main(int argc, char **argv) goto err; cg_fd = create_and_get_cgroup(cg_path); - if (!cg_fd) + if (cg_fd < 0) goto err; if (join_cgroup(cg_path)) @@ -110,6 +130,10 @@ int main(int argc, char **argv) if (map_fd < 0) goto err; + sock_map_fd = bpf_find_map(__func__, obj, "sockopt_results"); + if (sock_map_fd < 0) + goto err; + rv = bpf_map_lookup_elem(map_fd, &key, &g); if (rv != 0) { printf("FAILED: bpf_map_lookup_elem returns %d\n", rv); @@ -121,6 +145,11 @@ int main(int argc, char **argv) goto err; } + if (verify_sockopt_result(sock_map_fd)) { + printf("FAILED: Wrong sockopt stats\n"); + goto err; + } + printf("PASSED!\n"); error = 0; err: diff --git a/tools/testing/selftests/bpf/test_tcpnotify.h b/tools/testing/selftests/bpf/test_tcpnotify.h new file mode 100644 index 000000000000..8b6cea030bfc --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcpnotify.h @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef _TEST_TCPBPF_H +#define _TEST_TCPBPF_H + +struct tcpnotify_globals { + __u32 total_retrans; + __u32 ncalls; +}; + +struct tcp_notifier { + __u8 type; + __u8 subtype; + __u8 source; + __u8 hash; +}; + +#define TESTPORT 12877 +#endif diff --git a/tools/testing/selftests/bpf/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/test_tcpnotify_kern.c new file mode 100644 index 000000000000..edbca203ce2d --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcpnotify_kern.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stddef.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/types.h> +#include <linux/socket.h> +#include <linux/tcp.h> +#include <netinet/in.h> +#include "bpf_helpers.h" +#include "bpf_endian.h" +#include "test_tcpnotify.h" + +struct bpf_map_def SEC("maps") global_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct tcpnotify_globals), + .max_entries = 4, +}; + +struct bpf_map_def SEC("maps") perf_event_map = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(__u32), + .max_entries = 2, +}; + +int _version SEC("version") = 1; + +SEC("sockops") +int bpf_testcb(struct bpf_sock_ops *skops) +{ + int rv = -1; + int op; + + op = (int) skops->op; + + if (bpf_ntohl(skops->remote_port) != TESTPORT) { + skops->reply = -1; + return 0; + } + + switch (op) { + case BPF_SOCK_OPS_TIMEOUT_INIT: + case BPF_SOCK_OPS_RWND_INIT: + case BPF_SOCK_OPS_NEEDS_ECN: + case BPF_SOCK_OPS_BASE_RTT: + case BPF_SOCK_OPS_RTO_CB: + rv = 1; + break; + + case BPF_SOCK_OPS_TCP_CONNECT_CB: + case BPF_SOCK_OPS_TCP_LISTEN_CB: + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + bpf_sock_ops_cb_flags_set(skops, (BPF_SOCK_OPS_RETRANS_CB_FLAG| + BPF_SOCK_OPS_RTO_CB_FLAG)); + rv = 1; + break; + case BPF_SOCK_OPS_RETRANS_CB: { + __u32 key = 0; + struct tcpnotify_globals g, *gp; + struct tcp_notifier msg = { + .type = 0xde, + .subtype = 0xad, + .source = 0xbe, + .hash = 0xef, + }; + + rv = 1; + + /* Update results */ + gp = bpf_map_lookup_elem(&global_map, &key); + if (!gp) + break; + g = *gp; + g.total_retrans = skops->total_retrans; + g.ncalls++; + bpf_map_update_elem(&global_map, &key, &g, + BPF_ANY); + bpf_perf_event_output(skops, &perf_event_map, + BPF_F_CURRENT_CPU, + &msg, sizeof(msg)); + } + break; + default: + rv = -1; + } + skops->reply = rv; + return 1; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c new file mode 100644 index 000000000000..4e4353711a86 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <pthread.h> +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <asm/types.h> +#include <sys/syscall.h> +#include <errno.h> +#include <string.h> +#include <linux/bpf.h> +#include <sys/socket.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> +#include <sys/ioctl.h> +#include <linux/rtnetlink.h> +#include <signal.h> +#include <linux/perf_event.h> + +#include "bpf_rlimit.h" +#include "bpf_util.h" +#include "cgroup_helpers.h" + +#include "test_tcpnotify.h" +#include "trace_helpers.h" + +#define SOCKET_BUFFER_SIZE (getpagesize() < 8192L ? getpagesize() : 8192L) + +pthread_t tid; +int rx_callbacks; + +static int dummyfn(void *data, int size) +{ + struct tcp_notifier *t = data; + + if (t->type != 0xde || t->subtype != 0xad || + t->source != 0xbe || t->hash != 0xef) + return 1; + rx_callbacks++; + return 0; +} + +void tcp_notifier_poller(int fd) +{ + while (1) + perf_event_poller(fd, dummyfn); +} + +static void *poller_thread(void *arg) +{ + int fd = *(int *)arg; + + tcp_notifier_poller(fd); + return arg; +} + +int verify_result(const struct tcpnotify_globals *result) +{ + return (result->ncalls > 0 && result->ncalls == rx_callbacks ? 0 : 1); +} + +static int bpf_find_map(const char *test, struct bpf_object *obj, + const char *name) +{ + struct bpf_map *map; + + map = bpf_object__find_map_by_name(obj, name); + if (!map) { + printf("%s:FAIL:map '%s' not found\n", test, name); + return -1; + } + return bpf_map__fd(map); +} + +static int setup_bpf_perf_event(int mapfd) +{ + struct perf_event_attr attr = { + .sample_type = PERF_SAMPLE_RAW, + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_BPF_OUTPUT, + }; + int key = 0; + int pmu_fd; + + pmu_fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0); + if (pmu_fd < 0) + return pmu_fd; + bpf_map_update_elem(mapfd, &key, &pmu_fd, BPF_ANY); + + ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); + return pmu_fd; +} + +int main(int argc, char **argv) +{ + const char *file = "test_tcpnotify_kern.o"; + int prog_fd, map_fd, perf_event_fd; + struct tcpnotify_globals g = {0}; + const char *cg_path = "/foo"; + int error = EXIT_FAILURE; + struct bpf_object *obj; + int cg_fd = -1; + __u32 key = 0; + int rv; + char test_script[80]; + int pmu_fd; + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); + + if (setup_cgroup_environment()) + goto err; + + cg_fd = create_and_get_cgroup(cg_path); + if (cg_fd < 0) + goto err; + + if (join_cgroup(cg_path)) + goto err; + + if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { + printf("FAILED: load_bpf_file failed for: %s\n", file); + goto err; + } + + rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0); + if (rv) { + printf("FAILED: bpf_prog_attach: %d (%s)\n", + error, strerror(errno)); + goto err; + } + + perf_event_fd = bpf_find_map(__func__, obj, "perf_event_map"); + if (perf_event_fd < 0) + goto err; + + map_fd = bpf_find_map(__func__, obj, "global_map"); + if (map_fd < 0) + goto err; + + pmu_fd = setup_bpf_perf_event(perf_event_fd); + if (pmu_fd < 0 || perf_event_mmap(pmu_fd) < 0) + goto err; + + pthread_create(&tid, NULL, poller_thread, (void *)&pmu_fd); + + sprintf(test_script, + "/usr/sbin/iptables -A INPUT -p tcp --dport %d -j DROP", + TESTPORT); + system(test_script); + + sprintf(test_script, + "/usr/bin/nc 127.0.0.1 %d < /etc/passwd > /dev/null 2>&1 ", + TESTPORT); + system(test_script); + + sprintf(test_script, + "/usr/sbin/iptables -D INPUT -p tcp --dport %d -j DROP", + TESTPORT); + system(test_script); + + rv = bpf_map_lookup_elem(map_fd, &key, &g); + if (rv != 0) { + printf("FAILED: bpf_map_lookup_elem returns %d\n", rv); + goto err; + } + + sleep(10); + + if (verify_result(&g)) { + printf("FAILED: Wrong stats Expected %d calls, got %d\n", + g.ncalls, rx_callbacks); + goto err; + } + + printf("PASSED!\n"); + error = 0; +err: + bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS); + close(cg_fd); + cleanup_cgroup_environment(); + return error; +} diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 67c412d19c09..2fd90d456892 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -3,6 +3,7 @@ * * Copyright (c) 2014 PLUMgrid, http://plumgrid.com * Copyright (c) 2017 Facebook + * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -22,6 +23,7 @@ #include <stdbool.h> #include <sched.h> #include <limits.h> +#include <assert.h> #include <sys/capability.h> @@ -47,7 +49,8 @@ #define MAX_INSNS BPF_MAXINSNS #define MAX_FIXUPS 8 -#define MAX_NR_MAPS 8 +#define MAX_NR_MAPS 13 +#define MAX_TEST_RUNS 8 #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 @@ -60,17 +63,22 @@ static bool unpriv_disabled = false; struct bpf_test { const char *descr; struct bpf_insn insns[MAX_INSNS]; - int fixup_map1[MAX_FIXUPS]; - int fixup_map2[MAX_FIXUPS]; - int fixup_map3[MAX_FIXUPS]; - int fixup_map4[MAX_FIXUPS]; + int fixup_map_hash_8b[MAX_FIXUPS]; + int fixup_map_hash_48b[MAX_FIXUPS]; + int fixup_map_hash_16b[MAX_FIXUPS]; + int fixup_map_array_48b[MAX_FIXUPS]; + int fixup_map_sockmap[MAX_FIXUPS]; + int fixup_map_sockhash[MAX_FIXUPS]; + int fixup_map_xskmap[MAX_FIXUPS]; + int fixup_map_stacktrace[MAX_FIXUPS]; int fixup_prog1[MAX_FIXUPS]; int fixup_prog2[MAX_FIXUPS]; int fixup_map_in_map[MAX_FIXUPS]; int fixup_cgroup_storage[MAX_FIXUPS]; + int fixup_percpu_cgroup_storage[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; - uint32_t retval; + uint32_t retval, retval_unpriv, insn_processed; enum { UNDEF, ACCEPT, @@ -80,6 +88,14 @@ struct bpf_test { uint8_t flags; __u8 data[TEST_DATA_LEN]; void (*fill_helper)(struct bpf_test *self); + uint8_t runs; + struct { + uint32_t retval, retval_unpriv; + union { + __u8 data[TEST_DATA_LEN]; + __u64 data64[TEST_DATA_LEN / 8]; + }; + } retvals[MAX_TEST_RUNS]; }; /* Note we want this to be 64 bit aligned so that the end of our array is @@ -177,6 +193,24 @@ static void bpf_fill_rand_ld_dw(struct bpf_test *self) self->retval = (uint32_t)res; } +/* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */ +#define BPF_SK_LOOKUP \ + /* struct bpf_sock_tuple tuple = {} */ \ + BPF_MOV64_IMM(BPF_REG_2, 0), \ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8), \ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -16), \ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -24), \ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -32), \ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -40), \ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -48), \ + /* sk = sk_lookup_tcp(ctx, &tuple, sizeof tuple, 0, 0) */ \ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), \ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -48), \ + BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)), \ + BPF_MOV64_IMM(BPF_REG_4, 0), \ + BPF_MOV64_IMM(BPF_REG_5, 0), \ + BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp) + static struct bpf_test tests[] = { { "add+sub+mul", @@ -697,8 +731,18 @@ static struct bpf_test tests[] = { BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 5), BPF_EXIT_INSN(), }, - .result = REJECT, - .errstr = "unknown opcode c4", + .result = ACCEPT, + .retval = 0, + }, + { + "arsh32 on imm 2", + .insns = { + BPF_LD_IMM64(BPF_REG_0, 0x1122334485667788), + BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 7), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = -16069393, }, { "arsh32 on reg", @@ -708,8 +752,19 @@ static struct bpf_test tests[] = { BPF_ALU32_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .result = REJECT, - .errstr = "unknown opcode cc", + .result = ACCEPT, + .retval = 0, + }, + { + "arsh32 on reg 2", + .insns = { + BPF_LD_IMM64(BPF_REG_0, 0xffff55667788), + BPF_MOV64_IMM(BPF_REG_1, 15), + BPF_ALU32_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 43724, }, { "arsh64 on imm", @@ -856,7 +911,7 @@ static struct bpf_test tests[] = { BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, - .fixup_map1 = { 2 }, + .fixup_map_hash_8b = { 2 }, .errstr = "invalid indirect read from stack", .result = REJECT, }, @@ -956,15 +1011,45 @@ static struct bpf_test tests[] = { BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), /* mess up with R1 pointer on stack */ BPF_ST_MEM(BPF_B, BPF_REG_10, -7, 0x23), - /* fill back into R0 should fail */ + /* fill back into R0 is fine for priv. + * R0 now becomes SCALAR_VALUE. + */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + /* Load from R0 should fail. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), BPF_EXIT_INSN(), }, .errstr_unpriv = "attempt to corrupt spilled", - .errstr = "corrupted spill", + .errstr = "R0 invalid mem access 'inv", .result = REJECT, }, { + "check corrupted spill/fill, LSB", + .insns = { + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_ST_MEM(BPF_H, BPF_REG_10, -8, 0xcafe), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "attempt to corrupt spilled", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = POINTER_VALUE, + }, + { + "check corrupted spill/fill, MSB", + .insns = { + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), + BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0x12345678), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "attempt to corrupt spilled", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = POINTER_VALUE, + }, + { "invalid src register in STX", .insns = { BPF_STX_MEM(BPF_B, BPF_REG_10, -1, -1), @@ -1090,7 +1175,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "R0 invalid mem access 'map_value_or_null'", .result = REJECT, }, @@ -1107,7 +1192,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "misaligned value access", .result = REJECT, .flags = F_LOAD_WITH_STRICT_ALIGNMENT, @@ -1127,7 +1212,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "R0 invalid mem access", .errstr_unpriv = "R0 leaks addr", .result = REJECT, @@ -1217,7 +1302,7 @@ static struct bpf_test tests[] = { BPF_FUNC_map_delete_elem), BPF_EXIT_INSN(), }, - .fixup_map1 = { 24 }, + .fixup_map_hash_8b = { 24 }, .errstr_unpriv = "R1 pointer comparison", .result_unpriv = REJECT, .result = ACCEPT, @@ -1371,7 +1456,7 @@ static struct bpf_test tests[] = { offsetof(struct __sk_buff, pkt_type)), BPF_EXIT_INSN(), }, - .fixup_map1 = { 4 }, + .fixup_map_hash_8b = { 4 }, .errstr = "different pointers", .errstr_unpriv = "R1 pointer comparison", .result = REJECT, @@ -1394,7 +1479,7 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_JMP_IMM(BPF_JA, 0, 0, -12), }, - .fixup_map1 = { 6 }, + .fixup_map_hash_8b = { 6 }, .errstr = "different pointers", .errstr_unpriv = "R1 pointer comparison", .result = REJECT, @@ -1418,7 +1503,7 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_JMP_IMM(BPF_JA, 0, 0, -13), }, - .fixup_map1 = { 7 }, + .fixup_map_hash_8b = { 7 }, .errstr = "different pointers", .errstr_unpriv = "R1 pointer comparison", .result = REJECT, @@ -1768,10 +1853,20 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SK_SKB, }, { - "invalid 64B read of family in SK_MSG", + "valid access size in SK_MSG", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct sk_msg_md, size)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_MSG, + }, + { + "invalid 64B read of size in SK_MSG", .insns = { BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, - offsetof(struct sk_msg_md, family)), + offsetof(struct sk_msg_md, size)), BPF_EXIT_INSN(), }, .errstr = "invalid bpf_context access", @@ -1782,10 +1877,10 @@ static struct bpf_test tests[] = { "invalid read past end of SK_MSG", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, - offsetof(struct sk_msg_md, local_port) + 4), + offsetof(struct sk_msg_md, size) + 4), BPF_EXIT_INSN(), }, - .errstr = "R0 !read_ok", + .errstr = "invalid bpf_context access", .result = REJECT, .prog_type = BPF_PROG_TYPE_SK_MSG, }, @@ -1799,6 +1894,7 @@ static struct bpf_test tests[] = { .errstr = "invalid bpf_context access", .result = REJECT, .prog_type = BPF_PROG_TYPE_SK_MSG, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet read for SK_MSG", @@ -2002,29 +2098,27 @@ static struct bpf_test tests[] = { .result = ACCEPT, }, { - "check skb->hash byte load not permitted 1", + "check skb->hash byte load permitted 1", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 1), BPF_EXIT_INSN(), }, - .errstr = "invalid bpf_context access", - .result = REJECT, + .result = ACCEPT, }, { - "check skb->hash byte load not permitted 2", + "check skb->hash byte load permitted 2", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, hash) + 2), BPF_EXIT_INSN(), }, - .errstr = "invalid bpf_context access", - .result = REJECT, + .result = ACCEPT, }, { - "check skb->hash byte load not permitted 3", + "check skb->hash byte load permitted 3", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), #if __BYTE_ORDER == __LITTLE_ENDIAN @@ -2036,8 +2130,7 @@ static struct bpf_test tests[] = { #endif BPF_EXIT_INSN(), }, - .errstr = "invalid bpf_context access", - .result = REJECT, + .result = ACCEPT, }, { "check cb access: byte, wrong type", @@ -2149,7 +2242,7 @@ static struct bpf_test tests[] = { .result = ACCEPT, }, { - "check skb->hash half load not permitted", + "check skb->hash half load permitted 2", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), #if __BYTE_ORDER == __LITTLE_ENDIAN @@ -2161,8 +2254,41 @@ static struct bpf_test tests[] = { #endif BPF_EXIT_INSN(), }, + .result = ACCEPT, + }, + { + "check skb->hash half load not permitted, unaligned 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), +#if __BYTE_ORDER == __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 1), +#else + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 3), +#endif + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check skb->hash half load not permitted, unaligned 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), +#if __BYTE_ORDER == __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 3), +#else + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, hash) + 1), +#endif + BPF_EXIT_INSN(), + }, .errstr = "invalid bpf_context access", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "check cb access: half, wrong type", @@ -2394,6 +2520,10 @@ static struct bpf_test tests[] = { offsetof(struct __sk_buff, tc_index)), BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, offsetof(struct __sk_buff, cb[3])), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tstamp)), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, tstamp)), BPF_EXIT_INSN(), }, .errstr_unpriv = "", @@ -2448,6 +2578,7 @@ static struct bpf_test tests[] = { }, .result = REJECT, .errstr = "invalid stack off=-79992 size=8", + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", }, { "PTR_TO_STACK store/load - out of bounds high", @@ -2555,7 +2686,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr_unpriv = "R4 leaks addr", .result_unpriv = REJECT, .result = ACCEPT, @@ -2572,7 +2703,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "invalid indirect read from stack off -8+0 size 8", .result = REJECT, }, @@ -2707,6 +2838,137 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { + "unpriv: spill/fill of different pointers stx - ctx and sock", + .insns = { + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + /* struct bpf_sock *sock = bpf_sock_lookup(...); */ + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + /* u64 foo; */ + /* void *target = &foo; */ + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + /* if (skb == NULL) *target = sock; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), + /* else *target = skb; */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + /* struct __sk_buff *skb = *target; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + /* skb->mark = 42; */ + BPF_MOV64_IMM(BPF_REG_3, 42), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct __sk_buff, mark)), + /* if (sk) bpf_sk_release(sk) */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "type=ctx expected=sock", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "unpriv: spill/fill of different pointers stx - leak sock", + .insns = { + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + /* struct bpf_sock *sock = bpf_sock_lookup(...); */ + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + /* u64 foo; */ + /* void *target = &foo; */ + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + /* if (skb == NULL) *target = sock; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), + /* else *target = skb; */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + /* struct __sk_buff *skb = *target; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + /* skb->mark = 42; */ + BPF_MOV64_IMM(BPF_REG_3, 42), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .result = REJECT, + //.errstr = "same insn cannot be used with different pointers", + .errstr = "Unreleased reference", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "unpriv: spill/fill of different pointers stx - sock and ctx (read)", + .insns = { + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + /* struct bpf_sock *sock = bpf_sock_lookup(...); */ + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + /* u64 foo; */ + /* void *target = &foo; */ + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + /* if (skb) *target = skb */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + /* else *target = sock */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), + /* struct bpf_sock *sk = *target; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + /* if (sk) u32 foo = sk->mark; bpf_sk_release(sk); */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct bpf_sock, mark)), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "same insn cannot be used with different pointers", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "unpriv: spill/fill of different pointers stx - sock and ctx (write)", + .insns = { + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + /* struct bpf_sock *sock = bpf_sock_lookup(...); */ + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + /* u64 foo; */ + /* void *target = &foo; */ + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + /* if (skb) *target = skb */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + /* else *target = sock */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0), + /* struct bpf_sock *sk = *target; */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0), + /* if (sk) sk->mark = 42; bpf_sk_release(sk); */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + BPF_MOV64_IMM(BPF_REG_3, 42), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct bpf_sock, mark)), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + //.errstr = "same insn cannot be used with different pointers", + .errstr = "cannot write into socket", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { "unpriv: spill/fill of different pointers ldx", .insns = { BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), @@ -2743,12 +3005,25 @@ static struct bpf_test tests[] = { BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr_unpriv = "R0 leaks addr", .result_unpriv = REJECT, .result = ACCEPT, }, { + "alu32: mov u32 const", + .insns = { + BPF_MOV32_IMM(BPF_REG_7, 0), + BPF_ALU32_IMM(BPF_AND, BPF_REG_7, 1), + BPF_MOV32_REG(BPF_REG_0, BPF_REG_7), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { "unpriv: partial copy of pointer", .insns = { BPF_MOV32_REG(BPF_REG_1, BPF_REG_10), @@ -2783,7 +3058,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 1 }, + .fixup_map_hash_8b = { 1 }, .errstr_unpriv = "R1 pointer comparison", .result_unpriv = REJECT, .result = ACCEPT, @@ -2831,6 +3106,8 @@ static struct bpf_test tests[] = { BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -2929,6 +3206,245 @@ static struct bpf_test tests[] = { .fixup_prog1 = { 2 }, .result = ACCEPT, .retval = 42, + /* Verifier rewrite for unpriv skips tail call here. */ + .retval_unpriv = 2, + }, + { + "PTR_TO_STACK check high 1", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -1), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "PTR_TO_STACK check high 2", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ST_MEM(BPF_B, BPF_REG_1, -1, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, -1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "PTR_TO_STACK check high 3", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0), + BPF_ST_MEM(BPF_B, BPF_REG_1, -1, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, -1), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .result_unpriv = REJECT, + .result = ACCEPT, + .retval = 42, + }, + { + "PTR_TO_STACK check high 4", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .errstr = "invalid stack off=0 size=1", + .result = REJECT, + }, + { + "PTR_TO_STACK check high 5", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack off", + }, + { + "PTR_TO_STACK check high 6", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1), + BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MAX, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MAX), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack off", + }, + { + "PTR_TO_STACK check high 7", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1), + BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MAX, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MAX), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .errstr = "fp pointer offset", + }, + { + "PTR_TO_STACK check low 1", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -512), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "PTR_TO_STACK check low 2", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -513), + BPF_ST_MEM(BPF_B, BPF_REG_1, 1, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 1), + BPF_EXIT_INSN(), + }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .result = ACCEPT, + .retval = 42, + }, + { + "PTR_TO_STACK check low 3", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -513), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .errstr = "invalid stack off=-513 size=1", + .result = REJECT, + }, + { + "PTR_TO_STACK check low 4", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, INT_MIN), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "math between fp pointer", + }, + { + "PTR_TO_STACK check low 5", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack off", + }, + { + "PTR_TO_STACK check low 6", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)), + BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MIN, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MIN), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack off", + }, + { + "PTR_TO_STACK check low 7", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)), + BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MIN, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MIN), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", + .errstr = "fp pointer offset", + }, + { + "PTR_TO_STACK mixed reg/k, 1", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -3), + BPF_MOV64_IMM(BPF_REG_2, -3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "PTR_TO_STACK mixed reg/k, 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -3), + BPF_MOV64_IMM(BPF_REG_2, -3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_5, -6), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 42, + }, + { + "PTR_TO_STACK mixed reg/k, 3", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -3), + BPF_MOV64_IMM(BPF_REG_2, -3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = -3, + }, + { + "PTR_TO_STACK reg", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_MOV64_IMM(BPF_REG_2, -3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .result_unpriv = REJECT, + .errstr_unpriv = "invalid stack off=0 size=1", + .result = ACCEPT, + .retval = 42, }, { "stack pointer arithmetic", @@ -3092,6 +3608,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "R0 invalid mem access 'inv'", .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "raw_stack: skb_load_bytes, spilled regs corruption 2", @@ -3122,6 +3639,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "R3 invalid mem access 'inv'", .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "raw_stack: skb_load_bytes, spilled regs + data", @@ -3275,7 +3793,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0), BPF_EXIT_INSN(), }, - .errstr = "BPF_ST stores into R1 context is not allowed", + .errstr = "BPF_ST stores into R1 ctx is not allowed", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -3287,7 +3805,7 @@ static struct bpf_test tests[] = { BPF_REG_0, offsetof(struct __sk_buff, mark), 0), BPF_EXIT_INSN(), }, - .errstr = "BPF_XADD stores into R1 context is not allowed", + .errstr = "BPF_XADD stores into R1 ctx is not allowed", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -3621,6 +4139,7 @@ static struct bpf_test tests[] = { .errstr = "R2 invalid mem access 'inv'", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test16 (arith on data_end)", @@ -3637,7 +4156,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END", + .errstr = "R3 pointer arithmetic on pkt_end", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -3723,6 +4242,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test21 (x += pkt_ptr, 2)", @@ -3748,6 +4268,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test22 (x += pkt_ptr, 3)", @@ -3778,6 +4299,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test23 (x += pkt_ptr, 4)", @@ -3804,6 +4326,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .errstr = "invalid access to packet, off=0 size=8, R5(id=1,off=0,r=0)", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test24 (x += pkt_ptr, 5)", @@ -3829,6 +4352,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "direct packet access: test25 (marking on <, good access)", @@ -3922,7 +4446,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 5 }, + .fixup_map_hash_8b = { 5 }, .result_unpriv = ACCEPT, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, @@ -3938,7 +4462,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 1 }, + .fixup_map_hash_8b = { 1 }, .result = REJECT, .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_XDP, @@ -3966,7 +4490,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 11 }, + .fixup_map_hash_8b = { 11 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, }, @@ -3988,7 +4512,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 7 }, + .fixup_map_hash_8b = { 7 }, .result = REJECT, .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_XDP, @@ -4010,7 +4534,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 6 }, + .fixup_map_hash_8b = { 6 }, .result = REJECT, .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_XDP, @@ -4033,7 +4557,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 5 }, + .fixup_map_hash_8b = { 5 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -4048,7 +4572,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 1 }, + .fixup_map_hash_8b = { 1 }, .result = REJECT, .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_SCHED_CLS, @@ -4076,7 +4600,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 11 }, + .fixup_map_hash_8b = { 11 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -4098,7 +4622,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 7 }, + .fixup_map_hash_8b = { 7 }, .result = REJECT, .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_SCHED_CLS, @@ -4120,7 +4644,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 6 }, + .fixup_map_hash_8b = { 6 }, .result = REJECT, .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_SCHED_CLS, @@ -4391,6 +4915,85 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { + "prevent map lookup in sockmap", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_sockmap = { 3 }, + .result = REJECT, + .errstr = "cannot pass map_type 15 into func bpf_map_lookup_elem", + .prog_type = BPF_PROG_TYPE_SOCK_OPS, + }, + { + "prevent map lookup in sockhash", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_sockhash = { 3 }, + .result = REJECT, + .errstr = "cannot pass map_type 18 into func bpf_map_lookup_elem", + .prog_type = BPF_PROG_TYPE_SOCK_OPS, + }, + { + "prevent map lookup in xskmap", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_xskmap = { 3 }, + .result = REJECT, + .errstr = "cannot pass map_type 17 into func bpf_map_lookup_elem", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "prevent map lookup in stack trace", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map_stacktrace = { 3 }, + .result = REJECT, + .errstr = "cannot pass map_type 7 into func bpf_map_lookup_elem", + .prog_type = BPF_PROG_TYPE_PERF_EVENT, + }, + { + "prevent map lookup in prog array", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_prog2 = { 3 }, + .result = REJECT, + .errstr = "cannot pass map_type 3 into func bpf_map_lookup_elem", + }, + { "valid map access into an array with a constant", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), @@ -4404,7 +5007,7 @@ static struct bpf_test tests[] = { offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", .result_unpriv = REJECT, .result = ACCEPT, @@ -4426,7 +5029,7 @@ static struct bpf_test tests[] = { offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", .result_unpriv = REJECT, .result = ACCEPT, @@ -4450,7 +5053,7 @@ static struct bpf_test tests[] = { offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", .result_unpriv = REJECT, .result = ACCEPT, @@ -4478,7 +5081,7 @@ static struct bpf_test tests[] = { offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", .result_unpriv = REJECT, .result = ACCEPT, @@ -4498,7 +5101,7 @@ static struct bpf_test tests[] = { offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "invalid access to map value, value_size=48 off=48 size=8", .result = REJECT, }, @@ -4519,7 +5122,7 @@ static struct bpf_test tests[] = { offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R0 min value is outside of the array range", .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -4541,7 +5144,7 @@ static struct bpf_test tests[] = { offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map", .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -4566,7 +5169,7 @@ static struct bpf_test tests[] = { offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", .errstr = "R0 unbounded memory access", .result_unpriv = REJECT, @@ -4593,7 +5196,7 @@ static struct bpf_test tests[] = { offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", .errstr = "invalid access to map value, value_size=48 off=44 size=8", .result_unpriv = REJECT, @@ -4623,12 +5226,185 @@ static struct bpf_test tests[] = { offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3, 11 }, + .fixup_map_hash_48b = { 3, 11 }, .errstr = "R0 pointer += pointer", .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { + "direct packet read test#1 for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, pkt_type)), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, + offsetof(struct __sk_buff, mark)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, queue_mapping)), + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, + offsetof(struct __sk_buff, protocol)), + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, + offsetof(struct __sk_buff, vlan_present)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "invalid bpf_context access off=76 size=4", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "direct packet read test#2 for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct __sk_buff, vlan_tci)), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, vlan_proto)), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, priority)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, + offsetof(struct __sk_buff, priority)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, + ingress_ifindex)), + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, + offsetof(struct __sk_buff, tc_index)), + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, + offsetof(struct __sk_buff, hash)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "direct packet read test#3 for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, cb[1])), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, cb[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, cb[3])), + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, + offsetof(struct __sk_buff, cb[4])), + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, + offsetof(struct __sk_buff, napi_id)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_4, + offsetof(struct __sk_buff, cb[0])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_5, + offsetof(struct __sk_buff, cb[1])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, + offsetof(struct __sk_buff, cb[2])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7, + offsetof(struct __sk_buff, cb[3])), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_8, + offsetof(struct __sk_buff, cb[4])), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "direct packet read test#4 for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, family)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip4)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, + offsetof(struct __sk_buff, local_ip4)), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6[1])), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6[3])), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6[1])), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6[3])), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, remote_port)), + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, + offsetof(struct __sk_buff, local_port)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid access of tc_classid for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tc_classid)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid bpf_context access", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid access of data_meta for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, data_meta)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid bpf_context access", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid access of flow_keys for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, flow_keys)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid bpf_context access", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid write access to napi_id for CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, + offsetof(struct __sk_buff, napi_id)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_9, + offsetof(struct __sk_buff, napi_id)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid bpf_context access", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { "valid cgroup storage access", .insns = { BPF_MOV64_IMM(BPF_REG_2, 0), @@ -4656,7 +5432,7 @@ static struct bpf_test tests[] = { BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .fixup_map1 = { 1 }, + .fixup_map_hash_8b = { 1 }, .result = REJECT, .errstr = "cannot pass map_type 1 into func bpf_get_local_storage", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, @@ -4676,7 +5452,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, { - "invalid per-cgroup storage access 3", + "invalid cgroup storage access 3", .insns = { BPF_MOV64_IMM(BPF_REG_2, 0), BPF_LD_MAP_FD(BPF_REG_1, 0), @@ -4708,6 +5484,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "invalid access to map value, value_size=64 off=-2 size=4", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "invalid cgroup storage access 5", @@ -4741,6 +5518,149 @@ static struct bpf_test tests[] = { .fixup_cgroup_storage = { 1 }, .result = REJECT, .errstr = "get_local_storage() doesn't support non-zero flags", + .errstr_unpriv = "R2 leaks addr into helper function", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "valid per-cpu cgroup storage access", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_percpu_cgroup_storage = { 1 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid per-cpu cgroup storage access 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 1 }, + .result = REJECT, + .errstr = "cannot pass map_type 1 into func bpf_get_local_storage", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid per-cpu cgroup storage access 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "fd 1 is not pointing to valid bpf_map", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid per-cpu cgroup storage access 3", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_percpu_cgroup_storage = { 1 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=64 off=256 size=4", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid per-cpu cgroup storage access 4", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1), + BPF_EXIT_INSN(), + }, + .fixup_cgroup_storage = { 1 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=64 off=-2 size=4", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "invalid per-cpu cgroup storage access 5", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 7), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_percpu_cgroup_storage = { 1 }, + .result = REJECT, + .errstr = "get_local_storage() doesn't support non-zero flags", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "invalid per-cpu cgroup storage access 6", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_percpu_cgroup_storage = { 1 }, + .result = REJECT, + .errstr = "get_local_storage() doesn't support non-zero flags", + .errstr_unpriv = "R2 leaks addr into helper function", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "write tstamp from CGROUP_SKB", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, tstamp)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "invalid bpf_context access off=152 size=8", + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + }, + { + "read tstamp from CGROUP_SKB", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tstamp)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, { @@ -4758,7 +5678,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 4 }, + .fixup_map_hash_8b = { 4 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS }, @@ -4779,8 +5699,8 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 4 }, - .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL", + .fixup_map_hash_8b = { 4 }, + .errstr = "R4 pointer arithmetic on map_value_or_null", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS }, @@ -4800,8 +5720,8 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 4 }, - .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL", + .fixup_map_hash_8b = { 4 }, + .errstr = "R4 pointer arithmetic on map_value_or_null", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS }, @@ -4821,8 +5741,8 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 4 }, - .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL", + .fixup_map_hash_8b = { 4 }, + .errstr = "R4 pointer arithmetic on map_value_or_null", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS }, @@ -4847,7 +5767,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 4 }, + .fixup_map_hash_8b = { 4 }, .result = REJECT, .errstr = "R4 !read_ok", .prog_type = BPF_PROG_TYPE_SCHED_CLS @@ -4875,7 +5795,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 4 }, + .fixup_map_hash_8b = { 4 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS }, @@ -4896,7 +5816,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R0 unbounded memory access", .result = REJECT, .errstr_unpriv = "R0 leaks addr", @@ -5146,11 +6066,11 @@ static struct bpf_test tests[] = { offsetof(struct __sk_buff, cb[0])), BPF_EXIT_INSN(), }, - .fixup_map1 = { 2 }, + .fixup_map_hash_8b = { 2 }, .errstr_unpriv = "R2 leaks addr into mem", .result_unpriv = REJECT, .result = REJECT, - .errstr = "BPF_XADD stores into R1 context is not allowed", + .errstr = "BPF_XADD stores into R1 ctx is not allowed", }, { "leak pointer into ctx 2", @@ -5165,7 +6085,7 @@ static struct bpf_test tests[] = { .errstr_unpriv = "R10 leaks addr into mem", .result_unpriv = REJECT, .result = REJECT, - .errstr = "BPF_XADD stores into R1 context is not allowed", + .errstr = "BPF_XADD stores into R1 ctx is not allowed", }, { "leak pointer into ctx 3", @@ -5176,7 +6096,7 @@ static struct bpf_test tests[] = { offsetof(struct __sk_buff, cb[0])), BPF_EXIT_INSN(), }, - .fixup_map1 = { 1 }, + .fixup_map_hash_8b = { 1 }, .errstr_unpriv = "R2 leaks addr into ctx", .result_unpriv = REJECT, .result = ACCEPT, @@ -5198,7 +6118,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 4 }, + .fixup_map_hash_8b = { 4 }, .errstr_unpriv = "R6 leaks addr into mem", .result_unpriv = REJECT, .result = ACCEPT, @@ -5218,7 +6138,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5237,7 +6157,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5255,7 +6175,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_trace_printk), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "invalid access to map value, value_size=48 off=0 size=0", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5275,7 +6195,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "invalid access to map value, value_size=48 off=0 size=56", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5295,7 +6215,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R2 min value is negative", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5319,7 +6239,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5340,7 +6260,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5360,7 +6280,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_trace_printk), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "invalid access to map value, value_size=48 off=4 size=0", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5384,7 +6304,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "invalid access to map value, value_size=48 off=4 size=52", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5406,7 +6326,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R2 min value is negative", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5428,7 +6348,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R2 min value is negative", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5453,7 +6373,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5475,7 +6395,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5495,7 +6415,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_trace_printk), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R1 min value is outside of the array range", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5520,7 +6440,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "invalid access to map value, value_size=48 off=4 size=52", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5543,7 +6463,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R2 min value is negative", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5566,7 +6486,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R2 min value is negative", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5592,7 +6512,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5615,7 +6535,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5637,7 +6557,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_trace_printk), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R1 min value is outside of the array range", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5659,7 +6579,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R1 unbounded memory access", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5685,7 +6605,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "invalid access to map value, value_size=48 off=4 size=45", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5709,7 +6629,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5732,7 +6652,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = REJECT, .errstr = "R1 unbounded memory access", .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5756,7 +6676,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5779,7 +6699,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = REJECT, .errstr = "R1 unbounded memory access", .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5804,7 +6724,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5828,7 +6748,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5852,7 +6772,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = REJECT, .errstr = "R1 min value is negative", .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -5877,7 +6797,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5901,7 +6821,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5925,12 +6845,835 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = REJECT, .errstr = "R1 min value is negative", .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, { + "map access: known scalar += value_ptr from different maps", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps", + .retval = 1, + }, + { + "map access: value_ptr -= known scalar from different maps", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 min value is outside of the array range", + .retval = 1, + }, + { + "map access: known scalar += value_ptr from different maps, but same value properties", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result = ACCEPT, + .retval = 1, + }, + { + "map access: mixing value pointer and scalar, 1", + .insns = { + // load map value pointer into r0 and r2 + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_FD(BPF_REG_ARG1, 0), + BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -16), + BPF_ST_MEM(BPF_DW, BPF_REG_FP, -16, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + // load some number from the map into r1 + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + // depending on r1, branch: + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 3), + // branch A + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_JMP_A(2), + // branch B + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0x100000), + // common instruction + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + // depending on r1, branch: + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + // branch A + BPF_JMP_A(4), + // branch B + BPF_MOV64_IMM(BPF_REG_0, 0x13371337), + // verifier follows fall-through + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0x100000, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + // fake-dead code; targeted from branch A to + // prevent dead code sanitization + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R2 tried to add from different pointers or scalars", + .retval = 0, + }, + { + "map access: mixing value pointer and scalar, 2", + .insns = { + // load map value pointer into r0 and r2 + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_FD(BPF_REG_ARG1, 0), + BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -16), + BPF_ST_MEM(BPF_DW, BPF_REG_FP, -16, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + // load some number from the map into r1 + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + // depending on r1, branch: + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + // branch A + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0x100000), + BPF_JMP_A(2), + // branch B + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, 0), + // common instruction + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + // depending on r1, branch: + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + // branch A + BPF_JMP_A(4), + // branch B + BPF_MOV64_IMM(BPF_REG_0, 0x13371337), + // verifier follows fall-through + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0x100000, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + // fake-dead code; targeted from branch A to + // prevent dead code sanitization + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R2 tried to add from different maps or paths", + .retval = 0, + }, + { + "sanitation: alu with different scalars", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_FD(BPF_REG_ARG1, 0), + BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -16), + BPF_ST_MEM(BPF_DW, BPF_REG_FP, -16, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0x100000), + BPF_JMP_A(2), + BPF_MOV64_IMM(BPF_REG_2, 42), + BPF_MOV64_IMM(BPF_REG_3, 0x100001), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result = ACCEPT, + .retval = 0x100000, + }, + { + "map access: value_ptr += known scalar, upper oob arith, test 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_IMM(BPF_REG_1, 48), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .retval = 1, + }, + { + "map access: value_ptr += known scalar, upper oob arith, test 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_IMM(BPF_REG_1, 49), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .retval = 1, + }, + { + "map access: value_ptr += known scalar, upper oob arith, test 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_IMM(BPF_REG_1, 47), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .retval = 1, + }, + { + "map access: value_ptr -= known scalar, lower oob arith, test 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_IMM(BPF_REG_1, 47), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 48), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "R0 min value is outside of the array range", + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + }, + { + "map access: value_ptr -= known scalar, lower oob arith, test 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_IMM(BPF_REG_1, 47), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 48), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .retval = 1, + }, + { + "map access: value_ptr -= known scalar, lower oob arith, test 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_IMM(BPF_REG_1, 47), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 47), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .retval = 1, + }, + { + "map access: known scalar += value_ptr", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 1, + }, + { + "map access: value_ptr += known scalar, 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 1, + }, + { + "map access: value_ptr += known scalar, 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 49), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "invalid access to map value", + }, + { + "map access: value_ptr += known scalar, 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "invalid access to map value", + }, + { + "map access: value_ptr += known scalar, 4", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, -2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .retval = 1, + }, + { + "map access: value_ptr += known scalar, 5", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, (6 + 1) * sizeof(int)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 0xabcdef12, + }, + { + "map access: value_ptr += known scalar, 6", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_IMM(BPF_REG_1, (3 + 1) * sizeof(int)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 3 * sizeof(int)), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 0xabcdef12, + }, + { + "map access: unknown scalar += value_ptr, 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 1, + }, + { + "map access: unknown scalar += value_ptr, 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 31), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 0xabcdef12, + }, + { + "map access: unknown scalar += value_ptr, 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 31), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .retval = 0xabcdef12, + }, + { + "map access: unknown scalar += value_ptr, 4", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_IMM(BPF_REG_1, 19), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 31), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "R1 max value is outside of the array range", + .errstr_unpriv = "R1 pointer arithmetic of map value goes out of range", + }, + { + "map access: value_ptr += unknown scalar, 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 1, + }, + { + "map access: value_ptr += unknown scalar, 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 31), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 0xabcdef12, + }, + { + "map access: value_ptr += unknown scalar, 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 8), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 16), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), + BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 1), + BPF_ALU64_IMM(BPF_OR, BPF_REG_3, 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_3, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_JMP_IMM(BPF_JA, 0, 0, -3), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 1, + }, + { + "map access: value_ptr += value_ptr", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "R0 pointer += pointer prohibited", + }, + { + "map access: known scalar -= value_ptr", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "R1 tried to subtract pointer from scalar", + }, + { + "map access: value_ptr -= known scalar", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "R0 min value is outside of the array range", + }, + { + "map access: value_ptr -= known scalar, 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .retval = 1, + }, + { + "map access: unknown scalar -= value_ptr", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), + BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "R1 tried to subtract pointer from scalar", + }, + { + "map access: value_ptr -= unknown scalar", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "R0 min value is negative", + }, + { + "map access: value_ptr -= unknown scalar, 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf), + BPF_ALU64_IMM(BPF_OR, BPF_REG_1, 0x7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", + .retval = 1, + }, + { + "map access: value_ptr -= value_ptr", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = REJECT, + .errstr = "R0 invalid mem access 'inv'", + .errstr_unpriv = "R0 pointer -= pointer prohibited", + }, + { "map lookup helper access to map", .insns = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), @@ -5944,7 +7687,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, - .fixup_map3 = { 3, 8 }, + .fixup_map_hash_16b = { 3, 8 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5964,7 +7707,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_map_update_elem), BPF_EXIT_INSN(), }, - .fixup_map3 = { 3, 10 }, + .fixup_map_hash_16b = { 3, 10 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5984,8 +7727,8 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_map_update_elem), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, - .fixup_map3 = { 10 }, + .fixup_map_hash_8b = { 3 }, + .fixup_map_hash_16b = { 10 }, .result = REJECT, .errstr = "invalid access to map value, value_size=8 off=0 size=16", .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -6006,7 +7749,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, - .fixup_map3 = { 3, 9 }, + .fixup_map_hash_16b = { 3, 9 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -6026,7 +7769,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, - .fixup_map3 = { 3, 9 }, + .fixup_map_hash_16b = { 3, 9 }, .result = REJECT, .errstr = "invalid access to map value, value_size=16 off=12 size=8", .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -6046,7 +7789,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, - .fixup_map3 = { 3, 9 }, + .fixup_map_hash_16b = { 3, 9 }, .result = REJECT, .errstr = "invalid access to map value, value_size=16 off=-4 size=8", .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -6068,7 +7811,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, - .fixup_map3 = { 3, 10 }, + .fixup_map_hash_16b = { 3, 10 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -6089,7 +7832,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, - .fixup_map3 = { 3, 10 }, + .fixup_map_hash_16b = { 3, 10 }, .result = REJECT, .errstr = "invalid access to map value, value_size=16 off=12 size=8", .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -6110,7 +7853,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, - .fixup_map3 = { 3, 10 }, + .fixup_map_hash_16b = { 3, 10 }, .result = REJECT, .errstr = "invalid access to map value, value_size=16 off=-4 size=8", .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -6133,7 +7876,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, - .fixup_map3 = { 3, 11 }, + .fixup_map_hash_16b = { 3, 11 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -6153,7 +7896,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, - .fixup_map3 = { 3, 10 }, + .fixup_map_hash_16b = { 3, 10 }, .result = REJECT, .errstr = "R2 unbounded memory access, make sure to bounds check any array access into a map", .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -6176,7 +7919,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, - .fixup_map3 = { 3, 11 }, + .fixup_map_hash_16b = { 3, 11 }, .result = REJECT, .errstr = "invalid access to map value, value_size=16 off=9 size=8", .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -6198,7 +7941,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", .result = ACCEPT, .result_unpriv = REJECT, @@ -6219,7 +7962,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", .result = ACCEPT, .result_unpriv = REJECT, @@ -6236,7 +7979,7 @@ static struct bpf_test tests[] = { BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R1 !read_ok", .errstr = "R1 !read_ok", .result = REJECT, @@ -6270,7 +8013,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_7, -4, 24), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", .result = ACCEPT, .result_unpriv = REJECT, @@ -6298,7 +8041,7 @@ static struct bpf_test tests[] = { BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", .result = ACCEPT, .result_unpriv = REJECT, @@ -6317,7 +8060,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R0 bitwise operator &= on pointer", .result = REJECT, }, @@ -6334,7 +8077,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R0 32-bit pointer arithmetic prohibited", .result = REJECT, }, @@ -6351,7 +8094,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R0 pointer arithmetic with /= operator", .result = REJECT, }, @@ -6368,11 +8111,12 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 pointer arithmetic prohibited", .errstr = "invalid mem access 'inv'", .result = REJECT, .result_unpriv = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "map element value illegal alu op, 5", @@ -6392,9 +8136,10 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R0 invalid mem access 'inv'", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "map element value is preserved across register spilling", @@ -6415,7 +8160,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", .result = ACCEPT, .result_unpriv = REJECT, @@ -6661,7 +8406,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -6687,7 +8432,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "invalid access to map value, value_size=48 off=0 size=49", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -6715,7 +8460,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -6742,7 +8487,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R1 min value is outside of the array range", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -6814,7 +8559,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_csum_diff), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -6839,7 +8584,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_csum_diff), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -6862,7 +8607,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_csum_diff), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -6888,6 +8633,7 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .retval = 0 /* csum_diff of 64-byte packet */, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)", @@ -6943,7 +8689,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -6964,7 +8710,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -6984,7 +8730,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -7059,7 +8805,7 @@ static struct bpf_test tests[] = { offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R0 max value is outside of the array range", .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -7089,7 +8835,7 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr = "R0 max value is outside of the array range", .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -7137,7 +8883,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map_in_map = { 3 }, - .errstr = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited", + .errstr = "R1 pointer arithmetic on map_ptr prohibited", .result = REJECT, }, { @@ -7442,8 +9188,9 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -7466,8 +9213,9 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -7492,8 +9240,9 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", + .errstr_unpriv = "R8 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -7517,8 +9266,9 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", + .errstr_unpriv = "R8 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -7541,7 +9291,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT, }, { @@ -7565,8 +9315,9 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -7611,7 +9362,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT, }, { @@ -7636,8 +9387,9 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -7662,7 +9414,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT, }, { @@ -7687,8 +9439,9 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -7714,8 +9467,9 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -7740,8 +9494,9 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -7769,8 +9524,9 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", + .errstr_unpriv = "R7 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -7799,8 +9555,9 @@ static struct bpf_test tests[] = { BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, -3), BPF_JMP_IMM(BPF_JA, 0, 0, -7), }, - .fixup_map1 = { 4 }, - .errstr = "R0 invalid mem access 'inv'", + .fixup_map_hash_8b = { 4 }, + .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { @@ -7827,8 +9584,9 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "unbounded min value", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, .result_unpriv = REJECT, }, @@ -7854,7 +9612,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "R0 max value is outside of the array range", .result = REJECT, }, @@ -7879,11 +9637,41 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", + .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds", .result = REJECT, }, { + "check subtraction on pointers for unpriv", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LD_MAP_FD(BPF_REG_ARG1, 0), + BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_ARG2, 0, 9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_FP), + BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_ARG1, 0), + BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_ARG2, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 1, 9 }, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R9 pointer -= pointer prohibited", + }, + { "bounds check based on zero-extended MOV", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), @@ -7905,7 +9693,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT }, { @@ -7930,7 +9718,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "map_value pointer and 4294967295", .result = REJECT }, @@ -7956,7 +9744,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "R0 min value is outside of the array range", .result = REJECT }, @@ -7980,7 +9768,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 4 }, + .fixup_map_hash_8b = { 4 }, .errstr = "value_size=8 off=1073741825", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, @@ -8005,7 +9793,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 4 }, + .fixup_map_hash_8b = { 4 }, .errstr = "value 1073741823", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, @@ -8041,7 +9829,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT }, { @@ -8080,7 +9868,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, /* not actually fully unbounded, but the bound is very high */ .errstr = "R0 unbounded memory access", .result = REJECT @@ -8123,7 +9911,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, /* not actually fully unbounded, but the bound is very high */ .errstr = "R0 unbounded memory access", .result = REJECT @@ -8152,7 +9940,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT }, { @@ -8179,7 +9967,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "R0 max value is outside of the array range", .result = REJECT }, @@ -8209,11 +9997,41 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "R0 unbounded memory access", .result = REJECT }, { + "bounds check after 32-bit right shift with 64-bit input", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + /* r1 = 2 */ + BPF_MOV64_IMM(BPF_REG_1, 2), + /* r1 = 1<<32 */ + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 31), + /* r1 = 0 (NOT 2!) */ + BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 31), + /* r1 = 0xffff'fffe (NOT 0!) */ + BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 2), + /* computes OOB pointer */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + /* OOB access */ + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + /* exit */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr = "R0 invalid mem access", + .result = REJECT, + }, + { "bounds check map access with off+size signed 32bit overflow. test1", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), @@ -8229,7 +10047,7 @@ static struct bpf_test tests[] = { BPF_JMP_A(0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "map_value pointer and 2147483646", .result = REJECT }, @@ -8251,8 +10069,9 @@ static struct bpf_test tests[] = { BPF_JMP_A(0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "pointer offset 1073741822", + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .result = REJECT }, { @@ -8272,8 +10091,9 @@ static struct bpf_test tests[] = { BPF_JMP_A(0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "pointer offset -1073741822", + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .result = REJECT }, { @@ -8294,7 +10114,7 @@ static struct bpf_test tests[] = { BPF_JMP_A(0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "map_value pointer and 1000000000000", .result = REJECT }, @@ -8314,7 +10134,7 @@ static struct bpf_test tests[] = { BPF_JMP_A(0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT, .retval = POINTER_VALUE, .result_unpriv = REJECT, @@ -8335,7 +10155,7 @@ static struct bpf_test tests[] = { BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = ACCEPT, .retval = POINTER_VALUE, .result_unpriv = REJECT, @@ -8403,7 +10223,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 5 }, + .fixup_map_hash_8b = { 5 }, .errstr = "variable stack read R2", .result = REJECT, .prog_type = BPF_PROG_TYPE_LWT_IN, @@ -8445,6 +10265,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN() }, .errstr = "fp pointer offset 1073741822", + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", .result = REJECT }, { @@ -8484,7 +10305,7 @@ static struct bpf_test tests[] = { offsetof(struct test_val, foo)), BPF_EXIT_INSN(), }, - .fixup_map2 = { 3 }, + .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", .errstr = "R0 unbounded memory access", .result_unpriv = REJECT, @@ -8811,7 +10632,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END", + .errstr = "R3 pointer arithmetic on pkt_end", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, }, @@ -8830,7 +10651,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END", + .errstr = "R3 pointer arithmetic on pkt_end", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, }, @@ -8850,6 +10671,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' > pkt_end, bad access 1", @@ -8887,6 +10709,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end > pkt_data', good access", @@ -8925,6 +10748,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end > pkt_data', bad access 2", @@ -8943,6 +10767,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' < pkt_end, good access", @@ -8981,6 +10806,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' < pkt_end, bad access 2", @@ -8999,6 +10825,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end < pkt_data', good access", @@ -9016,6 +10843,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end < pkt_data', bad access 1", @@ -9053,6 +10881,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' >= pkt_end, good access", @@ -9089,6 +10918,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' >= pkt_end, bad access 2", @@ -9126,6 +10956,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end >= pkt_data', bad access 1", @@ -9164,6 +10995,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' <= pkt_end, good access", @@ -9182,6 +11014,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data' <= pkt_end, bad access 1", @@ -9220,6 +11053,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end <= pkt_data', good access", @@ -9256,6 +11090,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_end <= pkt_data', bad access 2", @@ -9292,6 +11127,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' > pkt_data, bad access 1", @@ -9329,6 +11165,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data > pkt_meta', good access", @@ -9367,6 +11204,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data > pkt_meta', bad access 2", @@ -9385,6 +11223,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' < pkt_data, good access", @@ -9423,6 +11262,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' < pkt_data, bad access 2", @@ -9441,6 +11281,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data < pkt_meta', good access", @@ -9458,6 +11299,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data < pkt_meta', bad access 1", @@ -9495,6 +11337,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' >= pkt_data, good access", @@ -9531,6 +11374,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' >= pkt_data, bad access 2", @@ -9568,6 +11412,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data >= pkt_meta', bad access 1", @@ -9606,6 +11451,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' <= pkt_data, good access", @@ -9624,6 +11470,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_meta' <= pkt_data, bad access 1", @@ -9662,6 +11509,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data <= pkt_meta', good access", @@ -9698,6 +11546,7 @@ static struct bpf_test tests[] = { .errstr = "R1 offset is outside of the packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "XDP pkt read, pkt_data <= pkt_meta', bad access 2", @@ -9771,7 +11620,7 @@ static struct bpf_test tests[] = { "check deducing bounds from const, 5", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1), BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, @@ -9802,6 +11651,7 @@ static struct bpf_test tests[] = { }, .result = REJECT, .errstr = "dereference of modified ctx ptr", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "check deducing bounds from const, 8", @@ -9815,6 +11665,7 @@ static struct bpf_test tests[] = { }, .result = REJECT, .errstr = "dereference of modified ctx ptr", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "check deducing bounds from const, 9", @@ -10018,7 +11869,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map1 = { 16 }, + .fixup_map_hash_8b = { 16 }, .result = REJECT, .errstr = "R0 min value is outside of the array range", }, @@ -10289,6 +12140,7 @@ static struct bpf_test tests[] = { .result = REJECT, .errstr = "R6 invalid mem access 'inv'", .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: two calls with args", @@ -10969,7 +12821,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), /* return 0 */ }, .prog_type = BPF_PROG_TYPE_XDP, - .fixup_map1 = { 23 }, + .fixup_map_hash_8b = { 23 }, .result = ACCEPT, }, { @@ -11024,7 +12876,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), /* return 1 */ }, .prog_type = BPF_PROG_TYPE_XDP, - .fixup_map1 = { 23 }, + .fixup_map_hash_8b = { 23 }, .result = ACCEPT, }, { @@ -11079,7 +12931,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), /* return 1 */ }, .prog_type = BPF_PROG_TYPE_XDP, - .fixup_map1 = { 23 }, + .fixup_map_hash_8b = { 23 }, .result = REJECT, .errstr = "invalid read from stack off -16+0 size 8", }, @@ -11151,9 +13003,10 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map1 = { 12, 22 }, + .fixup_map_hash_8b = { 12, 22 }, .result = REJECT, .errstr = "invalid access to map value, value_size=8 off=2 size=8", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test2", @@ -11223,7 +13076,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map1 = { 12, 22 }, + .fixup_map_hash_8b = { 12, 22 }, .result = ACCEPT, }, { @@ -11294,9 +13147,10 @@ static struct bpf_test tests[] = { BPF_JMP_IMM(BPF_JA, 0, 0, -8), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map1 = { 12, 22 }, + .fixup_map_hash_8b = { 12, 22 }, .result = REJECT, .errstr = "invalid access to map value, value_size=8 off=2 size=8", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: two calls that receive map_value_ptr_or_null via arg. test1", @@ -11366,7 +13220,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map1 = { 12, 22 }, + .fixup_map_hash_8b = { 12, 22 }, .result = ACCEPT, }, { @@ -11437,7 +13291,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map1 = { 12, 22 }, + .fixup_map_hash_8b = { 12, 22 }, .result = REJECT, .errstr = "R0 invalid mem access 'inv'", }, @@ -11468,6 +13322,7 @@ static struct bpf_test tests[] = { .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .retval = POINTER_VALUE, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 2", @@ -11499,6 +13354,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "invalid access to packet", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 3", @@ -11534,6 +13390,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, .retval = 1, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 4", @@ -11568,6 +13425,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, .retval = 1, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 5", @@ -11601,6 +13459,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "same insn cannot be used with different", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 6", @@ -11636,6 +13495,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "R4 invalid mem access", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 7", @@ -11670,6 +13530,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "R4 invalid mem access", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 8", @@ -11710,6 +13571,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: pkt_ptr spill into caller stack 9", @@ -11751,6 +13613,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .errstr = "invalid access to packet", .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "calls: caller stack init to zero or map_value_or_null", @@ -11782,7 +13645,7 @@ static struct bpf_test tests[] = { BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map1 = { 13 }, + .fixup_map_hash_8b = { 13 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_XDP, }, @@ -11809,7 +13672,7 @@ static struct bpf_test tests[] = { BPF_FUNC_map_lookup_elem), BPF_EXIT_INSN(), }, - .fixup_map2 = { 6 }, + .fixup_map_hash_48b = { 6 }, .errstr = "invalid indirect read from stack off -8+0 size 8", .result = REJECT, .prog_type = BPF_PROG_TYPE_XDP, @@ -11841,8 +13704,8 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .fixup_map2 = { 13 }, - .fixup_map4 = { 16 }, + .fixup_map_hash_48b = { 13 }, + .fixup_map_array_48b = { 16 }, .result = ACCEPT, .retval = 1, }, @@ -11874,7 +13737,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .fixup_map_in_map = { 16 }, - .fixup_map4 = { 13 }, + .fixup_map_array_48b = { 13 }, .result = REJECT, .errstr = "R0 invalid mem access 'map_ptr'", }, @@ -11942,7 +13805,7 @@ static struct bpf_test tests[] = { BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xdead), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "R6 invalid mem access 'inv'", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -11966,7 +13829,7 @@ static struct bpf_test tests[] = { BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .errstr = "invalid read from stack off -16+0 size 8", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, @@ -12088,7 +13951,7 @@ static struct bpf_test tests[] = { BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 3), BPF_EXIT_INSN(), }, - .fixup_map1 = { 3 }, + .fixup_map_hash_8b = { 3 }, .result = REJECT, .errstr = "misaligned value access off", .prog_type = BPF_PROG_TYPE_SCHED_CLS, @@ -12114,8 +13977,9 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "BPF_XADD stores into R2 packet", + .errstr = "BPF_XADD stores into R2 pkt is not allowed", .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "xadd/w check whether src/dst got mangled, 1", @@ -12198,7 +14062,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_get_stack), BPF_EXIT_INSN(), }, - .fixup_map2 = { 4 }, + .fixup_map_hash_48b = { 4 }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -12442,6 +14306,216 @@ static struct bpf_test tests[] = { .result = ACCEPT, }, { + "reference tracking: leak potential reference", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), /* leak reference */ + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, + }, + { + "reference tracking: leak potential reference on stack", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, + }, + { + "reference tracking: leak potential reference on stack 2", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, + }, + { + "reference tracking: zero potential reference", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_IMM(BPF_REG_0, 0), /* leak reference */ + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, + }, + { + "reference tracking: copy and zero potential references", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_7, 0), /* leak reference */ + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, + }, + { + "reference tracking: release reference without check", + .insns = { + BPF_SK_LOOKUP, + /* reference in r0 may be NULL */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "type=sock_or_null expected=sock", + .result = REJECT, + }, + { + "reference tracking: release reference", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "reference tracking: release reference 2", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "reference tracking: release reference twice", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "type=inv expected=sock", + .result = REJECT, + }, + { + "reference tracking: release reference twice inside branch", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), /* goto end */ + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "type=inv expected=sock", + .result = REJECT, + }, + { + "reference tracking: alloc, check, free in one subbranch", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 16), + /* if (offsetof(skb, mark) > data_len) exit; */ + BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2, + offsetof(struct __sk_buff, mark)), + BPF_SK_LOOKUP, + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 1), /* mark == 0? */ + /* Leak reference in R0 */ + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "reference tracking: alloc, check, free in both subbranches", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 16), + /* if (offsetof(skb, mark) > data_len) exit; */ + BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2, + offsetof(struct __sk_buff, mark)), + BPF_SK_LOOKUP, + BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 4), /* mark == 0? */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "reference tracking in call: free reference in subprog", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { "pass modified ctx pointer to helper, 1", .insns = { BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612), @@ -12511,6 +14585,1020 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, }, + { + "allocated_stack", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + BPF_ALU64_REG(BPF_MOV, BPF_REG_7, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, -8), + BPF_STX_MEM(BPF_B, BPF_REG_10, BPF_REG_7, -9), + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_10, -9), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .result_unpriv = ACCEPT, + .insn_processed = 15, + }, + { + "masking, test out of bounds 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 5), + BPF_MOV32_IMM(BPF_REG_2, 5 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test out of bounds 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 1), + BPF_MOV32_IMM(BPF_REG_2, 1 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test out of bounds 3", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0xffffffff), + BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test out of bounds 4", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0xffffffff), + BPF_MOV32_IMM(BPF_REG_2, 1 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test out of bounds 5", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, -1), + BPF_MOV32_IMM(BPF_REG_2, 1 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test out of bounds 6", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, -1), + BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test out of bounds 7", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_MOV32_IMM(BPF_REG_2, 5 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test out of bounds 8", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_MOV32_IMM(BPF_REG_2, 1 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test out of bounds 9", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0xffffffff), + BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test out of bounds 10", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0xffffffff), + BPF_MOV32_IMM(BPF_REG_2, 1 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test out of bounds 11", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_MOV32_IMM(BPF_REG_2, 1 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test out of bounds 12", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, -1), + BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test in bounds 1", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 4), + BPF_MOV32_IMM(BPF_REG_2, 5 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 4, + }, + { + "masking, test in bounds 2", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test in bounds 3", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0xfffffffe), + BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0xfffffffe, + }, + { + "masking, test in bounds 4", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0xabcde), + BPF_MOV32_IMM(BPF_REG_2, 0xabcdef - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0xabcde, + }, + { + "masking, test in bounds 5", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, 1 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "masking, test in bounds 6", + .insns = { + BPF_MOV32_IMM(BPF_REG_1, 46), + BPF_MOV32_IMM(BPF_REG_2, 47 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 46, + }, + { + "masking, test in bounds 7", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, -46), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, -1), + BPF_MOV32_IMM(BPF_REG_2, 47 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_3), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_3), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_3, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 46, + }, + { + "masking, test in bounds 8", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, -47), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, -1), + BPF_MOV32_IMM(BPF_REG_2, 47 - 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_3), + BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_3), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63), + BPF_ALU64_REG(BPF_AND, BPF_REG_3, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_3), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, + }, + { + "reference tracking in call: free reference in subprog and outside", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "type=inv expected=sock", + .result = REJECT, + }, + { + "reference tracking in call: alloc & leak reference in subprog", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_4), + BPF_SK_LOOKUP, + /* spill unchecked sk_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, + }, + { + "reference tracking in call: alloc in subprog, release outside", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_SK_LOOKUP, + BPF_EXIT_INSN(), /* return sk */ + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .retval = POINTER_VALUE, + .result = ACCEPT, + }, + { + "reference tracking in call: sk_ptr leak into caller stack", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), + /* spill unchecked sk_ptr into stack of caller */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_5, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_SK_LOOKUP, + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "Unreleased reference", + .result = REJECT, + }, + { + "reference tracking in call: sk_ptr spill into caller stack", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + /* spill unchecked sk_ptr into stack of caller */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_5, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + /* now the sk_ptr is verified, free the reference */ + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_4, 0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_SK_LOOKUP, + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "reference tracking: allow LD_ABS", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "reference tracking: forbid LD_ABS while holding reference", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_SK_LOOKUP, + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "BPF_LD_[ABS|IND] cannot be mixed with socket references", + .result = REJECT, + }, + { + "reference tracking: allow LD_IND", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_MOV64_IMM(BPF_REG_7, 1), + BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, + }, + { + "reference tracking: forbid LD_IND while holding reference", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_7, 1), + BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "BPF_LD_[ABS|IND] cannot be mixed with socket references", + .result = REJECT, + }, + { + "reference tracking: check reference or tail call", + .insns = { + BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), + BPF_SK_LOOKUP, + /* if (sk) bpf_sk_release() */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 7), + /* bpf_tail_call() */ + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 17 }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "reference tracking: release reference then tail call", + .insns = { + BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), + BPF_SK_LOOKUP, + /* if (sk) bpf_sk_release() */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + /* bpf_tail_call() */ + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 18 }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "reference tracking: leak possible reference over tail call", + .insns = { + BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), + /* Look up socket and store in REG_6 */ + BPF_SK_LOOKUP, + /* bpf_tail_call() */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 0), + /* if (sk) bpf_sk_release() */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 16 }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "tail_call would lead to reference leak", + .result = REJECT, + }, + { + "reference tracking: leak checked reference over tail call", + .insns = { + BPF_MOV64_REG(BPF_REG_7, BPF_REG_1), + /* Look up socket and store in REG_6 */ + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + /* if (!sk) goto end */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + /* bpf_tail_call() */ + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 17 }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "tail_call would lead to reference leak", + .result = REJECT, + }, + { + "reference tracking: mangle and release sock_or_null", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "R1 pointer arithmetic on sock_or_null prohibited", + .result = REJECT, + }, + { + "reference tracking: mangle and release sock", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "R1 pointer arithmetic on sock prohibited", + .result = REJECT, + }, + { + "reference tracking: access member", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "reference tracking: write to member", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_LD_IMM64(BPF_REG_2, 42), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_2, + offsetof(struct bpf_sock, mark)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LD_IMM64(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "cannot write into socket", + .result = REJECT, + }, + { + "reference tracking: invalid 64-bit access of member", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "invalid bpf_sock access off=0 size=8", + .result = REJECT, + }, + { + "reference tracking: access after release", + .insns = { + BPF_SK_LOOKUP, + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "!read_ok", + .result = REJECT, + }, + { + "reference tracking: direct access for lookup", + .insns = { + /* Check that the packet is at least 64B long */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 64), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 9), + /* sk = sk_lookup_tcp(ctx, skb->data, ...) */ + BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "calls: ctx read at start of subprog", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), + BPF_JMP_REG(BPF_JSGT, BPF_REG_0, BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "check wire_len is not readable by sockets", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, wire_len)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "check wire_len is readable by tc classifier", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, wire_len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "check wire_len is not writable by tc classifier", + .insns = { + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, + offsetof(struct __sk_buff, wire_len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "invalid bpf_context access", + .errstr_unpriv = "R1 leaks addr", + .result = REJECT, + }, + { + "calls: cross frame pruning", + .insns = { + /* r8 = !!random(); + * call pruner() + * if (r8) + * do something bad; + */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_prandom_u32), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_8, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_8, 1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .errstr = "!read_ok", + .result = REJECT, + }, + { + "jset: functional", + .insns = { + /* r0 = 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + /* prep for direct packet access via r2 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_4, BPF_REG_3, 1), + BPF_EXIT_INSN(), + + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + + /* reg, bit 63 or bit 0 set, taken */ + BPF_LD_IMM64(BPF_REG_8, 0x8000000000000001), + BPF_JMP_REG(BPF_JSET, BPF_REG_7, BPF_REG_8, 1), + BPF_EXIT_INSN(), + + /* reg, bit 62, not taken */ + BPF_LD_IMM64(BPF_REG_8, 0x4000000000000000), + BPF_JMP_REG(BPF_JSET, BPF_REG_7, BPF_REG_8, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_EXIT_INSN(), + + /* imm, any bit set, taken */ + BPF_JMP_IMM(BPF_JSET, BPF_REG_7, -1, 1), + BPF_EXIT_INSN(), + + /* imm, bit 31 set, taken */ + BPF_JMP_IMM(BPF_JSET, BPF_REG_7, 0x80000000, 1), + BPF_EXIT_INSN(), + + /* all good - return r0 == 2 */ + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .runs = 7, + .retvals = { + { .retval = 2, + .data64 = { (1ULL << 63) | (1U << 31) | (1U << 0), } + }, + { .retval = 2, + .data64 = { (1ULL << 63) | (1U << 31), } + }, + { .retval = 2, + .data64 = { (1ULL << 31) | (1U << 0), } + }, + { .retval = 2, + .data64 = { (__u32)-1, } + }, + { .retval = 2, + .data64 = { ~0x4000000000000000ULL, } + }, + { .retval = 0, + .data64 = { 0, } + }, + { .retval = 0, + .data64 = { ~0ULL, } + }, + }, + }, + { + "jset: sign-extend", + .insns = { + /* r0 = 0 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + /* prep for direct packet access via r2 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_4, BPF_REG_3, 1), + BPF_EXIT_INSN(), + + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_2, 0), + + BPF_JMP_IMM(BPF_JSET, BPF_REG_7, 0x80000000, 1), + BPF_EXIT_INSN(), + + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 2, + .data = { 1, 0, 0, 0, 0, 0, 0, 1, }, + }, + { + "jset: known const compare", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .retval_unpriv = 1, + .result_unpriv = ACCEPT, + .retval = 1, + .result = ACCEPT, + }, + { + "jset: known const compare bad", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .errstr_unpriv = "!read_ok", + .result_unpriv = REJECT, + .errstr = "!read_ok", + .result = REJECT, + }, + { + "jset: unknown const compare taken", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_prandom_u32), + BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .errstr_unpriv = "!read_ok", + .result_unpriv = REJECT, + .errstr = "!read_ok", + .result = REJECT, + }, + { + "jset: unknown const compare not taken", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_prandom_u32), + BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .errstr_unpriv = "!read_ok", + .result_unpriv = REJECT, + .errstr = "!read_ok", + .result = REJECT, + }, + { + "jset: half-known const compare", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_prandom_u32), + BPF_ALU64_IMM(BPF_OR, BPF_REG_0, 2), + BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .result_unpriv = ACCEPT, + .result = ACCEPT, + }, + { + "jset: range", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xff), + BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0xf0, 3), + BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 0x10, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0x10, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0x10, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .result_unpriv = ACCEPT, + .result = ACCEPT, + }, }; static int probe_filter_length(const struct bpf_insn *fp) @@ -12536,18 +15624,28 @@ static int create_map(uint32_t type, uint32_t size_key, return fd; } -static int create_prog_dummy1(void) +static void update_map(int fd, int index) +{ + struct test_val value = { + .index = (6 + 1) * sizeof(int), + .foo[6] = 0xabcdef12, + }; + + assert(!bpf_map_update_elem(fd, &index, &value, 0)); +} + +static int create_prog_dummy1(enum bpf_prog_type prog_type) { struct bpf_insn prog[] = { BPF_MOV64_IMM(BPF_REG_0, 42), BPF_EXIT_INSN(), }; - return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog, + return bpf_load_program(prog_type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0); } -static int create_prog_dummy2(int mfd, int idx) +static int create_prog_dummy2(enum bpf_prog_type prog_type, int mfd, int idx) { struct bpf_insn prog[] = { BPF_MOV64_IMM(BPF_REG_3, idx), @@ -12558,11 +15656,12 @@ static int create_prog_dummy2(int mfd, int idx) BPF_EXIT_INSN(), }; - return bpf_load_program(BPF_PROG_TYPE_SOCKET_FILTER, prog, + return bpf_load_program(prog_type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0); } -static int create_prog_array(uint32_t max_elem, int p1key) +static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem, + int p1key) { int p2key = 1; int mfd, p1fd, p2fd; @@ -12574,8 +15673,8 @@ static int create_prog_array(uint32_t max_elem, int p1key) return -1; } - p1fd = create_prog_dummy1(); - p2fd = create_prog_dummy2(mfd, p2key); + p1fd = create_prog_dummy1(prog_type); + p2fd = create_prog_dummy2(prog_type, mfd, p2key); if (p1fd < 0 || p2fd < 0) goto out; if (bpf_map_update_elem(mfd, &p1key, &p1fd, BPF_ANY) < 0) @@ -12615,32 +15714,39 @@ static int create_map_in_map(void) return outer_map_fd; } -static int create_cgroup_storage(void) +static int create_cgroup_storage(bool percpu) { + enum bpf_map_type type = percpu ? BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE : + BPF_MAP_TYPE_CGROUP_STORAGE; int fd; - fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, - sizeof(struct bpf_cgroup_storage_key), + fd = bpf_create_map(type, sizeof(struct bpf_cgroup_storage_key), TEST_DATA_LEN, 0, 0); if (fd < 0) - printf("Failed to create array '%s'!\n", strerror(errno)); + printf("Failed to create cgroup storage '%s'!\n", + strerror(errno)); return fd; } static char bpf_vlog[UINT_MAX >> 8]; -static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, - int *map_fds) +static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, + struct bpf_insn *prog, int *map_fds) { - int *fixup_map1 = test->fixup_map1; - int *fixup_map2 = test->fixup_map2; - int *fixup_map3 = test->fixup_map3; - int *fixup_map4 = test->fixup_map4; + int *fixup_map_hash_8b = test->fixup_map_hash_8b; + int *fixup_map_hash_48b = test->fixup_map_hash_48b; + int *fixup_map_hash_16b = test->fixup_map_hash_16b; + int *fixup_map_array_48b = test->fixup_map_array_48b; + int *fixup_map_sockmap = test->fixup_map_sockmap; + int *fixup_map_sockhash = test->fixup_map_sockhash; + int *fixup_map_xskmap = test->fixup_map_xskmap; + int *fixup_map_stacktrace = test->fixup_map_stacktrace; int *fixup_prog1 = test->fixup_prog1; int *fixup_prog2 = test->fixup_prog2; int *fixup_map_in_map = test->fixup_map_in_map; int *fixup_cgroup_storage = test->fixup_cgroup_storage; + int *fixup_percpu_cgroup_storage = test->fixup_percpu_cgroup_storage; if (test->fill_helper) test->fill_helper(test); @@ -12649,44 +15755,45 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, * for verifier and not do a runtime lookup, so the only thing * that really matters is value size in this case. */ - if (*fixup_map1) { + if (*fixup_map_hash_8b) { map_fds[0] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long), sizeof(long long), 1); do { - prog[*fixup_map1].imm = map_fds[0]; - fixup_map1++; - } while (*fixup_map1); + prog[*fixup_map_hash_8b].imm = map_fds[0]; + fixup_map_hash_8b++; + } while (*fixup_map_hash_8b); } - if (*fixup_map2) { + if (*fixup_map_hash_48b) { map_fds[1] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long), sizeof(struct test_val), 1); do { - prog[*fixup_map2].imm = map_fds[1]; - fixup_map2++; - } while (*fixup_map2); + prog[*fixup_map_hash_48b].imm = map_fds[1]; + fixup_map_hash_48b++; + } while (*fixup_map_hash_48b); } - if (*fixup_map3) { + if (*fixup_map_hash_16b) { map_fds[2] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long), sizeof(struct other_val), 1); do { - prog[*fixup_map3].imm = map_fds[2]; - fixup_map3++; - } while (*fixup_map3); + prog[*fixup_map_hash_16b].imm = map_fds[2]; + fixup_map_hash_16b++; + } while (*fixup_map_hash_16b); } - if (*fixup_map4) { + if (*fixup_map_array_48b) { map_fds[3] = create_map(BPF_MAP_TYPE_ARRAY, sizeof(int), sizeof(struct test_val), 1); + update_map(map_fds[3], 0); do { - prog[*fixup_map4].imm = map_fds[3]; - fixup_map4++; - } while (*fixup_map4); + prog[*fixup_map_array_48b].imm = map_fds[3]; + fixup_map_array_48b++; + } while (*fixup_map_array_48b); } if (*fixup_prog1) { - map_fds[4] = create_prog_array(4, 0); + map_fds[4] = create_prog_array(prog_type, 4, 0); do { prog[*fixup_prog1].imm = map_fds[4]; fixup_prog1++; @@ -12694,7 +15801,7 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, } if (*fixup_prog2) { - map_fds[5] = create_prog_array(8, 7); + map_fds[5] = create_prog_array(prog_type, 8, 7); do { prog[*fixup_prog2].imm = map_fds[5]; fixup_prog2++; @@ -12710,33 +15817,134 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog, } if (*fixup_cgroup_storage) { - map_fds[7] = create_cgroup_storage(); + map_fds[7] = create_cgroup_storage(false); do { prog[*fixup_cgroup_storage].imm = map_fds[7]; fixup_cgroup_storage++; } while (*fixup_cgroup_storage); } + + if (*fixup_percpu_cgroup_storage) { + map_fds[8] = create_cgroup_storage(true); + do { + prog[*fixup_percpu_cgroup_storage].imm = map_fds[8]; + fixup_percpu_cgroup_storage++; + } while (*fixup_percpu_cgroup_storage); + } + if (*fixup_map_sockmap) { + map_fds[9] = create_map(BPF_MAP_TYPE_SOCKMAP, sizeof(int), + sizeof(int), 1); + do { + prog[*fixup_map_sockmap].imm = map_fds[9]; + fixup_map_sockmap++; + } while (*fixup_map_sockmap); + } + if (*fixup_map_sockhash) { + map_fds[10] = create_map(BPF_MAP_TYPE_SOCKHASH, sizeof(int), + sizeof(int), 1); + do { + prog[*fixup_map_sockhash].imm = map_fds[10]; + fixup_map_sockhash++; + } while (*fixup_map_sockhash); + } + if (*fixup_map_xskmap) { + map_fds[11] = create_map(BPF_MAP_TYPE_XSKMAP, sizeof(int), + sizeof(int), 1); + do { + prog[*fixup_map_xskmap].imm = map_fds[11]; + fixup_map_xskmap++; + } while (*fixup_map_xskmap); + } + if (*fixup_map_stacktrace) { + map_fds[12] = create_map(BPF_MAP_TYPE_STACK_TRACE, sizeof(u32), + sizeof(u64), 1); + do { + prog[*fixup_map_stacktrace].imm = map_fds[12]; + fixup_map_stacktrace++; + } while (*fixup_map_stacktrace); + } +} + +static int set_admin(bool admin) +{ + cap_t caps; + const cap_value_t cap_val = CAP_SYS_ADMIN; + int ret = -1; + + caps = cap_get_proc(); + if (!caps) { + perror("cap_get_proc"); + return -1; + } + if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val, + admin ? CAP_SET : CAP_CLEAR)) { + perror("cap_set_flag"); + goto out; + } + if (cap_set_proc(caps)) { + perror("cap_set_proc"); + goto out; + } + ret = 0; +out: + if (cap_free(caps)) + perror("cap_free"); + return ret; +} + +static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val, + void *data, size_t size_data) +{ + __u8 tmp[TEST_DATA_LEN << 2]; + __u32 size_tmp = sizeof(tmp); + uint32_t retval; + int err; + + if (unpriv) + set_admin(true); + err = bpf_prog_test_run(fd_prog, 1, data, size_data, + tmp, &size_tmp, &retval, NULL); + if (unpriv) + set_admin(false); + if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) { + printf("Unexpected bpf_prog_test_run error "); + return err; + } + if (!err && retval != expected_val && + expected_val != POINTER_VALUE) { + printf("FAIL retval %d != %d ", retval, expected_val); + return 1; + } + + return 0; } static void do_test_single(struct bpf_test *test, bool unpriv, int *passes, int *errors) { - int fd_prog, expected_ret, reject_from_alignment; + int fd_prog, expected_ret, alignment_prevented_execution; int prog_len, prog_type = test->prog_type; struct bpf_insn *prog = test->insns; + int run_errs, run_successes; int map_fds[MAX_NR_MAPS]; const char *expected_err; - uint32_t retval; + __u32 pflags; int i, err; for (i = 0; i < MAX_NR_MAPS; i++) map_fds[i] = -1; - do_test_fixup(test, prog, map_fds); + if (!prog_type) + prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + do_test_fixup(test, prog_type, prog, map_fds); prog_len = probe_filter_length(prog); - fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, - prog, prog_len, test->flags & F_LOAD_WITH_STRICT_ALIGNMENT, + pflags = 0; + if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT) + pflags |= BPF_F_STRICT_ALIGNMENT; + if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) + pflags |= BPF_F_ANY_ALIGNMENT; + fd_prog = bpf_verify_program(prog_type, prog, prog_len, pflags, "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1); expected_ret = unpriv && test->result_unpriv != UNDEF ? @@ -12744,54 +15952,92 @@ static void do_test_single(struct bpf_test *test, bool unpriv, expected_err = unpriv && test->errstr_unpriv ? test->errstr_unpriv : test->errstr; - reject_from_alignment = fd_prog < 0 && - (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) && - strstr(bpf_vlog, "Unknown alignment."); -#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (reject_from_alignment) { - printf("FAIL\nFailed due to alignment despite having efficient unaligned access: '%s'!\n", - strerror(errno)); - goto fail_log; - } -#endif + alignment_prevented_execution = 0; + if (expected_ret == ACCEPT) { - if (fd_prog < 0 && !reject_from_alignment) { + if (fd_prog < 0) { printf("FAIL\nFailed to load prog '%s'!\n", strerror(errno)); goto fail_log; } +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + if (fd_prog >= 0 && + (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)) + alignment_prevented_execution = 1; +#endif } else { if (fd_prog >= 0) { printf("FAIL\nUnexpected success to load!\n"); goto fail_log; } - if (!strstr(bpf_vlog, expected_err) && !reject_from_alignment) { + if (!strstr(bpf_vlog, expected_err)) { printf("FAIL\nUnexpected error message!\n\tEXP: %s\n\tRES: %s\n", expected_err, bpf_vlog); goto fail_log; } } - if (fd_prog >= 0) { - __u8 tmp[TEST_DATA_LEN << 2]; - __u32 size_tmp = sizeof(tmp); + if (test->insn_processed) { + uint32_t insn_processed; + char *proc; - err = bpf_prog_test_run(fd_prog, 1, test->data, - sizeof(test->data), tmp, &size_tmp, - &retval, NULL); - if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) { - printf("Unexpected bpf_prog_test_run error\n"); + proc = strstr(bpf_vlog, "processed "); + insn_processed = atoi(proc + 10); + if (test->insn_processed != insn_processed) { + printf("FAIL\nUnexpected insn_processed %u vs %u\n", + insn_processed, test->insn_processed); goto fail_log; } - if (!err && retval != test->retval && - test->retval != POINTER_VALUE) { - printf("FAIL retval %d != %d\n", retval, test->retval); - goto fail_log; + } + + run_errs = 0; + run_successes = 0; + if (!alignment_prevented_execution && fd_prog >= 0) { + uint32_t expected_val; + int i; + + if (!test->runs) { + expected_val = unpriv && test->retval_unpriv ? + test->retval_unpriv : test->retval; + + err = do_prog_test_run(fd_prog, unpriv, expected_val, + test->data, sizeof(test->data)); + if (err) + run_errs++; + else + run_successes++; + } + + for (i = 0; i < test->runs; i++) { + if (unpriv && test->retvals[i].retval_unpriv) + expected_val = test->retvals[i].retval_unpriv; + else + expected_val = test->retvals[i].retval; + + err = do_prog_test_run(fd_prog, unpriv, expected_val, + test->retvals[i].data, + sizeof(test->retvals[i].data)); + if (err) { + printf("(run %d/%d) ", i + 1, test->runs); + run_errs++; + } else { + run_successes++; + } } } - (*passes)++; - printf("OK%s\n", reject_from_alignment ? - " (NOTE: reject due to unknown alignment)" : ""); + + if (!run_errs) { + (*passes)++; + if (run_successes > 1) + printf("%d cases ", run_successes); + printf("OK"); + if (alignment_prevented_execution) + printf(" (NOTE: not executed due to unknown alignment)"); + printf("\n"); + } else { + printf("\n"); + goto fail_log; + } close_fds: close(fd_prog); for (i = 0; i < MAX_NR_MAPS; i++) @@ -12828,33 +16074,6 @@ static bool is_admin(void) return (sysadmin == CAP_SET); } -static int set_admin(bool admin) -{ - cap_t caps; - const cap_value_t cap_val = CAP_SYS_ADMIN; - int ret = -1; - - caps = cap_get_proc(); - if (!caps) { - perror("cap_get_proc"); - return -1; - } - if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val, - admin ? CAP_SET : CAP_CLEAR)) { - perror("cap_set_flag"); - goto out; - } - if (cap_set_proc(caps)) { - perror("cap_set_proc"); - goto out; - } - ret = 0; -out: - if (cap_free(caps)) - perror("cap_free"); - return ret; -} - static void get_unpriv_disabled() { char buf[2]; @@ -12871,6 +16090,13 @@ static void get_unpriv_disabled() fclose(fd); } +static bool test_as_unpriv(struct bpf_test *test) +{ + return !test->prog_type || + test->prog_type == BPF_PROG_TYPE_SOCKET_FILTER || + test->prog_type == BPF_PROG_TYPE_CGROUP_SKB; +} + static int do_test(bool unpriv, unsigned int from, unsigned int to) { int i, passes = 0, errors = 0, skips = 0; @@ -12881,10 +16107,10 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to) /* Program types that are not supported by non-root we * skip right away. */ - if (!test->prog_type && unpriv_disabled) { + if (test_as_unpriv(test) && unpriv_disabled) { printf("#%d/u %s SKIP\n", i, test->descr); skips++; - } else if (!test->prog_type) { + } else if (test_as_unpriv(test)) { if (!unpriv) set_admin(false); printf("#%d/u %s ", i, test->descr); diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.c b/tools/testing/selftests/bpf/test_xdp_vlan.c new file mode 100644 index 000000000000..365a7d2d9f5c --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp_vlan.c @@ -0,0 +1,292 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright(c) 2018 Jesper Dangaard Brouer. + * + * XDP/TC VLAN manipulation example + * + * GOTCHA: Remember to disable NIC hardware offloading of VLANs, + * else the VLAN tags are NOT inlined in the packet payload: + * + * # ethtool -K ixgbe2 rxvlan off + * + * Verify setting: + * # ethtool -k ixgbe2 | grep rx-vlan-offload + * rx-vlan-offload: off + * + */ +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/in.h> +#include <linux/pkt_cls.h> + +#include "bpf_helpers.h" +#include "bpf_endian.h" + +/* linux/if_vlan.h have not exposed this as UAPI, thus mirror some here + * + * struct vlan_hdr - vlan header + * @h_vlan_TCI: priority and VLAN ID + * @h_vlan_encapsulated_proto: packet type ID or len + */ +struct _vlan_hdr { + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; +#define VLAN_PRIO_MASK 0xe000 /* Priority Code Point */ +#define VLAN_PRIO_SHIFT 13 +#define VLAN_CFI_MASK 0x1000 /* Canonical Format Indicator */ +#define VLAN_TAG_PRESENT VLAN_CFI_MASK +#define VLAN_VID_MASK 0x0fff /* VLAN Identifier */ +#define VLAN_N_VID 4096 + +struct parse_pkt { + __u16 l3_proto; + __u16 l3_offset; + __u16 vlan_outer; + __u16 vlan_inner; + __u8 vlan_outer_offset; + __u8 vlan_inner_offset; +}; + +char _license[] SEC("license") = "GPL"; + +static __always_inline +bool parse_eth_frame(struct ethhdr *eth, void *data_end, struct parse_pkt *pkt) +{ + __u16 eth_type; + __u8 offset; + + offset = sizeof(*eth); + /* Make sure packet is large enough for parsing eth + 2 VLAN headers */ + if ((void *)eth + offset + (2*sizeof(struct _vlan_hdr)) > data_end) + return false; + + eth_type = eth->h_proto; + + /* Handle outer VLAN tag */ + if (eth_type == bpf_htons(ETH_P_8021Q) + || eth_type == bpf_htons(ETH_P_8021AD)) { + struct _vlan_hdr *vlan_hdr; + + vlan_hdr = (void *)eth + offset; + pkt->vlan_outer_offset = offset; + pkt->vlan_outer = bpf_ntohs(vlan_hdr->h_vlan_TCI) + & VLAN_VID_MASK; + eth_type = vlan_hdr->h_vlan_encapsulated_proto; + offset += sizeof(*vlan_hdr); + } + + /* Handle inner (double) VLAN tag */ + if (eth_type == bpf_htons(ETH_P_8021Q) + || eth_type == bpf_htons(ETH_P_8021AD)) { + struct _vlan_hdr *vlan_hdr; + + vlan_hdr = (void *)eth + offset; + pkt->vlan_inner_offset = offset; + pkt->vlan_inner = bpf_ntohs(vlan_hdr->h_vlan_TCI) + & VLAN_VID_MASK; + eth_type = vlan_hdr->h_vlan_encapsulated_proto; + offset += sizeof(*vlan_hdr); + } + + pkt->l3_proto = bpf_ntohs(eth_type); /* Convert to host-byte-order */ + pkt->l3_offset = offset; + + return true; +} + +/* Hint, VLANs are choosen to hit network-byte-order issues */ +#define TESTVLAN 4011 /* 0xFAB */ +// #define TO_VLAN 4000 /* 0xFA0 (hint 0xOA0 = 160) */ + +SEC("xdp_drop_vlan_4011") +int xdp_prognum0(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct parse_pkt pkt = { 0 }; + + if (!parse_eth_frame(data, data_end, &pkt)) + return XDP_ABORTED; + + /* Drop specific VLAN ID example */ + if (pkt.vlan_outer == TESTVLAN) + return XDP_ABORTED; + /* + * Using XDP_ABORTED makes it possible to record this event, + * via tracepoint xdp:xdp_exception like: + * # perf record -a -e xdp:xdp_exception + * # perf script + */ + return XDP_PASS; +} +/* +Commands to setup VLAN on Linux to test packets gets dropped: + + export ROOTDEV=ixgbe2 + export VLANID=4011 + ip link add link $ROOTDEV name $ROOTDEV.$VLANID type vlan id $VLANID + ip link set dev $ROOTDEV.$VLANID up + + ip link set dev $ROOTDEV mtu 1508 + ip addr add 100.64.40.11/24 dev $ROOTDEV.$VLANID + +Load prog with ip tool: + + ip link set $ROOTDEV xdp off + ip link set $ROOTDEV xdp object xdp_vlan01_kern.o section xdp_drop_vlan_4011 + +*/ + +/* Changing VLAN to zero, have same practical effect as removing the VLAN. */ +#define TO_VLAN 0 + +SEC("xdp_vlan_change") +int xdp_prognum1(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct parse_pkt pkt = { 0 }; + + if (!parse_eth_frame(data, data_end, &pkt)) + return XDP_ABORTED; + + /* Change specific VLAN ID */ + if (pkt.vlan_outer == TESTVLAN) { + struct _vlan_hdr *vlan_hdr = data + pkt.vlan_outer_offset; + + /* Modifying VLAN, preserve top 4 bits */ + vlan_hdr->h_vlan_TCI = + bpf_htons((bpf_ntohs(vlan_hdr->h_vlan_TCI) & 0xf000) + | TO_VLAN); + } + + return XDP_PASS; +} + +/* + * Show XDP+TC can cooperate, on creating a VLAN rewriter. + * 1. Create a XDP prog that can "pop"/remove a VLAN header. + * 2. Create a TC-bpf prog that egress can add a VLAN header. + */ + +#ifndef ETH_ALEN /* Ethernet MAC address length */ +#define ETH_ALEN 6 /* bytes */ +#endif +#define VLAN_HDR_SZ 4 /* bytes */ + +SEC("xdp_vlan_remove_outer") +int xdp_prognum2(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct parse_pkt pkt = { 0 }; + char *dest; + + if (!parse_eth_frame(data, data_end, &pkt)) + return XDP_ABORTED; + + /* Skip packet if no outer VLAN was detected */ + if (pkt.vlan_outer_offset == 0) + return XDP_PASS; + + /* Moving Ethernet header, dest overlap with src, memmove handle this */ + dest = data; + dest+= VLAN_HDR_SZ; + /* + * Notice: Taking over vlan_hdr->h_vlan_encapsulated_proto, by + * only moving two MAC addrs (12 bytes), not overwriting last 2 bytes + */ + __builtin_memmove(dest, data, ETH_ALEN * 2); + /* Note: LLVM built-in memmove inlining require size to be constant */ + + /* Move start of packet header seen by Linux kernel stack */ + bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ); + + return XDP_PASS; +} + +static __always_inline +void shift_mac_4bytes_16bit(void *data) +{ + __u16 *p = data; + + p[7] = p[5]; /* delete p[7] was vlan_hdr->h_vlan_TCI */ + p[6] = p[4]; /* delete p[6] was ethhdr->h_proto */ + p[5] = p[3]; + p[4] = p[2]; + p[3] = p[1]; + p[2] = p[0]; +} + +static __always_inline +void shift_mac_4bytes_32bit(void *data) +{ + __u32 *p = data; + + /* Assuming VLAN hdr present. The 4 bytes in p[3] that gets + * overwritten, is ethhdr->h_proto and vlan_hdr->h_vlan_TCI. + * The vlan_hdr->h_vlan_encapsulated_proto take over role as + * ethhdr->h_proto. + */ + p[3] = p[2]; + p[2] = p[1]; + p[1] = p[0]; +} + +SEC("xdp_vlan_remove_outer2") +int xdp_prognum3(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *orig_eth = data; + struct parse_pkt pkt = { 0 }; + + if (!parse_eth_frame(orig_eth, data_end, &pkt)) + return XDP_ABORTED; + + /* Skip packet if no outer VLAN was detected */ + if (pkt.vlan_outer_offset == 0) + return XDP_PASS; + + /* Simply shift down MAC addrs 4 bytes, overwrite h_proto + TCI */ + shift_mac_4bytes_32bit(data); + + /* Move start of packet header seen by Linux kernel stack */ + bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ); + + return XDP_PASS; +} + +/*===================================== + * BELOW: TC-hook based ebpf programs + * ==================================== + * The TC-clsact eBPF programs (currently) need to be attach via TC commands + */ + +SEC("tc_vlan_push") +int _tc_progA(struct __sk_buff *ctx) +{ + bpf_skb_vlan_push(ctx, bpf_htons(ETH_P_8021Q), TESTVLAN); + + return TC_ACT_OK; +} +/* +Commands to setup TC to use above bpf prog: + +export ROOTDEV=ixgbe2 +export FILE=xdp_vlan01_kern.o + +# Re-attach clsact to clear/flush existing role +tc qdisc del dev $ROOTDEV clsact 2> /dev/null ;\ +tc qdisc add dev $ROOTDEV clsact + +# Attach BPF prog EGRESS +tc filter add dev $ROOTDEV egress \ + prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push + +tc filter show dev $ROOTDEV egress +*/ diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.sh b/tools/testing/selftests/bpf/test_xdp_vlan.sh new file mode 100755 index 000000000000..51a3a31d1aac --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp_vlan.sh @@ -0,0 +1,195 @@ +#!/bin/bash + +TESTNAME=xdp_vlan + +usage() { + echo "Testing XDP + TC eBPF VLAN manipulations: $TESTNAME" + echo "" + echo "Usage: $0 [-vfh]" + echo " -v | --verbose : Verbose" + echo " --flush : Flush before starting (e.g. after --interactive)" + echo " --interactive : Keep netns setup running after test-run" + echo "" +} + +cleanup() +{ + local status=$? + + if [ "$status" = "0" ]; then + echo "selftests: $TESTNAME [PASS]"; + else + echo "selftests: $TESTNAME [FAILED]"; + fi + + if [ -n "$INTERACTIVE" ]; then + echo "Namespace setup still active explore with:" + echo " ip netns exec ns1 bash" + echo " ip netns exec ns2 bash" + exit $status + fi + + set +e + ip link del veth1 2> /dev/null + ip netns del ns1 2> /dev/null + ip netns del ns2 2> /dev/null +} + +# Using external program "getopt" to get --long-options +OPTIONS=$(getopt -o hvfi: \ + --long verbose,flush,help,interactive,debug -- "$@") +if (( $? != 0 )); then + usage + echo "selftests: $TESTNAME [FAILED] Error calling getopt, unknown option?" + exit 2 +fi +eval set -- "$OPTIONS" + +## --- Parse command line arguments / parameters --- +while true; do + case "$1" in + -v | --verbose) + export VERBOSE=yes + shift + ;; + -i | --interactive | --debug ) + INTERACTIVE=yes + shift + ;; + -f | --flush ) + cleanup + shift + ;; + -- ) + shift + break + ;; + -h | --help ) + usage; + echo "selftests: $TESTNAME [SKIP] usage help info requested" + exit 0 + ;; + * ) + shift + break + ;; + esac +done + +if [ "$EUID" -ne 0 ]; then + echo "selftests: $TESTNAME [FAILED] need root privileges" + exit 1 +fi + +ip link set dev lo xdp off 2>/dev/null > /dev/null +if [ $? -ne 0 ];then + echo "selftests: $TESTNAME [SKIP] need ip xdp support" + exit 0 +fi + +# Interactive mode likely require us to cleanup netns +if [ -n "$INTERACTIVE" ]; then + ip link del veth1 2> /dev/null + ip netns del ns1 2> /dev/null + ip netns del ns2 2> /dev/null +fi + +# Exit on failure +set -e + +# Some shell-tools dependencies +which ip > /dev/null +which tc > /dev/null +which ethtool > /dev/null + +# Make rest of shell verbose, showing comments as doc/info +if [ -n "$VERBOSE" ]; then + set -v +fi + +# Create two namespaces +ip netns add ns1 +ip netns add ns2 + +# Run cleanup if failing or on kill +trap cleanup 0 2 3 6 9 + +# Create veth pair +ip link add veth1 type veth peer name veth2 + +# Move veth1 and veth2 into the respective namespaces +ip link set veth1 netns ns1 +ip link set veth2 netns ns2 + +# NOTICE: XDP require VLAN header inside packet payload +# - Thus, disable VLAN offloading driver features +# - For veth REMEMBER TX side VLAN-offload +# +# Disable rx-vlan-offload (mostly needed on ns1) +ip netns exec ns1 ethtool -K veth1 rxvlan off +ip netns exec ns2 ethtool -K veth2 rxvlan off +# +# Disable tx-vlan-offload (mostly needed on ns2) +ip netns exec ns2 ethtool -K veth2 txvlan off +ip netns exec ns1 ethtool -K veth1 txvlan off + +export IPADDR1=100.64.41.1 +export IPADDR2=100.64.41.2 + +# In ns1/veth1 add IP-addr on plain net_device +ip netns exec ns1 ip addr add ${IPADDR1}/24 dev veth1 +ip netns exec ns1 ip link set veth1 up + +# In ns2/veth2 create VLAN device +export VLAN=4011 +export DEVNS2=veth2 +ip netns exec ns2 ip link add link $DEVNS2 name $DEVNS2.$VLAN type vlan id $VLAN +ip netns exec ns2 ip addr add ${IPADDR2}/24 dev $DEVNS2.$VLAN +ip netns exec ns2 ip link set $DEVNS2 up +ip netns exec ns2 ip link set $DEVNS2.$VLAN up + +# Bringup lo in netns (to avoids confusing people using --interactive) +ip netns exec ns1 ip link set lo up +ip netns exec ns2 ip link set lo up + +# At this point, the hosts cannot reach each-other, +# because ns2 are using VLAN tags on the packets. + +ip netns exec ns2 sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Okay ping fails"' + + +# Now we can use the test_xdp_vlan.c program to pop/push these VLAN tags +# ---------------------------------------------------------------------- +# In ns1: ingress use XDP to remove VLAN tags +export DEVNS1=veth1 +export FILE=test_xdp_vlan.o + +# First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change" +export XDP_PROG=xdp_vlan_change +ip netns exec ns1 ip link set $DEVNS1 xdp object $FILE section $XDP_PROG + +# In ns1: egress use TC to add back VLAN tag 4011 +# (del cmd) +# tc qdisc del dev $DEVNS1 clsact 2> /dev/null +# +ip netns exec ns1 tc qdisc add dev $DEVNS1 clsact +ip netns exec ns1 tc filter add dev $DEVNS1 egress \ + prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push + +# Now the namespaces can reach each-other, test with ping: +ip netns exec ns2 ping -W 2 -c 3 $IPADDR1 +ip netns exec ns1 ping -W 2 -c 3 $IPADDR2 + +# Second test: Replace xdp prog, that fully remove vlan header +# +# Catch kernel bug for generic-XDP, that does didn't allow us to +# remove a VLAN header, because skb->protocol still contain VLAN +# ETH_P_8021Q indication, and this cause overwriting of our changes. +# +export XDP_PROG=xdp_vlan_remove_outer2 +ip netns exec ns1 ip link set $DEVNS1 xdp off +ip netns exec ns1 ip link set $DEVNS1 xdp object $FILE section $XDP_PROG + +# Now the namespaces should still be able reach each-other, test with ping: +ip netns exec ns2 ping -W 2 -c 3 $IPADDR1 +ip netns exec ns1 ping -W 2 -c 3 $IPADDR2 diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index cabe2a3a3b30..4cdb63bf0521 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -41,6 +41,7 @@ int load_kallsyms(void) syms[i].name = strdup(func); i++; } + fclose(f); sym_cnt = i; qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp); return 0; @@ -124,10 +125,11 @@ struct perf_event_sample { char data[]; }; -static enum bpf_perf_event_ret bpf_perf_event_print(void *event, void *priv) +static enum bpf_perf_event_ret +bpf_perf_event_print(struct perf_event_header *hdr, void *private_data) { - struct perf_event_sample *e = event; - perf_event_print_fn fn = priv; + struct perf_event_sample *e = (struct perf_event_sample *)hdr; + perf_event_print_fn fn = private_data; int ret; if (e->header.type == PERF_RECORD_SAMPLE) { diff --git a/tools/testing/selftests/bpf/with_addr.sh b/tools/testing/selftests/bpf/with_addr.sh new file mode 100755 index 000000000000..ffcd3953f94c --- /dev/null +++ b/tools/testing/selftests/bpf/with_addr.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# add private ipv4 and ipv6 addresses to loopback + +readonly V6_INNER='100::a/128' +readonly V4_INNER='192.168.0.1/32' + +if getopts ":s" opt; then + readonly SIT_DEV_NAME='sixtofourtest0' + readonly V6_SIT='2::/64' + readonly V4_SIT='172.17.0.1/32' + shift +fi + +fail() { + echo "error: $*" 1>&2 + exit 1 +} + +setup() { + ip -6 addr add "${V6_INNER}" dev lo || fail 'failed to setup v6 address' + ip -4 addr add "${V4_INNER}" dev lo || fail 'failed to setup v4 address' + + if [[ -n "${V6_SIT}" ]]; then + ip link add "${SIT_DEV_NAME}" type sit remote any local any \ + || fail 'failed to add sit' + ip link set dev "${SIT_DEV_NAME}" up \ + || fail 'failed to bring sit device up' + ip -6 addr add "${V6_SIT}" dev "${SIT_DEV_NAME}" \ + || fail 'failed to setup v6 SIT address' + ip -4 addr add "${V4_SIT}" dev "${SIT_DEV_NAME}" \ + || fail 'failed to setup v4 SIT address' + fi + + sleep 2 # avoid race causing bind to fail +} + +cleanup() { + if [[ -n "${V6_SIT}" ]]; then + ip -4 addr del "${V4_SIT}" dev "${SIT_DEV_NAME}" + ip -6 addr del "${V6_SIT}" dev "${SIT_DEV_NAME}" + ip link del "${SIT_DEV_NAME}" + fi + + ip -4 addr del "${V4_INNER}" dev lo + ip -6 addr del "${V6_INNER}" dev lo +} + +trap cleanup EXIT + +setup +"$@" +exit "$?" diff --git a/tools/testing/selftests/bpf/with_tunnels.sh b/tools/testing/selftests/bpf/with_tunnels.sh new file mode 100755 index 000000000000..e24949ed3a20 --- /dev/null +++ b/tools/testing/selftests/bpf/with_tunnels.sh @@ -0,0 +1,36 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# setup tunnels for flow dissection test + +readonly SUFFIX="test_$(mktemp -u XXXX)" +CONFIG="remote 127.0.0.2 local 127.0.0.1 dev lo" + +setup() { + ip link add "ipip_${SUFFIX}" type ipip ${CONFIG} + ip link add "gre_${SUFFIX}" type gre ${CONFIG} + ip link add "sit_${SUFFIX}" type sit ${CONFIG} + + echo "tunnels before test:" + ip tunnel show + + ip link set "ipip_${SUFFIX}" up + ip link set "gre_${SUFFIX}" up + ip link set "sit_${SUFFIX}" up +} + + +cleanup() { + ip tunnel del "ipip_${SUFFIX}" + ip tunnel del "gre_${SUFFIX}" + ip tunnel del "sit_${SUFFIX}" + + echo "tunnels after test:" + ip tunnel show +} + +trap cleanup EXIT + +setup +"$@" +exit "$?" diff --git a/tools/testing/selftests/bpf/xdp_dummy.c b/tools/testing/selftests/bpf/xdp_dummy.c new file mode 100644 index 000000000000..43b0ef1001ed --- /dev/null +++ b/tools/testing/selftests/bpf/xdp_dummy.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define KBUILD_MODNAME "xdp_dummy" +#include <linux/bpf.h> +#include "bpf_helpers.h" + +SEC("xdp_dummy") +int xdp_dummy_prog(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh index bab13dd025a6..0d26b5e3f966 100755 --- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh +++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh @@ -37,6 +37,10 @@ prerequisite() exit $ksft_skip fi + present_cpus=`cat $SYSFS/devices/system/cpu/present` + present_max=${present_cpus##*-} + echo "present_cpus = $present_cpus present_max = $present_max" + echo -e "\t Cpus in online state: $online_cpus" offline_cpus=`cat $SYSFS/devices/system/cpu/offline` @@ -151,6 +155,8 @@ online_cpus=0 online_max=0 offline_cpus=0 offline_max=0 +present_cpus=0 +present_max=0 while getopts e:ahp: opt; do case $opt in @@ -190,9 +196,10 @@ if [ $allcpus -eq 0 ]; then online_cpu_expect_success $online_max if [[ $offline_cpus -gt 0 ]]; then - echo -e "\t offline to online to offline: cpu $offline_max" - online_cpu_expect_success $offline_max - offline_cpu_expect_success $offline_max + echo -e "\t offline to online to offline: cpu $present_max" + online_cpu_expect_success $present_max + offline_cpu_expect_success $present_max + online_cpu $present_max fi exit 0 else diff --git a/tools/testing/selftests/drivers/dma-buf/Makefile b/tools/testing/selftests/drivers/dma-buf/Makefile new file mode 100644 index 000000000000..f22c3f7cf612 --- /dev/null +++ b/tools/testing/selftests/drivers/dma-buf/Makefile @@ -0,0 +1,7 @@ +CFLAGS += -I../../../../../usr/include/ + +TEST_GEN_PROGS := udmabuf + +top_srcdir ?=../../../../.. + +include ../../lib.mk diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c b/tools/testing/selftests/drivers/dma-buf/udmabuf.c new file mode 100644 index 000000000000..4de902ea14d8 --- /dev/null +++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <linux/fcntl.h> +#include <malloc.h> + +#include <sys/ioctl.h> +#include <sys/syscall.h> +#include <linux/memfd.h> +#include <linux/udmabuf.h> + +#define TEST_PREFIX "drivers/dma-buf/udmabuf" +#define NUM_PAGES 4 + +static int memfd_create(const char *name, unsigned int flags) +{ + return syscall(__NR_memfd_create, name, flags); +} + +int main(int argc, char *argv[]) +{ + struct udmabuf_create create; + int devfd, memfd, buf, ret; + off_t size; + void *mem; + + devfd = open("/dev/udmabuf", O_RDWR); + if (devfd < 0) { + printf("%s: [skip,no-udmabuf]\n", TEST_PREFIX); + exit(77); + } + + memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING); + if (memfd < 0) { + printf("%s: [skip,no-memfd]\n", TEST_PREFIX); + exit(77); + } + + ret = fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK); + if (ret < 0) { + printf("%s: [skip,fcntl-add-seals]\n", TEST_PREFIX); + exit(77); + } + + + size = getpagesize() * NUM_PAGES; + ret = ftruncate(memfd, size); + if (ret == -1) { + printf("%s: [FAIL,memfd-truncate]\n", TEST_PREFIX); + exit(1); + } + + memset(&create, 0, sizeof(create)); + + /* should fail (offset not page aligned) */ + create.memfd = memfd; + create.offset = getpagesize()/2; + create.size = getpagesize(); + buf = ioctl(devfd, UDMABUF_CREATE, &create); + if (buf >= 0) { + printf("%s: [FAIL,test-1]\n", TEST_PREFIX); + exit(1); + } + + /* should fail (size not multiple of page) */ + create.memfd = memfd; + create.offset = 0; + create.size = getpagesize()/2; + buf = ioctl(devfd, UDMABUF_CREATE, &create); + if (buf >= 0) { + printf("%s: [FAIL,test-2]\n", TEST_PREFIX); + exit(1); + } + + /* should fail (not memfd) */ + create.memfd = 0; /* stdin */ + create.offset = 0; + create.size = size; + buf = ioctl(devfd, UDMABUF_CREATE, &create); + if (buf >= 0) { + printf("%s: [FAIL,test-3]\n", TEST_PREFIX); + exit(1); + } + + /* should work */ + create.memfd = memfd; + create.offset = 0; + create.size = size; + buf = ioctl(devfd, UDMABUF_CREATE, &create); + if (buf < 0) { + printf("%s: [FAIL,test-4]\n", TEST_PREFIX); + exit(1); + } + + fprintf(stderr, "%s: ok\n", TEST_PREFIX); + close(buf); + close(memfd); + close(devfd); + return 0; +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/extack.sh b/tools/testing/selftests/drivers/net/mlxsw/extack.sh new file mode 100755 index 000000000000..d72d8488a3b2 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/extack.sh @@ -0,0 +1,145 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test operations that we expect to report extended ack. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + netdev_pre_up_test + vxlan_vlan_add_test + port_vlan_add_test +" +NUM_NETIFS=2 +source $lib_dir/lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +netdev_pre_up_test() +{ + RET=0 + + ip link add name br1 up type bridge vlan_filtering 0 mcast_snooping 0 + ip link add name vx1 up type vxlan id 1000 \ + local 192.0.2.17 remote 192.0.2.18 \ + dstport 4789 nolearning noudpcsum tos inherit ttl 100 + + ip link set dev vx1 master br1 + check_err $? + + ip link set dev $swp1 master br1 + check_err $? + + ip link add name br2 up type bridge vlan_filtering 0 mcast_snooping 0 + ip link add name vx2 up type vxlan id 2000 \ + local 192.0.2.17 remote 192.0.2.18 \ + dstport 4789 nolearning noudpcsum tos inherit ttl 100 + + ip link set dev vx2 master br2 + check_err $? + + ip link set dev $swp2 master br2 + check_err $? + + # Unsupported configuration: mlxsw demands that all offloaded VXLAN + # devices have the same TTL. + ip link set dev vx2 down + ip link set dev vx2 type vxlan ttl 200 + + ip link set dev vx2 up &>/dev/null + check_fail $? + + ip link set dev vx2 up 2>&1 >/dev/null | grep -q mlxsw_spectrum + check_err $? + + log_test "extack - NETDEV_PRE_UP" + + ip link del dev vx2 + ip link del dev br2 + + ip link del dev vx1 + ip link del dev br1 +} + +vxlan_vlan_add_test() +{ + RET=0 + + ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0 + + # Unsupported configuration: mlxsw demands VXLAN with "noudpcsum". + ip link add name vx1 up type vxlan id 1000 \ + local 192.0.2.17 remote 192.0.2.18 \ + dstport 4789 tos inherit ttl 100 + + ip link set dev vx1 master br1 + check_err $? + + bridge vlan add dev vx1 vid 1 + check_err $? + + ip link set dev $swp1 master br1 + check_err $? + + bridge vlan add dev vx1 vid 1 pvid untagged 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? + + log_test "extack - map VLAN at VXLAN device" + + ip link del dev vx1 + ip link del dev br1 +} + +port_vlan_add_test() +{ + RET=0 + + ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0 + + # Unsupported configuration: mlxsw demands VXLAN with "noudpcsum". + ip link add name vx1 up type vxlan id 1000 \ + local 192.0.2.17 remote 192.0.2.18 \ + dstport 4789 tos inherit ttl 100 + + ip link set dev $swp1 master br1 + check_err $? + + bridge vlan del dev $swp1 vid 1 + + ip link set dev vx1 master br1 + check_err $? + + bridge vlan add dev $swp1 vid 1 pvid untagged 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? + + log_test "extack - map VLAN at port" + + ip link del dev vx1 + ip link del dev br1 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh b/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh new file mode 100755 index 000000000000..f02d83e94576 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh @@ -0,0 +1,259 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test a "one-armed router" [1] scenario. Packets forwarded between H1 and H2 +# should be forwarded by the ASIC, but also trapped so that ICMP redirect +# packets could be potentially generated. +# +# 1. https://en.wikipedia.org/wiki/One-armed_router +# +# +---------------------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/24 | +# | | 2001:db8:1::1/64 | +# | | | +# | | default via 192.0.2.2 | +# | | default via 2001:db8:1::2 | +# +----|----------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | +--|--------------------------------------------------------------------+ | +# | | + $swp1 BR0 (802.1d) | | +# | | | | +# | | 192.0.2.2/24 | | +# | | 2001:db8:1::2/64 | | +# | | 198.51.100.2/24 | | +# | | 2001:db8:2::2/64 | | +# | | | | +# | | + $swp2 | | +# | +--|--------------------------------------------------------------------+ | +# | | | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------+ +# | | default via 198.51.100.2 | +# | | default via 2001:db8:2::2 | +# | | | +# | | 2001:db8:2::1/64 | +# | | 198.51.100.1/24 | +# | + $h2 | +# | H2 (vrf) | +# +---------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS="ping_ipv4 ping_ipv6 fwd_mark_ipv4 fwd_mark_ipv6" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 + + ip -4 route add default vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2 + ip -4 route del default vrf v$h1 nexthop via 192.0.2.2 + + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64 + + ip -4 route add default vrf v$h2 nexthop via 198.51.100.2 + ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2 +} + +h2_destroy() +{ + ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2 + ip -4 route del default vrf v$h2 nexthop via 198.51.100.2 + + simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64 +} + +switch_create() +{ + ip link add name br0 type bridge mcast_snooping 0 + ip link set dev br0 up + + ip link set dev $swp1 master br0 + ip link set dev $swp1 up + ip link set dev $swp2 master br0 + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact + + __addr_add_del br0 add 192.0.2.2/24 2001:db8:1::2/64 + __addr_add_del br0 add 198.51.100.2/24 2001:db8:2::2/64 +} + +switch_destroy() +{ + __addr_add_del br0 del 198.51.100.2/24 2001:db8:2::2/64 + __addr_add_del br0 del 192.0.2.2/24 2001:db8:1::2/64 + + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev br0 down + ip link del dev br0 +} + +ping_ipv4() +{ + ping_test $h1 198.51.100.1 ": h1->h2" +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::1 ": h1->h2" +} + +fwd_mark_ipv4() +{ + # Transmit packets from H1 to H2 and make sure they are trapped at + # swp1 due to loopback error, but only forwarded by the ASIC through + # swp2 + + tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \ + skip_hw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \ + action pass + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 flower \ + skip_hw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \ + action pass + + tc filter add dev $swp2 egress protocol ip pref 2 handle 102 flower \ + skip_sw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \ + action pass + + ip vrf exec v$h1 $MZ $h1 -c 10 -d 100msec -p 64 -A 192.0.2.1 \ + -B 198.51.100.1 -t udp dp=52768,sp=42768 -q + + RET=0 + + tc_check_packets "dev $swp1 ingress" 101 10 + check_err $? + + log_test "fwd mark: trapping IPv4 packets due to LBERROR" + + RET=0 + + tc_check_packets "dev $swp2 egress" 101 0 + check_err $? + + log_test "fwd mark: forwarding IPv4 packets in software" + + RET=0 + + tc_check_packets "dev $swp2 egress" 102 10 + check_err $? + + log_test "fwd mark: forwarding IPv4 packets in hardware" + + tc filter del dev $swp2 egress protocol ip pref 2 handle 102 flower + tc filter del dev $swp2 egress protocol ip pref 1 handle 101 flower + tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower +} + +fwd_mark_ipv6() +{ + tc filter add dev $swp1 ingress protocol ipv6 pref 1 handle 101 flower \ + skip_hw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \ + action pass + + tc filter add dev $swp2 egress protocol ipv6 pref 1 handle 101 flower \ + skip_hw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \ + action pass + + tc filter add dev $swp2 egress protocol ipv6 pref 2 handle 102 flower \ + skip_sw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \ + action pass + + ip vrf exec v$h1 $MZ $h1 -6 -c 10 -d 100msec -p 64 -A 2001:db8:1::1 \ + -B 2001:db8:2::1 -t udp dp=52768,sp=42768 -q + + RET=0 + + tc_check_packets "dev $swp1 ingress" 101 10 + check_err $? + + log_test "fwd mark: trapping IPv6 packets due to LBERROR" + + RET=0 + + tc_check_packets "dev $swp2 egress" 101 0 + check_err $? + + log_test "fwd mark: forwarding IPv6 packets in software" + + RET=0 + + tc_check_packets "dev $swp2 egress" 102 10 + check_err $? + + log_test "fwd mark: forwarding IPv6 packets in hardware" + + tc filter del dev $swp2 egress protocol ipv6 pref 2 handle 102 flower + tc filter del dev $swp2 egress protocol ipv6 pref 1 handle 101 flower + tc filter del dev $swp1 ingress protocol ipv6 pref 1 handle 101 flower +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + forwarding_enable + + sysctl_set net.ipv4.conf.all.accept_redirects 0 + sysctl_set net.ipv6.conf.all.accept_redirects 0 + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + sysctl_restore net.ipv6.conf.all.accept_redirects + sysctl_restore net.ipv4.conf.all.accept_redirects + + forwarding_restore + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh new file mode 100755 index 000000000000..117f6f35d72f --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -0,0 +1,394 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# A test for switch behavior under MC overload. An issue in Spectrum chips +# causes throughput of UC traffic to drop severely when a switch is under heavy +# MC load. This issue can be overcome by putting the switch to MC-aware mode. +# This test verifies that UC performance stays intact even as the switch is +# under MC flood, and therefore that the MC-aware mode is enabled and correctly +# configured. +# +# Because mlxsw throttles CPU port, the traffic can't actually reach userspace +# at full speed. That makes it impossible to use iperf3 to simply measure the +# throughput, because many packets (that reach $h3) don't get to the kernel at +# all even in UDP mode (the situation is even worse in TCP mode, where one can't +# hope to see more than a couple Mbps). +# +# So instead we send traffic with mausezahn and use RX ethtool counters at $h3. +# Multicast traffic is untagged, unicast traffic is tagged with PCP 1. Therefore +# each gets a different priority and we can use per-prio ethtool counters to +# measure the throughput. In order to avoid prioritizing unicast traffic, prio +# qdisc is installed on $swp3 and maps all priorities to the same band #7 (and +# thus TC 0). +# +# Mausezahn can't actually saturate the links unless it's using large frames. +# Thus we set MTU to 10K on all involved interfaces. Then both unicast and +# multicast traffic uses 8K frames. +# +# +---------------------------+ +----------------------------------+ +# | H1 | | H2 | +# | | | unicast --> + $h2.111 | +# | multicast | | traffic | 192.0.2.129/28 | +# | traffic | | | e-qos-map 0:1 | +# | $h1 + <----- | | | | +# | 192.0.2.65/28 | | | + $h2 | +# +---------------|-----------+ +--------------|-------------------+ +# | | +# +---------------|---------------------------------------|-------------------+ +# | $swp1 + + $swp2 | +# | >1Gbps | | >1Gbps | +# | +-------------|------+ +----------|----------------+ | +# | | $swp1.1 + | | + $swp2.111 | | +# | | BR1 | SW | BR111 | | +# | | $swp3.1 + | | + $swp3.111 | | +# | +-------------|------+ +----------|----------------+ | +# | \_______________________________________/ | +# | | | +# | + $swp3 | +# | | 1Gbps bottleneck | +# | | prio qdisc: {0..7} -> 7 | +# +------------------------------------|--------------------------------------+ +# | +# +--|-----------------+ +# | + $h3 H3 | +# | | 192.0.2.66/28 | +# | | | +# | + $h3.111 | +# | 192.0.2.130/28 | +# +--------------------+ + +ALL_TESTS=" + ping_ipv4 + test_mc_aware + test_uc_aware +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=6 +source $lib_dir/lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.65/28 + mtu_set $h1 10000 +} + +h1_destroy() +{ + mtu_restore $h1 + simple_if_fini $h1 192.0.2.65/28 +} + +h2_create() +{ + simple_if_init $h2 + mtu_set $h2 10000 + + vlan_create $h2 111 v$h2 192.0.2.129/28 + ip link set dev $h2.111 type vlan egress-qos-map 0:1 +} + +h2_destroy() +{ + vlan_destroy $h2 111 + + mtu_restore $h2 + simple_if_fini $h2 +} + +h3_create() +{ + simple_if_init $h3 192.0.2.66/28 + mtu_set $h3 10000 + + vlan_create $h3 111 v$h3 192.0.2.130/28 +} + +h3_destroy() +{ + vlan_destroy $h3 111 + + mtu_restore $h3 + simple_if_fini $h3 192.0.2.66/28 +} + +switch_create() +{ + ip link set dev $swp1 up + mtu_set $swp1 10000 + + ip link set dev $swp2 up + mtu_set $swp2 10000 + + ip link set dev $swp3 up + mtu_set $swp3 10000 + + vlan_create $swp2 111 + vlan_create $swp3 111 + + ethtool -s $swp3 speed 1000 autoneg off + tc qdisc replace dev $swp3 root handle 3: \ + prio bands 8 priomap 7 7 7 7 7 7 7 7 + + ip link add name br1 type bridge vlan_filtering 0 + ip link set dev br1 up + ip link set dev $swp1 master br1 + ip link set dev $swp3 master br1 + + ip link add name br111 type bridge vlan_filtering 0 + ip link set dev br111 up + ip link set dev $swp2.111 master br111 + ip link set dev $swp3.111 master br111 +} + +switch_destroy() +{ + ip link del dev br111 + ip link del dev br1 + + tc qdisc del dev $swp3 root handle 3: + ethtool -s $swp3 autoneg on + + vlan_destroy $swp3 111 + vlan_destroy $swp2 111 + + mtu_restore $swp3 + ip link set dev $swp3 down + + mtu_restore $swp2 + ip link set dev $swp2 down + + mtu_restore $swp1 + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + h3mac=$(mac_get $h3) + + vrf_prepare + + h1_create + h2_create + h3_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h3_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h2 192.0.2.130 +} + +humanize() +{ + local speed=$1; shift + + for unit in bps Kbps Mbps Gbps; do + if (($(echo "$speed < 1024" | bc))); then + break + fi + + speed=$(echo "scale=1; $speed / 1024" | bc) + done + + echo "$speed${unit}" +} + +rate() +{ + local t0=$1; shift + local t1=$1; shift + local interval=$1; shift + + echo $((8 * (t1 - t0) / interval)) +} + +check_rate() +{ + local rate=$1; shift + local min=$1; shift + local what=$1; shift + + if ((rate > min)); then + return 0 + fi + + echo "$what $(humanize $ir) < $(humanize $min_ingress)" > /dev/stderr + return 1 +} + +measure_uc_rate() +{ + local what=$1; shift + + local interval=10 + local i + local ret=0 + + # Dips in performance might cause momentary ingress rate to drop below + # 1Gbps. That wouldn't saturate egress and MC would thus get through, + # seemingly winning bandwidth on account of UC. Demand at least 2Gbps + # average ingress rate to somewhat mitigate this. + local min_ingress=2147483648 + + $MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \ + -a own -b $h3mac -t udp -q & + sleep 1 + + for i in {5..0}; do + local t0=$(ethtool_stats_get $h3 rx_octets_prio_1) + local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1) + sleep $interval + local t1=$(ethtool_stats_get $h3 rx_octets_prio_1) + local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1) + + local ir=$(rate $u0 $u1 $interval) + local er=$(rate $t0 $t1 $interval) + + if check_rate $ir $min_ingress "$what ingress rate"; then + break + fi + + # Fail the test if we can't get the throughput. + if ((i == 0)); then + ret=1 + fi + done + + # Suppress noise from killing mausezahn. + { kill %% && wait; } 2>/dev/null + + echo $ir $er + exit $ret +} + +test_mc_aware() +{ + RET=0 + + local -a uc_rate + uc_rate=($(measure_uc_rate "UC-only")) + check_err $? "Could not get high enough UC-only ingress rate" + local ucth1=${uc_rate[1]} + + $MZ $h1 -p 8000 -c 0 -a own -b bc -t udp -q & + + local d0=$(date +%s) + local t0=$(ethtool_stats_get $h3 rx_octets_prio_0) + local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0) + + local -a uc_rate_2 + uc_rate_2=($(measure_uc_rate "UC+MC")) + check_err $? "Could not get high enough UC+MC ingress rate" + local ucth2=${uc_rate_2[1]} + + local d1=$(date +%s) + local t1=$(ethtool_stats_get $h3 rx_octets_prio_0) + local u1=$(ethtool_stats_get $swp1 rx_octets_prio_0) + + local deg=$(bc <<< " + scale=2 + ret = 100 * ($ucth1 - $ucth2) / $ucth1 + if (ret > 0) { ret } else { 0 } + ") + check_err $(bc <<< "$deg > 25") + + local interval=$((d1 - d0)) + local mc_ir=$(rate $u0 $u1 $interval) + local mc_er=$(rate $t0 $t1 $interval) + + # Suppress noise from killing mausezahn. + { kill %% && wait; } 2>/dev/null + + log_test "UC performace under MC overload" + + echo "UC-only throughput $(humanize $ucth1)" + echo "UC+MC throughput $(humanize $ucth2)" + echo "Degradation $deg %" + echo + echo "Full report:" + echo " UC only:" + echo " ingress UC throughput $(humanize ${uc_rate[0]})" + echo " egress UC throughput $(humanize ${uc_rate[1]})" + echo " UC+MC:" + echo " ingress UC throughput $(humanize ${uc_rate_2[0]})" + echo " egress UC throughput $(humanize ${uc_rate_2[1]})" + echo " ingress MC throughput $(humanize $mc_ir)" + echo " egress MC throughput $(humanize $mc_er)" + echo +} + +test_uc_aware() +{ + RET=0 + + $MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \ + -a own -b $h3mac -t udp -q & + + local d0=$(date +%s) + local t0=$(ethtool_stats_get $h3 rx_octets_prio_1) + local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1) + sleep 1 + + local attempts=50 + local passes=0 + local i + + for ((i = 0; i < attempts; ++i)); do + if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 0.1; then + ((passes++)) + fi + + sleep 0.1 + done + + local d1=$(date +%s) + local t1=$(ethtool_stats_get $h3 rx_octets_prio_1) + local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1) + + local interval=$((d1 - d0)) + local uc_ir=$(rate $u0 $u1 $interval) + local uc_er=$(rate $t0 $t1 $interval) + + ((attempts == passes)) + check_err $? + + # Suppress noise from killing mausezahn. + { kill %% && wait; } 2>/dev/null + + log_test "MC performace under UC overload" + echo " ingress UC throughput $(humanize ${uc_ir})" + echo " egress UC throughput $(humanize ${uc_er})" + echo " sent $attempts BC ARPs, got $passes responses" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh new file mode 100755 index 000000000000..c4cf6e6d800e --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -0,0 +1,585 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test various interface configuration scenarios. Observe that configurations +# deemed valid by mlxsw succeed, invalid configurations fail and that no traces +# are produced. To prevent the test from passing in case traces are produced, +# the user can set the 'kernel.panic_on_warn' and 'kernel.panic_on_oops' +# sysctls in its environment. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + rif_set_addr_test + rif_inherit_bridge_addr_test + rif_non_inherit_bridge_addr_test + vlan_interface_deletion_test + bridge_deletion_test + bridge_vlan_flags_test + vlan_1_test + lag_bridge_upper_test + duplicate_vlans_test + vlan_rif_refcount_test + subport_rif_refcount_test + vlan_dev_deletion_test + lag_unlink_slaves_test + lag_dev_deletion_test + vlan_interface_uppers_test + bridge_extern_learn_test + devlink_reload_test +" +NUM_NETIFS=2 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +rif_set_addr_test() +{ + local swp1_mac=$(mac_get $swp1) + local swp2_mac=$(mac_get $swp2) + + RET=0 + + # $swp1 and $swp2 likely got their IPv6 local addresses already, but + # here we need to test the transition to RIF. + ip addr flush dev $swp1 + ip addr flush dev $swp2 + sleep .1 + + ip addr add dev $swp1 192.0.2.1/28 + check_err $? + + ip link set dev $swp1 addr 00:11:22:33:44:55 + check_err $? + + # IP address enablement should be rejected if the MAC address prefix + # doesn't match other RIFs. + ip addr add dev $swp2 192.0.2.2/28 &>/dev/null + check_fail $? "IP address addition passed for a device with a wrong MAC" + ip addr add dev $swp2 192.0.2.2/28 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "no extack for IP address addition" + + ip link set dev $swp2 addr 00:11:22:33:44:66 + check_err $? + ip addr add dev $swp2 192.0.2.2/28 &>/dev/null + check_err $? + + # Change of MAC address of a RIF should be forbidden if the new MAC + # doesn't share the prefix with other MAC addresses. + ip link set dev $swp2 addr 00:11:22:33:00:66 &>/dev/null + check_fail $? "change of MAC address passed for a wrong MAC" + ip link set dev $swp2 addr 00:11:22:33:00:66 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "no extack for MAC address change" + + log_test "RIF - bad MAC change" + + ip addr del dev $swp2 192.0.2.2/28 + ip addr del dev $swp1 192.0.2.1/28 + + ip link set dev $swp2 addr $swp2_mac + ip link set dev $swp1 addr $swp1_mac +} + +rif_inherit_bridge_addr_test() +{ + RET=0 + + # Create first RIF + ip addr add dev $swp1 192.0.2.1/28 + check_err $? + + # Create a FID RIF + ip link add name br1 up type bridge vlan_filtering 0 + ip link set dev $swp2 master br1 + ip addr add dev br1 192.0.2.17/28 + check_err $? + + # Prepare a device with a low MAC address + ip link add name d up type dummy + ip link set dev d addr 00:11:22:33:44:55 + + # Attach the device to br1. That prompts bridge address change, which + # should be vetoed, thus preventing the attachment. + ip link set dev d master br1 &>/dev/null + check_fail $? "Device with low MAC was permitted to attach a bridge with RIF" + ip link set dev d master br1 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "no extack for bridge attach rejection" + + ip link set dev $swp2 addr 00:11:22:33:44:55 &>/dev/null + check_fail $? "Changing swp2's MAC address permitted" + ip link set dev $swp2 addr 00:11:22:33:44:55 2>&1 >/dev/null \ + | grep -q mlxsw_spectrum + check_err $? "no extack for bridge port MAC address change rejection" + + log_test "RIF - attach port with bad MAC to bridge" + + ip link del dev d + ip link del dev br1 + ip addr del dev $swp1 192.0.2.1/28 +} + +rif_non_inherit_bridge_addr_test() +{ + local swp2_mac=$(mac_get $swp2) + + RET=0 + + # Create first RIF + ip addr add dev $swp1 192.0.2.1/28 + check_err $? + + # Create a FID RIF + ip link add name br1 up type bridge vlan_filtering 0 + ip link set dev br1 addr $swp2_mac + ip link set dev $swp2 master br1 + ip addr add dev br1 192.0.2.17/28 + check_err $? + + # Prepare a device with a low MAC address + ip link add name d up type dummy + ip link set dev d addr 00:11:22:33:44:55 + + # Attach the device to br1. Since the bridge address was set, it should + # work. + ip link set dev d master br1 &>/dev/null + check_err $? "Could not attach a device with low MAC to a bridge with RIF" + + # Port MAC address change should be allowed for a bridge with set MAC. + ip link set dev $swp2 addr 00:11:22:33:44:55 + check_err $? "Changing swp2's MAC address not permitted" + + log_test "RIF - attach port with bad MAC to bridge with set MAC" + + ip link set dev $swp2 addr $swp2_mac + ip link del dev d + ip link del dev br1 + ip addr del dev $swp1 192.0.2.1/28 +} + +vlan_interface_deletion_test() +{ + # Test that when a VLAN interface is deleted, its associated router + # interface (RIF) is correctly deleted and not leaked. See commit + # c360867ec46a ("mlxsw: spectrum: Delete RIF when VLAN device is + # removed") for more details + RET=0 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev $swp1 master br0 + + ip link add link br0 name br0.10 type vlan id 10 + ip -6 address add 2001:db8:1::1/64 dev br0.10 + ip link del dev br0.10 + + # If we leaked the previous RIF, then this should produce a trace + ip link add link br0 name br0.20 type vlan id 20 + ip -6 address add 2001:db8:1::1/64 dev br0.20 + ip link del dev br0.20 + + log_test "vlan interface deletion" + + ip link del dev br0 +} + +bridge_deletion_test() +{ + # Test that when a bridge with VLAN interfaces is deleted, we correctly + # delete the associated RIFs. See commit 602b74eda813 ("mlxsw: + # spectrum_switchdev: Do not leak RIFs when removing bridge") for more + # details + RET=0 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev $swp1 master br0 + ip -6 address add 2001:db8::1/64 dev br0 + + ip link add link br0 name br0.10 type vlan id 10 + ip -6 address add 2001:db8:1::1/64 dev br0.10 + + ip link add link br0 name br0.20 type vlan id 20 + ip -6 address add 2001:db8:2::1/64 dev br0.20 + + ip link del dev br0 + + # If we leaked previous RIFs, then this should produce a trace + ip -6 address add 2001:db8:1::1/64 dev $swp1 + ip -6 address del 2001:db8:1::1/64 dev $swp1 + + log_test "bridge deletion" +} + +bridge_vlan_flags_test() +{ + # Test that when bridge VLAN flags are toggled, we do not take + # unnecessary references on related structs. See commit 9e25826ffc94 + # ("mlxsw: spectrum_switchdev: Fix port_vlan refcounting") for more + # details + RET=0 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev $swp1 master br0 + + bridge vlan add vid 10 dev $swp1 pvid untagged + bridge vlan add vid 10 dev $swp1 untagged + bridge vlan add vid 10 dev $swp1 pvid + bridge vlan add vid 10 dev $swp1 + ip link del dev br0 + + # If we did not handle references correctly, then this should produce a + # trace + devlink dev reload "$DEVLINK_DEV" + + # Allow netdevices to be re-created following the reload + sleep 20 + + log_test "bridge vlan flags" +} + +vlan_1_test() +{ + # Test that VLAN 1 can be configured over mlxsw ports. In the past it + # was used internally for untagged traffic. See commit 47bf9df2e820 + # ("mlxsw: spectrum: Forbid creation of VLAN 1 over port/LAG") for more + # details + RET=0 + + ip link add link $swp1 name $swp1.1 type vlan id 1 + check_err $? "did not manage to create vlan 1 when should" + + log_test "vlan 1" + + ip link del dev $swp1.1 +} + +lag_bridge_upper_test() +{ + # Test that ports cannot be enslaved to LAG devices that have uppers + # and that failure is handled gracefully. See commit b3529af6bb0d + # ("spectrum: Reference count VLAN entries") for more details + RET=0 + + ip link add name bond1 type bond mode 802.3ad + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev bond1 master br0 + + ip link set dev $swp1 down + ip link set dev $swp1 master bond1 &> /dev/null + check_fail $? "managed to enslave port to lag when should not" + + # This might generate a trace, if we did not handle the failure + # correctly + ip -6 address add 2001:db8:1::1/64 dev $swp1 + ip -6 address del 2001:db8:1::1/64 dev $swp1 + + log_test "lag with bridge upper" + + ip link del dev br0 + ip link del dev bond1 +} + +duplicate_vlans_test() +{ + # Test that on a given port a VLAN is only used once. Either as VLAN + # in a VLAN-aware bridge or as a VLAN device + RET=0 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev $swp1 master br0 + bridge vlan add vid 10 dev $swp1 + + ip link add link $swp1 name $swp1.10 type vlan id 10 &> /dev/null + check_fail $? "managed to create vlan device when should not" + + bridge vlan del vid 10 dev $swp1 + ip link add link $swp1 name $swp1.10 type vlan id 10 + check_err $? "did not manage to create vlan device when should" + bridge vlan add vid 10 dev $swp1 &> /dev/null + check_fail $? "managed to add bridge vlan when should not" + + log_test "duplicate vlans" + + ip link del dev $swp1.10 + ip link del dev br0 +} + +vlan_rif_refcount_test() +{ + # Test that RIFs representing VLAN interfaces are not affected from + # ports member in the VLAN. We use the offload indication on routes + # configured on the RIF to understand if it was created / destroyed + RET=0 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev $swp1 master br0 + + ip link set dev $swp1 up + ip link set dev br0 up + + ip link add link br0 name br0.10 up type vlan id 10 + ip -6 address add 2001:db8:1::1/64 dev br0.10 + + ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload + check_err $? "vlan rif was not created before adding port to vlan" + + bridge vlan add vid 10 dev $swp1 + ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload + check_err $? "vlan rif was destroyed after adding port to vlan" + + bridge vlan del vid 10 dev $swp1 + ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload + check_err $? "vlan rif was destroyed after removing port from vlan" + + ip link set dev $swp1 nomaster + ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload + check_fail $? "vlan rif was not destroyed after unlinking port from bridge" + + log_test "vlan rif refcount" + + ip link del dev br0.10 + ip link set dev $swp1 down + ip link del dev br0 +} + +subport_rif_refcount_test() +{ + # Test that RIFs representing upper devices of physical ports are + # reference counted correctly and destroyed when should. We use the + # offload indication on routes configured on the RIF to understand if + # it was created / destroyed + RET=0 + + ip link add name bond1 type bond mode 802.3ad + ip link set dev $swp1 down + ip link set dev $swp2 down + ip link set dev $swp1 master bond1 + ip link set dev $swp2 master bond1 + + ip link set dev bond1 up + ip link add link bond1 name bond1.10 up type vlan id 10 + ip -6 address add 2001:db8:1::1/64 dev bond1 + ip -6 address add 2001:db8:2::1/64 dev bond1.10 + + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 | grep -q offload + check_err $? "subport rif was not created on lag device" + ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 | grep -q offload + check_err $? "subport rif was not created on vlan device" + + ip link set dev $swp1 nomaster + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 | grep -q offload + check_err $? "subport rif of lag device was destroyed when should not" + ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 | grep -q offload + check_err $? "subport rif of vlan device was destroyed when should not" + + ip link set dev $swp2 nomaster + ip -6 route get fibmatch 2001:db8:1::2 dev bond1 | grep -q offload + check_fail $? "subport rif of lag device was not destroyed when should" + ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 | grep -q offload + check_fail $? "subport rif of vlan device was not destroyed when should" + + log_test "subport rif refcount" + + ip link del dev bond1.10 + ip link del dev bond1 +} + +vlan_dev_deletion_test() +{ + # Test that VLAN devices are correctly deleted / unlinked when enslaved + # to bridge + RET=0 + + ip link add name br10 type bridge + ip link add name br20 type bridge + ip link add name br30 type bridge + ip link add link $swp1 name $swp1.10 type vlan id 10 + ip link add link $swp1 name $swp1.20 type vlan id 20 + ip link add link $swp1 name $swp1.30 type vlan id 30 + ip link set dev $swp1.10 master br10 + ip link set dev $swp1.20 master br20 + ip link set dev $swp1.30 master br30 + + # If we did not handle the situation correctly, then these operations + # might produce a trace + ip link set dev $swp1.30 nomaster + ip link del dev $swp1.20 + # Deletion via ioctl uses different code paths from netlink + vconfig rem $swp1.10 &> /dev/null + + log_test "vlan device deletion" + + ip link del dev $swp1.30 + ip link del dev br30 + ip link del dev br20 + ip link del dev br10 +} + +lag_create() +{ + ip link add name bond1 type bond mode 802.3ad + ip link set dev $swp1 down + ip link set dev $swp2 down + ip link set dev $swp1 master bond1 + ip link set dev $swp2 master bond1 + + ip link add link bond1 name bond1.10 type vlan id 10 + ip link add link bond1 name bond1.20 type vlan id 20 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev bond1 master br0 + + ip link add name br10 type bridge + ip link set dev bond1.10 master br10 + + ip link add name br20 type bridge + ip link set dev bond1.20 master br20 +} + +lag_unlink_slaves_test() +{ + # Test that ports are correctly unlinked from their LAG master, when + # the LAG and its VLAN uppers are enslaved to bridges + RET=0 + + lag_create + + ip link set dev $swp1 nomaster + check_err $? "lag slave $swp1 was not unlinked from master" + ip link set dev $swp2 nomaster + check_err $? "lag slave $swp2 was not unlinked from master" + + # Try to configure corresponding VLANs as router interfaces + ip -6 address add 2001:db8:1::1/64 dev $swp1 + check_err $? "failed to configure ip address on $swp1" + + ip link add link $swp1 name $swp1.10 type vlan id 10 + ip -6 address add 2001:db8:10::1/64 dev $swp1.10 + check_err $? "failed to configure ip address on $swp1.10" + + ip link add link $swp1 name $swp1.20 type vlan id 20 + ip -6 address add 2001:db8:20::1/64 dev $swp1.20 + check_err $? "failed to configure ip address on $swp1.20" + + log_test "lag slaves unlinking" + + ip link del dev $swp1.20 + ip link del dev $swp1.10 + ip address flush dev $swp1 + + ip link del dev br20 + ip link del dev br10 + ip link del dev br0 + ip link del dev bond1 +} + +lag_dev_deletion_test() +{ + # Test that LAG device is correctly deleted, when the LAG and its VLAN + # uppers are enslaved to bridges + RET=0 + + lag_create + + ip link del dev bond1 + + log_test "lag device deletion" + + ip link del dev br20 + ip link del dev br10 + ip link del dev br0 +} + +vlan_interface_uppers_test() +{ + # Test that uppers of a VLAN interface are correctly sanitized + RET=0 + + ip link add name br0 type bridge vlan_filtering 1 + ip link set dev $swp1 master br0 + + ip link add link br0 name br0.10 type vlan id 10 + ip link add link br0.10 name macvlan0 \ + type macvlan mode private &> /dev/null + check_fail $? "managed to create a macvlan when should not" + + ip -6 address add 2001:db8:1::1/64 dev br0.10 + ip link add link br0.10 name macvlan0 type macvlan mode private + check_err $? "did not manage to create a macvlan when should" + + ip link del dev macvlan0 + + ip link add name vrf-test type vrf table 10 + ip link set dev br0.10 master vrf-test + check_err $? "did not manage to enslave vlan interface to vrf" + ip link del dev vrf-test + + ip link add name br-test type bridge + ip link set dev br0.10 master br-test &> /dev/null + check_fail $? "managed to enslave vlan interface to bridge when should not" + ip link del dev br-test + + log_test "vlan interface uppers" + + ip link del dev br0 +} + +bridge_extern_learn_test() +{ + # Test that externally learned entries added from user space are + # marked as offloaded + RET=0 + + ip link add name br0 type bridge + ip link set dev $swp1 master br0 + + bridge fdb add de:ad:be:ef:13:37 dev $swp1 master extern_learn + + bridge fdb show brport $swp1 | grep de:ad:be:ef:13:37 | grep -q offload + check_err $? "fdb entry not marked as offloaded when should" + + log_test "externally learned fdb entry" + + ip link del dev br0 +} + +devlink_reload_test() +{ + # Test that after executing all the above configuration tests, a + # devlink reload can be performed without errors + RET=0 + + devlink dev reload "$DEVLINK_DEV" + check_err $? "devlink reload failed" + + log_test "devlink reload - last test" + + sleep 20 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index 3b75180f455d..b41d6256b2d0 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -8,7 +8,8 @@ lib_dir=$(dirname $0)/../../../../net/forwarding ALL_TESTS="single_mask_test identical_filters_test two_masks_test \ - multiple_masks_test ctcam_edge_cases_test" + multiple_masks_test ctcam_edge_cases_test delta_simple_test \ + bloom_simple_test bloom_complex_test bloom_delta_test" NUM_NETIFS=2 source $lib_dir/tc_common.sh source $lib_dir/lib.sh @@ -142,7 +143,7 @@ two_masks_test() tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ $tcflags dst_ip 192.0.2.2 action drop tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ - $tcflags dst_ip 192.0.0.0/16 action drop + $tcflags dst_ip 192.0.0.0/8 action drop $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ -t ip -q @@ -235,7 +236,7 @@ ctcam_two_atcam_masks_test() $tcflags dst_ip 192.0.2.2 action drop # Filter goes into A-TCAM tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ - $tcflags dst_ip 192.0.2.0/24 action drop + $tcflags dst_ip 192.0.0.0/16 action drop $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ -t ip -q @@ -324,6 +325,258 @@ ctcam_edge_cases_test() ctcam_no_atcam_masks_test } +tp_record() +{ + local tracepoint=$1 + local cmd=$2 + + perf record -q -e $tracepoint $cmd + return $? +} + +tp_check_hits() +{ + local tracepoint=$1 + local count=$2 + + perf_output=`perf script -F trace:event,trace` + hits=`echo $perf_output | grep "$tracepoint:" | wc -l` + if [[ "$count" -ne "$hits" ]]; then + return 1 + fi + return 0 +} + +delta_simple_test() +{ + # The first filter will create eRP, the second filter will fit into + # the first eRP with delta. Remove the first rule then and check that + # the eRP stays (referenced by the second filter). + + RET=0 + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + tp_record "objagg:*" "tc filter add dev $h2 ingress protocol ip \ + pref 1 handle 101 flower $tcflags dst_ip 192.0.0.0/24 \ + action drop" + tp_check_hits "objagg:objagg_obj_root_create" 1 + check_err $? "eRP was not created" + + tp_record "objagg:*" "tc filter add dev $h2 ingress protocol ip \ + pref 2 handle 102 flower $tcflags dst_ip 192.0.2.2 \ + action drop" + tp_check_hits "objagg:objagg_obj_root_create" 0 + check_err $? "eRP was incorrectly created" + tp_check_hits "objagg:objagg_obj_parent_assign" 1 + check_err $? "delta was not created" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_fail $? "Matched a wrong filter" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct filter" + + tp_record "objagg:*" "tc filter del dev $h2 ingress protocol ip \ + pref 1 handle 101 flower" + tp_check_hits "objagg:objagg_obj_root_destroy" 0 + check_err $? "eRP was incorrectly destroyed" + tp_check_hits "objagg:objagg_obj_parent_unassign" 0 + check_err $? "delta was incorrectly destroyed" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 2 + check_err $? "Did not match on correct filter after the first was removed" + + tp_record "objagg:*" "tc filter del dev $h2 ingress protocol ip \ + pref 2 handle 102 flower" + tp_check_hits "objagg:objagg_obj_parent_unassign" 1 + check_err $? "delta was not destroyed" + tp_check_hits "objagg:objagg_obj_root_destroy" 1 + check_err $? "eRP was not destroyed" + + log_test "delta simple test ($tcflags)" +} + +bloom_simple_test() +{ + # Bloom filter requires that the eRP table is used. This test + # verifies that Bloom filter is not harming correctness of ACLs. + # First, make sure that eRP table is used and then set rule patterns + # which are distant enough and will result skipping a lookup after + # consulting the Bloom filter. Although some eRP lookups are skipped, + # the correct filter should be hit. + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ + $tcflags dst_ip 192.0.2.2 action drop + tc filter add dev $h2 ingress protocol ip pref 5 handle 104 flower \ + $tcflags dst_ip 198.51.100.2 action drop + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags dst_ip 192.0.0.0/8 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Two filters - did not match highest priority" + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 104 1 + check_err $? "Single filter - did not match" + + tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Low prio filter - did not match" + + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 198.0.0.0/8 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Two filters - did not match highest priority after add" + + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol ip pref 5 handle 104 flower + + log_test "bloom simple test ($tcflags)" +} + +bloom_complex_test() +{ + # Bloom filter index computation is affected from region ID, eRP + # ID and from the region key size. In order to excercise those parts + # of the Bloom filter code, use a series of regions, each with a + # different key size and send packet that should hit all of them. + local index + + RET=0 + NUM_CHAINS=4 + BASE_INDEX=100 + + # Create chain with up to 2 key blocks (ip_proto only) + tc chain add dev $h2 ingress chain 1 protocol ip flower \ + ip_proto tcp &> /dev/null + # Create chain with 2-4 key blocks (ip_proto, src MAC) + tc chain add dev $h2 ingress chain 2 protocol ip flower \ + ip_proto tcp \ + src_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF &> /dev/null + # Create chain with 4-8 key blocks (ip_proto, src & dst MAC, IPv4 dest) + tc chain add dev $h2 ingress chain 3 protocol ip flower \ + ip_proto tcp \ + dst_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF \ + src_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF \ + dst_ip 0.0.0.0/32 &> /dev/null + # Default chain contains all fields and therefore is 8-12 key blocks + tc chain add dev $h2 ingress chain 4 + + # We need at least 2 rules in every region to have eRP table active + # so create a dummy rule per chain using a different pattern + for i in $(eval echo {0..$NUM_CHAINS}); do + index=$((BASE_INDEX - 1 - i)) + tc filter add dev $h2 ingress chain $i protocol ip \ + pref 2 handle $index flower \ + $tcflags ip_proto tcp action drop + done + + # Add rules to test Bloom filter, each in a different chain + index=$BASE_INDEX + tc filter add dev $h2 ingress protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags dst_ip 192.0.0.0/16 action goto chain 1 + tc filter add dev $h2 ingress chain 1 protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags action goto chain 2 + tc filter add dev $h2 ingress chain 2 protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags src_mac $h1mac action goto chain 3 + tc filter add dev $h2 ingress chain 3 protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags dst_ip 192.0.0.0/8 action goto chain 4 + tc filter add dev $h2 ingress chain 4 protocol ip \ + pref 1 handle $((++index)) flower \ + $tcflags src_ip 192.0.2.0/24 action drop + + # Send a packet that is supposed to hit all chains + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + for i in $(eval echo {0..$NUM_CHAINS}); do + index=$((BASE_INDEX + i + 1)) + tc_check_packets "dev $h2 ingress" $index 1 + check_err $? "Did not match chain $i" + done + + # Rules cleanup + for i in $(eval echo {$NUM_CHAINS..0}); do + index=$((BASE_INDEX - i - 1)) + tc filter del dev $h2 ingress chain $i \ + pref 2 handle $index flower + index=$((BASE_INDEX + i + 1)) + tc filter del dev $h2 ingress chain $i \ + pref 1 handle $index flower + done + + # Chains cleanup + for i in $(eval echo {$NUM_CHAINS..1}); do + tc chain del dev $h2 ingress chain $i + done + + log_test "bloom complex test ($tcflags)" +} + + +bloom_delta_test() +{ + # When multiple masks are used, the eRP table is activated. When + # masks are close enough (delta) the masks reside on the same + # eRP table. This test verifies that the eRP table is correctly + # allocated and used in delta condition and that Bloom filter is + # still functional with delta. + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \ + $tcflags dst_ip 192.1.0.0/16 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.1.2.1 -B 192.1.2.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Single filter - did not match" + + tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \ + $tcflags dst_ip 192.2.1.0/24 action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.2.1.1 -B 192.2.1.2 \ + -t ip -q + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Delta filters - did not match second filter" + + tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower + tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower + + log_test "bloom delta test ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh new file mode 100755 index 000000000000..ae6146ec5afd --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh @@ -0,0 +1,1121 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test various aspects of VxLAN offloading which are specific to mlxsw, such +# as sanitization of invalid configurations and offload indication. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS="sanitization_test offload_indication_test \ + sanitization_vlan_aware_test offload_indication_vlan_aware_test" +NUM_NETIFS=2 +source $lib_dir/lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +sanitization_single_dev_test_pass() +{ + ip link set dev $swp1 master br0 + check_err $? + ip link set dev vxlan0 master br0 + check_err $? + + ip link set dev $swp1 nomaster + + ip link set dev $swp1 master br0 + check_err $? +} + +sanitization_single_dev_test_fail() +{ + ip link set dev $swp1 master br0 + check_err $? + ip link set dev vxlan0 master br0 &> /dev/null + check_fail $? + + ip link set dev $swp1 nomaster + + ip link set dev vxlan0 master br0 + check_err $? + ip link set dev $swp1 master br0 &> /dev/null + check_fail $? +} + +sanitization_single_dev_valid_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_pass + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device - valid configuration" +} + +sanitization_single_dev_vlan_aware_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_pass + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with a vlan-aware bridge" +} + +sanitization_single_dev_mcast_enabled_test() +{ + RET=0 + + ip link add dev br0 type bridge + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with a multicast enabled bridge" +} + +sanitization_single_dev_mcast_group_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 \ + dev $swp2 group 239.0.0.1 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with a multicast group" +} + +sanitization_single_dev_no_local_ip_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with no local ip" +} + +sanitization_single_dev_local_ipv6_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 2001:db8::1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with local ipv6 address" +} + +sanitization_single_dev_learning_enabled_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 learning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_pass + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with learning enabled" +} + +sanitization_single_dev_local_interface_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 dev $swp2 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with local interface" +} + +sanitization_single_dev_port_range_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 \ + srcport 4000 5000 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with udp source port range" +} + +sanitization_single_dev_tos_static_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos 20 local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with static tos" +} + +sanitization_single_dev_ttl_inherit_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl inherit tos inherit local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with inherit ttl" +} + +sanitization_single_dev_udp_checksum_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning udpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_single_dev_test_fail + + ip link del dev vxlan0 + ip link del dev br0 + + log_test "vxlan device with udp checksum" +} + +sanitization_single_dev_test() +{ + # These tests make sure that we correctly sanitize VxLAN device + # configurations we do not support + sanitization_single_dev_valid_test + sanitization_single_dev_vlan_aware_test + sanitization_single_dev_mcast_enabled_test + sanitization_single_dev_mcast_group_test + sanitization_single_dev_no_local_ip_test + sanitization_single_dev_local_ipv6_test + sanitization_single_dev_learning_enabled_test + sanitization_single_dev_local_interface_test + sanitization_single_dev_port_range_test + sanitization_single_dev_tos_static_test + sanitization_single_dev_ttl_inherit_test + sanitization_single_dev_udp_checksum_test +} + +sanitization_multi_devs_test_pass() +{ + ip link set dev $swp1 master br0 + check_err $? + ip link set dev vxlan0 master br0 + check_err $? + ip link set dev $swp2 master br1 + check_err $? + ip link set dev vxlan1 master br1 + check_err $? + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + + ip link set dev $swp1 master br0 + check_err $? + ip link set dev $swp2 master br1 + check_err $? +} + +sanitization_multi_devs_test_fail() +{ + ip link set dev $swp1 master br0 + check_err $? + ip link set dev vxlan0 master br0 + check_err $? + ip link set dev $swp2 master br1 + check_err $? + ip link set dev vxlan1 master br1 &> /dev/null + check_fail $? + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + + ip link set dev vxlan1 master br1 + check_err $? + ip link set dev $swp1 master br0 + check_err $? + ip link set dev $swp2 master br1 &> /dev/null + check_fail $? +} + +sanitization_multi_devs_valid_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + ip link add dev br1 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_multi_devs_test_pass + + ip link del dev vxlan1 + ip link del dev vxlan0 + ip link del dev br1 + ip link del dev br0 + + log_test "multiple vxlan devices - valid configuration" +} + +sanitization_multi_devs_ttl_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + ip link add dev br1 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ + ttl 40 tos inherit local 198.51.100.1 dstport 4789 + + sanitization_multi_devs_test_fail + + ip link del dev vxlan1 + ip link del dev vxlan0 + ip link del dev br1 + ip link del dev br0 + + log_test "multiple vxlan devices with different ttl" +} + +sanitization_multi_devs_udp_dstport_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + ip link add dev br1 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 5789 + + sanitization_multi_devs_test_fail + + ip link del dev vxlan1 + ip link del dev vxlan0 + ip link del dev br1 + ip link del dev br0 + + log_test "multiple vxlan devices with different udp destination port" +} + +sanitization_multi_devs_local_ip_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 + ip link add dev br1 type bridge mcast_snooping 0 + + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.2 dstport 4789 + + sanitization_multi_devs_test_fail + + ip link del dev vxlan1 + ip link del dev vxlan0 + ip link del dev br1 + ip link del dev br0 + + log_test "multiple vxlan devices with different local ip" +} + +sanitization_multi_devs_test() +{ + # The device has a single VTEP, which means all the VxLAN devices + # we offload must share certain properties such as source IP and + # UDP destination port. These tests make sure that we forbid + # configurations that violate this limitation + sanitization_multi_devs_valid_test + sanitization_multi_devs_ttl_test + sanitization_multi_devs_udp_dstport_test + sanitization_multi_devs_local_ip_test +} + +sanitization_test() +{ + sanitization_single_dev_test + sanitization_multi_devs_test +} + +offload_indication_setup_create() +{ + # Create a simple setup with two bridges, each with a VxLAN device + # and one local port + ip link add name br0 up type bridge mcast_snooping 0 + ip link add name br1 up type bridge mcast_snooping 0 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br1 + + ip address add 198.51.100.1/32 dev lo + + ip link add name vxlan0 up master br0 type vxlan id 10 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up master br1 type vxlan id 20 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 +} + +offload_indication_setup_destroy() +{ + ip link del dev vxlan1 + ip link del dev vxlan0 + + ip address del 198.51.100.1/32 dev lo + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + + ip link del dev br1 + ip link del dev br0 +} + +offload_indication_fdb_flood_test() +{ + RET=0 + + bridge fdb append 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.2 + + bridge fdb show brport vxlan0 | grep 00:00:00:00:00:00 \ + | grep -q offload + check_err $? + + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self + + log_test "vxlan flood entry offload indication" +} + +offload_indication_fdb_bridge_test() +{ + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self master static \ + dst 198.51.100.2 + + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_err $? + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_err $? + + log_test "vxlan entry offload indication - initial state" + + # Remove FDB entry from the bridge driver and check that corresponding + # entry in the VxLAN driver is not marked as offloaded + RET=0 + + bridge fdb del de:ad:be:ef:13:37 dev vxlan0 master + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_fail $? + + log_test "vxlan entry offload indication - after removal from bridge" + + # Add the FDB entry back to the bridge driver and make sure it is + # marked as offloaded in both drivers + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev vxlan0 master static + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_err $? + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_err $? + + log_test "vxlan entry offload indication - after re-add to bridge" + + # Remove FDB entry from the VxLAN driver and check that corresponding + # entry in the bridge driver is not marked as offloaded + RET=0 + + bridge fdb del de:ad:be:ef:13:37 dev vxlan0 self + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_fail $? + + log_test "vxlan entry offload indication - after removal from vxlan" + + # Add the FDB entry back to the VxLAN driver and make sure it is + # marked as offloaded in both drivers + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self dst 198.51.100.2 + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_err $? + bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_err $? + + log_test "vxlan entry offload indication - after re-add to vxlan" + + bridge fdb del de:ad:be:ef:13:37 dev vxlan0 self master +} + +offload_indication_fdb_test() +{ + offload_indication_fdb_flood_test + offload_indication_fdb_bridge_test +} + +offload_indication_decap_route_test() +{ + RET=0 + + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link set dev vxlan0 down + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link set dev vxlan1 down + ip route show table local | grep 198.51.100.1 | grep -q offload + check_fail $? + + log_test "vxlan decap route - vxlan device down" + + RET=0 + + ip link set dev vxlan1 up + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link set dev vxlan0 up + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + log_test "vxlan decap route - vxlan device up" + + RET=0 + + ip address delete 198.51.100.1/32 dev lo + ip route show table local | grep 198.51.100.1 | grep -q offload + check_fail $? + + ip address add 198.51.100.1/32 dev lo + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + log_test "vxlan decap route - add local route" + + RET=0 + + ip link set dev $swp1 nomaster + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link set dev $swp2 nomaster + ip route show table local | grep 198.51.100.1 | grep -q offload + check_fail $? + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br1 + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + log_test "vxlan decap route - local ports enslavement" + + RET=0 + + ip link del dev br0 + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link del dev br1 + ip route show table local | grep 198.51.100.1 | grep -q offload + check_fail $? + + log_test "vxlan decap route - bridge device deletion" + + RET=0 + + ip link add name br0 up type bridge mcast_snooping 0 + ip link add name br1 up type bridge mcast_snooping 0 + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br1 + ip link set dev vxlan0 master br0 + ip link set dev vxlan1 master br1 + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link del dev vxlan0 + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + ip link del dev vxlan1 + ip route show table local | grep 198.51.100.1 | grep -q offload + check_fail $? + + log_test "vxlan decap route - vxlan device deletion" + + ip link add name vxlan0 up master br0 type vxlan id 10 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan1 up master br1 type vxlan id 20 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 +} + +check_fdb_offloaded() +{ + local mac=00:11:22:33:44:55 + local zmac=00:00:00:00:00:00 + + bridge fdb show dev vxlan0 | grep $mac | grep self | grep -q offload + check_err $? + bridge fdb show dev vxlan0 | grep $mac | grep master | grep -q offload + check_err $? + + bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload + check_err $? +} + +check_vxlan_fdb_not_offloaded() +{ + local mac=00:11:22:33:44:55 + local zmac=00:00:00:00:00:00 + + bridge fdb show dev vxlan0 | grep $mac | grep -q self + check_err $? + bridge fdb show dev vxlan0 | grep $mac | grep self | grep -q offload + check_fail $? + + bridge fdb show dev vxlan0 | grep $zmac | grep -q self + check_err $? + bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload + check_fail $? +} + +check_bridge_fdb_not_offloaded() +{ + local mac=00:11:22:33:44:55 + local zmac=00:00:00:00:00:00 + + bridge fdb show dev vxlan0 | grep $mac | grep -q master + check_err $? + bridge fdb show dev vxlan0 | grep $mac | grep master | grep -q offload + check_fail $? +} + +__offload_indication_join_vxlan_first() +{ + local vid=$1; shift + + local mac=00:11:22:33:44:55 + local zmac=00:00:00:00:00:00 + + bridge fdb append $zmac dev vxlan0 self dst 198.51.100.2 + + ip link set dev vxlan0 master br0 + bridge fdb add dev vxlan0 $mac self master static dst 198.51.100.2 + + RET=0 + check_vxlan_fdb_not_offloaded + ip link set dev $swp1 master br0 + sleep .1 + check_fdb_offloaded + log_test "offload indication - attach vxlan first" + + RET=0 + ip link set dev vxlan0 down + check_vxlan_fdb_not_offloaded + check_bridge_fdb_not_offloaded + log_test "offload indication - set vxlan down" + + RET=0 + ip link set dev vxlan0 up + sleep .1 + check_fdb_offloaded + log_test "offload indication - set vxlan up" + + if [[ ! -z $vid ]]; then + RET=0 + bridge vlan del dev vxlan0 vid $vid + check_vxlan_fdb_not_offloaded + check_bridge_fdb_not_offloaded + log_test "offload indication - delete VLAN" + + RET=0 + bridge vlan add dev vxlan0 vid $vid + check_vxlan_fdb_not_offloaded + check_bridge_fdb_not_offloaded + log_test "offload indication - add tagged VLAN" + + RET=0 + bridge vlan add dev vxlan0 vid $vid pvid untagged + sleep .1 + check_fdb_offloaded + log_test "offload indication - add pvid/untagged VLAN" + fi + + RET=0 + ip link set dev $swp1 nomaster + check_vxlan_fdb_not_offloaded + log_test "offload indication - detach port" +} + +offload_indication_join_vxlan_first() +{ + ip link add dev br0 up type bridge mcast_snooping 0 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + __offload_indication_join_vxlan_first + + ip link del dev vxlan0 + ip link del dev br0 +} + +__offload_indication_join_vxlan_last() +{ + local zmac=00:00:00:00:00:00 + + RET=0 + + bridge fdb append $zmac dev vxlan0 self dst 198.51.100.2 + + ip link set dev $swp1 master br0 + + bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload + check_fail $? + + ip link set dev vxlan0 master br0 + + bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload + check_err $? + + log_test "offload indication - attach vxlan last" +} + +offload_indication_join_vxlan_last() +{ + ip link add dev br0 up type bridge mcast_snooping 0 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + __offload_indication_join_vxlan_last + + ip link del dev vxlan0 + ip link del dev br0 +} + +offload_indication_test() +{ + offload_indication_setup_create + offload_indication_fdb_test + offload_indication_decap_route_test + offload_indication_setup_destroy + + log_info "offload indication - replay & cleanup" + offload_indication_join_vxlan_first + offload_indication_join_vxlan_last +} + +sanitization_vlan_aware_test() +{ + RET=0 + + ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1 + + ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + ip link add name vxlan20 up master br0 type vxlan id 20 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + # Test that when each VNI is mapped to a different VLAN we can enslave + # a port to the bridge + bridge vlan add vid 10 dev vxlan10 pvid untagged + bridge vlan add vid 20 dev vxlan20 pvid untagged + + ip link set dev $swp1 master br0 + check_err $? + + log_test "vlan-aware - enslavement to vlan-aware bridge" + + # Try to map both VNIs to the same VLAN and make sure configuration + # fails + RET=0 + + bridge vlan add vid 10 dev vxlan20 pvid untagged &> /dev/null + check_fail $? + + log_test "vlan-aware - two vnis mapped to the same vlan" + + # Test that enslavement of a port to a bridge fails when two VNIs + # are mapped to the same VLAN + RET=0 + + ip link set dev $swp1 nomaster + + bridge vlan del vid 20 dev vxlan20 pvid untagged + bridge vlan add vid 10 dev vxlan20 pvid untagged + + ip link set dev $swp1 master br0 &> /dev/null + check_fail $? + + log_test "vlan-aware - failed enslavement to vlan-aware bridge" + + bridge vlan del vid 10 dev vxlan20 + bridge vlan add vid 20 dev vxlan20 pvid untagged + + # Test that offloading of an unsupported tunnel fails when it is + # triggered by addition of VLAN to a local port + RET=0 + + # TOS must be set to inherit + ip link set dev vxlan10 type vxlan tos 42 + + ip link set dev $swp1 master br0 + bridge vlan add vid 10 dev $swp1 &> /dev/null + check_fail $? + + log_test "vlan-aware - failed vlan addition to a local port" + + ip link set dev vxlan10 type vxlan tos inherit + + ip link del dev vxlan20 + ip link del dev vxlan10 + ip link del dev br0 +} + +offload_indication_vlan_aware_setup_create() +{ + # Create a simple setup with two VxLAN devices and a single VLAN-aware + # bridge + ip link add name br0 up type bridge mcast_snooping 0 vlan_filtering 1 \ + vlan_default_pvid 0 + + ip link set dev $swp1 master br0 + + bridge vlan add vid 10 dev $swp1 + bridge vlan add vid 20 dev $swp1 + + ip address add 198.51.100.1/32 dev lo + + ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + ip link add name vxlan20 up master br0 type vxlan id 20 nolearning \ + noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + bridge vlan add vid 10 dev vxlan10 pvid untagged + bridge vlan add vid 20 dev vxlan20 pvid untagged +} + +offload_indication_vlan_aware_setup_destroy() +{ + bridge vlan del vid 20 dev vxlan20 + bridge vlan del vid 10 dev vxlan10 + + ip link del dev vxlan20 + ip link del dev vxlan10 + + ip address del 198.51.100.1/32 dev lo + + bridge vlan del vid 20 dev $swp1 + bridge vlan del vid 10 dev $swp1 + + ip link set dev $swp1 nomaster + + ip link del dev br0 +} + +offload_indication_vlan_aware_fdb_test() +{ + RET=0 + + log_info "vxlan entry offload indication - vlan-aware" + + bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self master static \ + dst 198.51.100.2 vlan 10 + + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_err $? + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_err $? + + log_test "vxlan entry offload indication - initial state" + + # Remove FDB entry from the bridge driver and check that corresponding + # entry in the VxLAN driver is not marked as offloaded + RET=0 + + bridge fdb del de:ad:be:ef:13:37 dev vxlan10 master vlan 10 + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_fail $? + + log_test "vxlan entry offload indication - after removal from bridge" + + # Add the FDB entry back to the bridge driver and make sure it is + # marked as offloaded in both drivers + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev vxlan10 master static vlan 10 + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_err $? + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_err $? + + log_test "vxlan entry offload indication - after re-add to bridge" + + # Remove FDB entry from the VxLAN driver and check that corresponding + # entry in the bridge driver is not marked as offloaded + RET=0 + + bridge fdb del de:ad:be:ef:13:37 dev vxlan10 self + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_fail $? + + log_test "vxlan entry offload indication - after removal from vxlan" + + # Add the FDB entry back to the VxLAN driver and make sure it is + # marked as offloaded in both drivers + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self dst 198.51.100.2 + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \ + | grep -q offload + check_err $? + bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \ + | grep -q offload + check_err $? + + log_test "vxlan entry offload indication - after re-add to vxlan" + + bridge fdb del de:ad:be:ef:13:37 dev vxlan10 self master vlan 10 +} + +offload_indication_vlan_aware_decap_route_test() +{ + RET=0 + + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + # Toggle PVID flag on one VxLAN device and make sure route is still + # marked as offloaded + bridge vlan add vid 10 dev vxlan10 untagged + + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + # Toggle PVID flag on second VxLAN device and make sure route is no + # longer marked as offloaded + bridge vlan add vid 20 dev vxlan20 untagged + + ip route show table local | grep 198.51.100.1 | grep -q offload + check_fail $? + + # Toggle PVID flag back and make sure route is marked as offloaded + bridge vlan add vid 10 dev vxlan10 pvid untagged + bridge vlan add vid 20 dev vxlan20 pvid untagged + + ip route show table local | grep 198.51.100.1 | grep -q offload + check_err $? + + log_test "vxlan decap route - vni map/unmap" +} + +offload_indication_vlan_aware_join_vxlan_first() +{ + ip link add dev br0 up type bridge mcast_snooping 0 \ + vlan_filtering 1 vlan_default_pvid 1 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + __offload_indication_join_vxlan_first 1 + + ip link del dev vxlan0 + ip link del dev br0 +} + +offload_indication_vlan_aware_join_vxlan_last() +{ + ip link add dev br0 up type bridge mcast_snooping 0 \ + vlan_filtering 1 vlan_default_pvid 1 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + __offload_indication_join_vxlan_last + + ip link del dev vxlan0 + ip link del dev br0 +} + +offload_indication_vlan_aware_l3vni_test() +{ + local zmac=00:00:00:00:00:00 + + RET=0 + + sysctl_set net.ipv6.conf.default.disable_ipv6 1 + ip link add dev br0 up type bridge mcast_snooping 0 \ + vlan_filtering 1 vlan_default_pvid 0 + ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + ip link set dev $swp1 master br0 + + # The test will use the offload indication on the FDB entry to + # understand if the tunnel is offloaded or not + bridge fdb append $zmac dev vxlan0 self dst 192.0.2.1 + + ip link set dev vxlan0 master br0 + bridge vlan add dev vxlan0 vid 10 pvid untagged + + # No local port or router port is member in the VLAN, so tunnel should + # not be offloaded + bridge fdb show brport vxlan0 | grep $zmac | grep self \ + | grep -q offload + check_fail $? "vxlan tunnel offloaded when should not" + + # Configure a VLAN interface and make sure tunnel is offloaded + ip link add link br0 name br10 up type vlan id 10 + sysctl_set net.ipv6.conf.br10.disable_ipv6 0 + ip -6 address add 2001:db8:1::1/64 dev br10 + bridge fdb show brport vxlan0 | grep $zmac | grep self \ + | grep -q offload + check_err $? "vxlan tunnel not offloaded when should" + + # Unlink the VXLAN device, make sure tunnel is no longer offloaded, + # then add it back to the bridge and make sure it is offloaded + ip link set dev vxlan0 nomaster + bridge fdb show brport vxlan0 | grep $zmac | grep self \ + | grep -q offload + check_fail $? "vxlan tunnel offloaded after unlinked from bridge" + + ip link set dev vxlan0 master br0 + bridge fdb show brport vxlan0 | grep $zmac | grep self \ + | grep -q offload + check_fail $? "vxlan tunnel offloaded despite no matching vid" + + bridge vlan add dev vxlan0 vid 10 pvid untagged + bridge fdb show brport vxlan0 | grep $zmac | grep self \ + | grep -q offload + check_err $? "vxlan tunnel not offloaded after adding vid" + + log_test "vxlan - l3 vni" + + ip link del dev vxlan0 + ip link del dev br0 + sysctl_restore net.ipv6.conf.default.disable_ipv6 +} + +offload_indication_vlan_aware_test() +{ + offload_indication_vlan_aware_setup_create + offload_indication_vlan_aware_fdb_test + offload_indication_vlan_aware_decap_route_test + offload_indication_vlan_aware_setup_destroy + + log_info "offload indication - replay & cleanup - vlan aware" + offload_indication_vlan_aware_join_vxlan_first + offload_indication_vlan_aware_join_vxlan_last + offload_indication_vlan_aware_l3vni_test +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh new file mode 100755 index 000000000000..fedcb7b35af9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh @@ -0,0 +1,309 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test VxLAN flooding. The device stores flood records in a singly linked list +# where each record stores up to three IPv4 addresses of remote VTEPs. The test +# verifies that packets are correctly flooded in various cases such as deletion +# of a record in the middle of the list. +# +# +--------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 203.0.113.1/24| +# +----|---------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | +--|--------------------------------------------------------------------+ | +# | | + $swp1 BR0 (802.1d) | | +# | | | | +# | | + vxlan0 (vxlan) | | +# | | local 198.51.100.1 | | +# | | remote 198.51.100.{2..13} | | +# | | id 10 dstport 4789 | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 198.51.100.0/24 via 192.0.2.2 | +# | | +# | + $rp1 | +# | | 192.0.2.1/24 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | R2 (vrf) | +# | + $rp2 | +# | 192.0.2.2/24 | +# | | +# +-------------------------------------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS="flooding_test" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +h1_create() +{ + simple_if_init $h1 203.0.113.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 203.0.113.1/24 +} + +switch_create() +{ + # Make sure the bridge uses the MAC address of the local port and + # not that of the VxLAN's device + ip link add dev br0 type bridge mcast_snooping 0 + ip link set dev br0 address $(mac_get $swp1) + + ip link add name vxlan0 type vxlan id 10 nolearning noudpcsum \ + ttl 20 tos inherit local 198.51.100.1 dstport 4789 + + ip address add 198.51.100.1/32 dev lo + + ip link set dev $swp1 master br0 + ip link set dev vxlan0 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev vxlan0 up +} + +switch_destroy() +{ + ip link set dev vxlan0 down + ip link set dev $swp1 down + ip link set dev br0 down + + ip link set dev vxlan0 nomaster + ip link set dev $swp1 nomaster + + ip address del 198.51.100.1/32 dev lo + + ip link del dev vxlan0 + + ip link del dev br0 +} + +router1_create() +{ + # This router is in the default VRF, where the VxLAN device is + # performing the L3 lookup + ip link set dev $rp1 up + ip address add 192.0.2.1/24 dev $rp1 + ip route add 198.51.100.0/24 via 192.0.2.2 +} + +router1_destroy() +{ + ip route del 198.51.100.0/24 via 192.0.2.2 + ip address del 192.0.2.1/24 dev $rp1 + ip link set dev $rp1 down +} + +router2_create() +{ + # This router is not in the default VRF, so use simple_if_init() + simple_if_init $rp2 192.0.2.2/24 +} + +router2_destroy() +{ + simple_if_fini $rp2 192.0.2.2/24 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + vrf_prepare + + h1_create + + switch_create + + router1_create + router2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router2_destroy + router1_destroy + + switch_destroy + + h1_destroy + + vrf_cleanup +} + +flooding_remotes_add() +{ + local num_remotes=$1 + local lsb + local i + + for i in $(eval echo {1..$num_remotes}); do + lsb=$((i + 1)) + + bridge fdb append 00:00:00:00:00:00 dev vxlan0 self \ + dst 198.51.100.$lsb + done +} + +flooding_filters_add() +{ + local num_remotes=$1 + local lsb + local i + + tc qdisc add dev $rp2 clsact + + for i in $(eval echo {1..$num_remotes}); do + lsb=$((i + 1)) + + tc filter add dev $rp2 ingress protocol ip pref $i handle $i \ + flower ip_proto udp dst_ip 198.51.100.$lsb \ + dst_port 4789 skip_sw action drop + done +} + +flooding_filters_del() +{ + local num_remotes=$1 + local i + + for i in $(eval echo {1..$num_remotes}); do + tc filter del dev $rp2 ingress protocol ip pref $i \ + handle $i flower + done + + tc qdisc del dev $rp2 clsact +} + +flooding_check_packets() +{ + local packets=("$@") + local num_remotes=${#packets[@]} + local i + + for i in $(eval echo {1..$num_remotes}); do + tc_check_packets "dev $rp2 ingress" $i ${packets[i - 1]} + check_err $? "remote $i - did not get expected number of packets" + done +} + +flooding_test() +{ + # Use 12 remote VTEPs that will be stored in 4 records. The array + # 'packets' will store how many packets are expected to be received + # by each remote VTEP at each stage of the test + declare -a packets=(1 1 1 1 1 1 1 1 1 1 1 1) + local num_remotes=12 + + RET=0 + + # Add FDB entries for remote VTEPs and corresponding tc filters on the + # ingress of the nexthop router. These filters will count how many + # packets were flooded to each remote VTEP + flooding_remotes_add $num_remotes + flooding_filters_add $num_remotes + + # Send one packet and make sure it is flooded to all the remote VTEPs + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 1 packet" + + # Delete the third record which corresponds to VTEPs with LSB 8..10 + # and check that packet is flooded correctly when we remove a record + # from the middle of the list + RET=0 + + packets=(2 2 2 2 2 2 1 1 1 2 2 2) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.8 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.9 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.10 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 2 packets" + + # Delete the first record and make sure the packet is flooded correctly + RET=0 + + packets=(2 2 2 3 3 3 1 1 1 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.2 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.3 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.4 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 3 packets" + + # Delete the last record and make sure the packet is flooded correctly + RET=0 + + packets=(2 2 2 4 4 4 1 1 1 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.11 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.12 + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.13 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 4 packets" + + # Delete the last record, one entry at a time and make sure single + # entries are correctly removed + RET=0 + + packets=(2 2 2 4 5 5 1 1 1 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.5 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 5 packets" + + RET=0 + + packets=(2 2 2 4 5 6 1 1 1 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.6 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 6 packets" + + RET=0 + + packets=(2 2 2 4 5 6 1 1 1 3 3 3) + bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.7 + + $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1 + flooding_check_packets "${packets[@]}" + log_test "flood after 7 packets" + + flooding_filters_del $num_remotes +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/filesystems/binderfs/.gitignore b/tools/testing/selftests/filesystems/binderfs/.gitignore new file mode 100644 index 000000000000..8a5d9bf63dd4 --- /dev/null +++ b/tools/testing/selftests/filesystems/binderfs/.gitignore @@ -0,0 +1 @@ +binderfs_test diff --git a/tools/testing/selftests/filesystems/binderfs/Makefile b/tools/testing/selftests/filesystems/binderfs/Makefile new file mode 100644 index 000000000000..58cb659b56b4 --- /dev/null +++ b/tools/testing/selftests/filesystems/binderfs/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += -I../../../../../usr/include/ +TEST_GEN_PROGS := binderfs_test + +include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c new file mode 100644 index 000000000000..8c2ed962e1c7 --- /dev/null +++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#include <linux/android/binder.h> +#include <linux/android/binderfs.h> +#include "../../kselftest.h" + +static ssize_t write_nointr(int fd, const void *buf, size_t count) +{ + ssize_t ret; +again: + ret = write(fd, buf, count); + if (ret < 0 && errno == EINTR) + goto again; + + return ret; +} + +static void write_to_file(const char *filename, const void *buf, size_t count, + int allowed_errno) +{ + int fd, saved_errno; + ssize_t ret; + + fd = open(filename, O_WRONLY | O_CLOEXEC); + if (fd < 0) + ksft_exit_fail_msg("%s - Failed to open file %s\n", + strerror(errno), filename); + + ret = write_nointr(fd, buf, count); + if (ret < 0) { + if (allowed_errno && (errno == allowed_errno)) { + close(fd); + return; + } + + goto on_error; + } + + if ((size_t)ret != count) + goto on_error; + + close(fd); + return; + +on_error: + saved_errno = errno; + close(fd); + errno = saved_errno; + + if (ret < 0) + ksft_exit_fail_msg("%s - Failed to write to file %s\n", + strerror(errno), filename); + + ksft_exit_fail_msg("Failed to write to file %s\n", filename); +} + +static void change_to_userns(void) +{ + int ret; + uid_t uid; + gid_t gid; + /* {g,u}id_map files only allow a max of 4096 bytes written to them */ + char idmap[4096]; + + uid = getuid(); + gid = getgid(); + + ret = unshare(CLONE_NEWUSER); + if (ret < 0) + ksft_exit_fail_msg("%s - Failed to unshare user namespace\n", + strerror(errno)); + + write_to_file("/proc/self/setgroups", "deny", strlen("deny"), ENOENT); + + ret = snprintf(idmap, sizeof(idmap), "0 %d 1", uid); + if (ret < 0 || (size_t)ret >= sizeof(idmap)) + ksft_exit_fail_msg("%s - Failed to prepare uid mapping\n", + strerror(errno)); + + write_to_file("/proc/self/uid_map", idmap, strlen(idmap), 0); + + ret = snprintf(idmap, sizeof(idmap), "0 %d 1", gid); + if (ret < 0 || (size_t)ret >= sizeof(idmap)) + ksft_exit_fail_msg("%s - Failed to prepare uid mapping\n", + strerror(errno)); + + write_to_file("/proc/self/gid_map", idmap, strlen(idmap), 0); + + ret = setgid(0); + if (ret) + ksft_exit_fail_msg("%s - Failed to setgid(0)\n", + strerror(errno)); + + ret = setuid(0); + if (ret) + ksft_exit_fail_msg("%s - Failed to setgid(0)\n", + strerror(errno)); +} + +static void change_to_mountns(void) +{ + int ret; + + ret = unshare(CLONE_NEWNS); + if (ret < 0) + ksft_exit_fail_msg("%s - Failed to unshare mount namespace\n", + strerror(errno)); + + ret = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0); + if (ret < 0) + ksft_exit_fail_msg("%s - Failed to mount / as private\n", + strerror(errno)); +} + +static void rmdir_protect_errno(const char *dir) +{ + int saved_errno = errno; + (void)rmdir(dir); + errno = saved_errno; +} + +static void __do_binderfs_test(void) +{ + int fd, ret, saved_errno; + size_t len; + ssize_t wret; + bool keep = false; + struct binderfs_device device = { 0 }; + struct binder_version version = { 0 }; + + change_to_mountns(); + + ret = mkdir("/dev/binderfs", 0755); + if (ret < 0) { + if (errno != EEXIST) + ksft_exit_fail_msg( + "%s - Failed to create binderfs mountpoint\n", + strerror(errno)); + + keep = true; + } + + ret = mount(NULL, "/dev/binderfs", "binder", 0, 0); + if (ret < 0) { + if (errno != ENODEV) + ksft_exit_fail_msg("%s - Failed to mount binderfs\n", + strerror(errno)); + + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_skip( + "The Android binderfs filesystem is not available\n"); + } + + /* binderfs mount test passed */ + ksft_inc_pass_cnt(); + + memcpy(device.name, "my-binder", strlen("my-binder")); + + fd = open("/dev/binderfs/binder-control", O_RDONLY | O_CLOEXEC); + if (fd < 0) + ksft_exit_fail_msg( + "%s - Failed to open binder-control device\n", + strerror(errno)); + + ret = ioctl(fd, BINDER_CTL_ADD, &device); + saved_errno = errno; + close(fd); + errno = saved_errno; + if (ret < 0) { + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_fail_msg( + "%s - Failed to allocate new binder device\n", + strerror(errno)); + } + + ksft_print_msg( + "Allocated new binder device with major %d, minor %d, and name %s\n", + device.major, device.minor, device.name); + + /* binder device allocation test passed */ + ksft_inc_pass_cnt(); + + fd = open("/dev/binderfs/my-binder", O_CLOEXEC | O_RDONLY); + if (fd < 0) { + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_fail_msg("%s - Failed to open my-binder device\n", + strerror(errno)); + } + + ret = ioctl(fd, BINDER_VERSION, &version); + saved_errno = errno; + close(fd); + errno = saved_errno; + if (ret < 0) { + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_fail_msg( + "%s - Failed to open perform BINDER_VERSION request\n", + strerror(errno)); + } + + ksft_print_msg("Detected binder version: %d\n", + version.protocol_version); + + /* binder transaction with binderfs binder device passed */ + ksft_inc_pass_cnt(); + + ret = unlink("/dev/binderfs/my-binder"); + if (ret < 0) { + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_fail_msg("%s - Failed to delete binder device\n", + strerror(errno)); + } + + /* binder device removal passed */ + ksft_inc_pass_cnt(); + + ret = unlink("/dev/binderfs/binder-control"); + if (!ret) { + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_fail_msg("Managed to delete binder-control device\n"); + } else if (errno != EPERM) { + keep ? : rmdir_protect_errno("/dev/binderfs"); + ksft_exit_fail_msg( + "%s - Failed to delete binder-control device but exited with unexpected error code\n", + strerror(errno)); + } + + /* binder-control device removal failed as expected */ + ksft_inc_xfail_cnt(); + +on_error: + ret = umount2("/dev/binderfs", MNT_DETACH); + keep ?: rmdir_protect_errno("/dev/binderfs"); + if (ret < 0) + ksft_exit_fail_msg("%s - Failed to unmount binderfs\n", + strerror(errno)); + + /* binderfs unmount test passed */ + ksft_inc_pass_cnt(); +} + +static void binderfs_test_privileged() +{ + if (geteuid() != 0) + ksft_print_msg( + "Tests are not run as root. Skipping privileged tests\n"); + else + __do_binderfs_test(); +} + +static void binderfs_test_unprivileged() +{ + change_to_userns(); + __do_binderfs_test(); +} + +int main(int argc, char *argv[]) +{ + binderfs_test_privileged(); + binderfs_test_unprivileged(); + ksft_exit_pass(); +} diff --git a/tools/testing/selftests/filesystems/binderfs/config b/tools/testing/selftests/filesystems/binderfs/config new file mode 100644 index 000000000000..02dd6cc9cf99 --- /dev/null +++ b/tools/testing/selftests/filesystems/binderfs/config @@ -0,0 +1,3 @@ +CONFIG_ANDROID=y +CONFIG_ANDROID_BINDERFS=y +CONFIG_ANDROID_BINDER_IPC=y diff --git a/tools/testing/selftests/firmware/config b/tools/testing/selftests/firmware/config index bf634dda0720..913a25a4a32b 100644 --- a/tools/testing/selftests/firmware/config +++ b/tools/testing/selftests/firmware/config @@ -1,5 +1,6 @@ CONFIG_TEST_FIRMWARE=y CONFIG_FW_LOADER=y CONFIG_FW_LOADER_USER_HELPER=y +CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh index a4320c4b44dc..466cf2f91ba0 100755 --- a/tools/testing/selftests/firmware/fw_filesystem.sh +++ b/tools/testing/selftests/firmware/fw_filesystem.sh @@ -155,11 +155,8 @@ read_firmwares() { for i in $(seq 0 3); do config_set_read_fw_idx $i - # Verify the contents are what we expect. - # -Z required for now -- check for yourself, md5sum - # on $FW and DIR/read_firmware will yield the same. Even - # cmp agrees, so something is off. - if ! diff -q -Z "$FW" $DIR/read_firmware 2>/dev/null ; then + # Verify the contents match + if ! diff -q "$FW" $DIR/read_firmware 2>/dev/null ; then echo "request #$i: firmware was not loaded" >&2 exit 1 fi @@ -171,7 +168,7 @@ read_firmwares_expect_nofile() for i in $(seq 0 3); do config_set_read_fw_idx $i # Ensures contents differ - if diff -q -Z "$FW" $DIR/read_firmware 2>/dev/null ; then + if diff -q "$FW" $DIR/read_firmware 2>/dev/null ; then echo "request $i: file was not expected to match" >&2 exit 1 fi diff --git a/tools/testing/selftests/ftrace/config b/tools/testing/selftests/ftrace/config index 07db5ab09cc7..c2c8de4fafff 100644 --- a/tools/testing/selftests/ftrace/config +++ b/tools/testing/selftests/ftrace/config @@ -4,6 +4,12 @@ CONFIG_FUNCTION_PROFILER=y CONFIG_TRACER_SNAPSHOT=y CONFIG_STACK_TRACER=y CONFIG_HIST_TRIGGERS=y +CONFIG_SCHED_TRACER=y CONFIG_PREEMPT_TRACER=y CONFIG_IRQSOFF_TRACER=y CONFIG_PREEMPTIRQ_DELAY_TEST=m +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_SAMPLES=y +CONFIG_SAMPLE_TRACE_PRINTK=m +CONFIG_KALLSYMS_ALL=y diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index f9a9d424c980..75244db70331 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -60,15 +60,29 @@ parse_opts() { # opts shift 1 ;; --verbose|-v|-vv|-vvv) + if [ $VERBOSE -eq -1 ]; then + usage "--console can not use with --verbose" + fi VERBOSE=$((VERBOSE + 1)) [ $1 = '-vv' ] && VERBOSE=$((VERBOSE + 1)) [ $1 = '-vvv' ] && VERBOSE=$((VERBOSE + 2)) shift 1 ;; + --console) + if [ $VERBOSE -ne 0 ]; then + usage "--console can not use with --verbose" + fi + VERBOSE=-1 + shift 1 + ;; --debug|-d) DEBUG=1 shift 1 ;; + --stop-fail) + STOP_FAILURE=1 + shift 1 + ;; --fail-unsupported) UNSUPPORTED_RESULT=1 shift 1 @@ -117,6 +131,7 @@ KEEP_LOG=0 DEBUG=0 VERBOSE=0 UNSUPPORTED_RESULT=0 +STOP_FAILURE=0 # Parse command-line options parse_opts $* @@ -137,11 +152,33 @@ else date > $LOG_FILE fi +# Define text colors +# Check available colors on the terminal, if any +ncolors=`tput colors 2>/dev/null` +color_reset= +color_red= +color_green= +color_blue= +# If stdout exists and number of colors is eight or more, use them +if [ -t 1 -a "$ncolors" -a "$ncolors" -ge 8 ]; then + color_reset="\e[0m" + color_red="\e[31m" + color_green="\e[32m" + color_blue="\e[34m" +fi + +strip_esc() { + # busybox sed implementation doesn't accept "\x1B", so use [:cntrl:] instead. + sed -E "s/[[:cntrl:]]\[([0-9]{1,2}(;[0-9]{1,2})?)?[m|K]//g" +} + prlog() { # messages - [ -z "$LOG_FILE" ] && echo "$@" || echo "$@" | tee -a $LOG_FILE + echo -e "$@" + [ "$LOG_FILE" ] && echo -e "$@" | strip_esc >> $LOG_FILE } catlog() { #file - [ -z "$LOG_FILE" ] && cat $1 || cat $1 | tee -a $LOG_FILE + cat $1 + [ "$LOG_FILE" ] && cat $1 | strip_esc >> $LOG_FILE } prlog "=== Ftrace unit tests ===" @@ -180,37 +217,37 @@ test_on_instance() { # testfile eval_result() { # sigval case $1 in $PASS) - prlog " [PASS]" + prlog " [${color_green}PASS${color_reset}]" PASSED_CASES="$PASSED_CASES $CASENO" return 0 ;; $FAIL) - prlog " [FAIL]" + prlog " [${color_red}FAIL${color_reset}]" FAILED_CASES="$FAILED_CASES $CASENO" return 1 # this is a bug. ;; $UNRESOLVED) - prlog " [UNRESOLVED]" + prlog " [${color_blue}UNRESOLVED${color_reset}]" UNRESOLVED_CASES="$UNRESOLVED_CASES $CASENO" return 1 # this is a kind of bug.. something happened. ;; $UNTESTED) - prlog " [UNTESTED]" + prlog " [${color_blue}UNTESTED${color_reset}]" UNTESTED_CASES="$UNTESTED_CASES $CASENO" return 0 ;; $UNSUPPORTED) - prlog " [UNSUPPORTED]" + prlog " [${color_blue}UNSUPPORTED${color_reset}]" UNSUPPORTED_CASES="$UNSUPPORTED_CASES $CASENO" return $UNSUPPORTED_RESULT # depends on use case ;; $XFAIL) - prlog " [XFAIL]" + prlog " [${color_red}XFAIL${color_reset}]" XFAILED_CASES="$XFAILED_CASES $CASENO" return 0 ;; *) - prlog " [UNDEFINED]" + prlog " [${color_blue}UNDEFINED${color_reset}]" UNDEFINED_CASES="$UNDEFINED_CASES $CASENO" return 1 # this must be a test bug ;; @@ -269,16 +306,18 @@ __run_test() { # testfile # Run one test case run_test() { # testfile local testname=`basename $1` + testcase $1 if [ ! -z "$LOG_FILE" ] ; then - local testlog=`mktemp $LOG_DIR/${testname}-log.XXXXXX` + local testlog=`mktemp $LOG_DIR/${CASENO}-${testname}-log.XXXXXX` else local testlog=/proc/self/fd/1 fi export TMPDIR=`mktemp -d /tmp/ftracetest-dir.XXXXXX` - testcase $1 echo "execute$INSTANCE: "$1 > $testlog SIG_RESULT=0 - if [ -z "$LOG_FILE" ]; then + if [ $VERBOSE -eq -1 ]; then + __run_test $1 + elif [ -z "$LOG_FILE" ]; then __run_test $1 2>&1 elif [ $VERBOSE -ge 3 ]; then __run_test $1 | tee -a $testlog 2>&1 @@ -304,6 +343,10 @@ run_test() { # testfile # Main loop for t in $TEST_CASES; do run_test $t + if [ $STOP_FAILURE -ne 0 -a $TOTAL_RESULT -ne 0 ]; then + echo "A failure detected. Stop test." + exit 1 + fi done # Test on instance loop @@ -315,7 +358,12 @@ for t in $TEST_CASES; do run_test $t rmdir $TRACING_DIR TRACING_DIR=$SAVED_TRACING_DIR + if [ $STOP_FAILURE -ne 0 -a $TOTAL_RESULT -ne 0 ]; then + echo "A failure detected. Stop test." + exit 1 + fi done +(cd $TRACING_DIR; initialize_ftrace) # for cleanup prlog "" prlog "# of passed: " `echo $PASSED_CASES | wc -w` diff --git a/tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_size.tc b/tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_size.tc new file mode 100644 index 000000000000..ab70f0077c35 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_size.tc @@ -0,0 +1,22 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Change the ringbuffer size +# flags: instance + +rb_size_test() { +ORIG=`cat buffer_size_kb` + +expr $ORIG / 2 > buffer_size_kb + +expr $ORIG \* 2 > buffer_size_kb + +echo $ORIG > buffer_size_kb +} + +rb_size_test + +: "If per-cpu buffer is supported, imbalance it" +if [ -d per_cpu/cpu0 ]; then + cd per_cpu/cpu0 + rb_size_test +fi diff --git a/tools/testing/selftests/ftrace/test.d/00basic/trace_pipe.tc b/tools/testing/selftests/ftrace/test.d/00basic/trace_pipe.tc new file mode 100644 index 000000000000..5058fbcfd90f --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/00basic/trace_pipe.tc @@ -0,0 +1,16 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: trace_pipe and trace_marker +# flags: instance + +[ ! -f trace_marker ] && exit_unsupported + +echo "test input 1" > trace_marker + +: "trace interface never consume the ring buffer" +grep -q "test input 1" trace +grep -q "test input 1" trace + +: "trace interface never consume the ring buffer" +head -n 1 trace_pipe | grep -q "test input 1" +! grep -q "test input 1" trace diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc new file mode 100644 index 000000000000..c6d8387dbbb8 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc @@ -0,0 +1,30 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Generic dynamic event - add/remove kprobe events + +[ -f dynamic_events ] || exit_unsupported + +grep -q "place: \[<module>:\]<symbol>" README || exit_unsupported +grep -q "place (kretprobe): \[<module>:\]<symbol>" README || exit_unsupported + +echo 0 > events/enable +echo > dynamic_events + +PLACE=_do_fork + +echo "p:myevent1 $PLACE" >> dynamic_events +echo "r:myevent2 $PLACE" >> dynamic_events + +grep -q myevent1 dynamic_events +grep -q myevent2 dynamic_events +test -d events/kprobes/myevent1 +test -d events/kprobes/myevent2 + +echo "-:myevent2" >> dynamic_events + +grep -q myevent1 dynamic_events +! grep -q myevent2 dynamic_events + +echo > dynamic_events + +clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_synth.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_synth.tc new file mode 100644 index 000000000000..62b77b5941d0 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_synth.tc @@ -0,0 +1,27 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Generic dynamic event - add/remove synthetic events + +[ -f dynamic_events ] || exit_unsupported + +grep -q "s:\[synthetic/\]" README || exit_unsupported + +echo 0 > events/enable +echo > dynamic_events + +echo "s:latency1 u64 lat; pid_t pid;" >> dynamic_events +echo "s:latency2 u64 lat; pid_t pid;" >> dynamic_events + +grep -q latency1 dynamic_events +grep -q latency2 dynamic_events +test -d events/synthetic/latency1 +test -d events/synthetic/latency2 + +echo "-:synthetic/latency2" >> dynamic_events + +grep -q latency1 dynamic_events +! grep -q latency2 dynamic_events + +echo > dynamic_events + +clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc new file mode 100644 index 000000000000..e0842109cb57 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc @@ -0,0 +1,50 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Generic dynamic event - selective clear (compatibility) + +[ -f dynamic_events ] || exit_unsupported + +grep -q "place: \[<module>:\]<symbol>" README || exit_unsupported +grep -q "place (kretprobe): \[<module>:\]<symbol>" README || exit_unsupported + +grep -q "s:\[synthetic/\]" README || exit_unsupported + +[ -f synthetic_events ] || exit_unsupported +[ -f kprobe_events ] || exit_unsupported + +echo 0 > events/enable +echo > dynamic_events + +PLACE=_do_fork + +setup_events() { +echo "p:myevent1 $PLACE" >> dynamic_events +echo "s:latency1 u64 lat; pid_t pid;" >> dynamic_events +echo "r:myevent2 $PLACE" >> dynamic_events +echo "s:latency2 u64 lat; pid_t pid;" >> dynamic_events + +grep -q myevent1 dynamic_events +grep -q myevent2 dynamic_events +grep -q latency1 dynamic_events +grep -q latency2 dynamic_events +} + +setup_events +echo > synthetic_events + +grep -q myevent1 dynamic_events +grep -q myevent2 dynamic_events +! grep -q latency1 dynamic_events +! grep -q latency2 dynamic_events + +echo > dynamic_events + +setup_events +echo > kprobe_events + +! grep -q myevent1 dynamic_events +! grep -q myevent2 dynamic_events +grep -q latency1 dynamic_events +grep -q latency2 dynamic_events + +echo > dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc new file mode 100644 index 000000000000..901922e97878 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc @@ -0,0 +1,49 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Generic dynamic event - generic clear event + +[ -f dynamic_events ] || exit_unsupported + +grep -q "place: \[<module>:\]<symbol>" README || exit_unsupported +grep -q "place (kretprobe): \[<module>:\]<symbol>" README || exit_unsupported + +grep -q "s:\[synthetic/\]" README || exit_unsupported + +echo 0 > events/enable +echo > dynamic_events + +PLACE=_do_fork + +setup_events() { +echo "p:myevent1 $PLACE" >> dynamic_events +echo "s:latency1 u64 lat; pid_t pid;" >> dynamic_events +echo "r:myevent2 $PLACE" >> dynamic_events +echo "s:latency2 u64 lat; pid_t pid;" >> dynamic_events + +grep -q myevent1 dynamic_events +grep -q myevent2 dynamic_events +grep -q latency1 dynamic_events +grep -q latency2 dynamic_events +} + +setup_events + +echo "!p:myevent1 $PLACE" >> dynamic_events +! grep -q myevent1 dynamic_events +grep -q myevent2 dynamic_events +grep -q latency1 dynamic_events +grep -q latency2 dynamic_events + +echo "!s:latency1 u64 lat; pid_t pid;" >> dynamic_events +grep -q myevent2 dynamic_events +! grep -q latency1 dynamic_events +grep -q latency2 dynamic_events + +echo "!r:myevent2 $PLACE" >> dynamic_events +! grep -q myevent2 dynamic_events +grep -q latency2 dynamic_events + +echo "!s:latency2 u64 lat; pid_t pid;" >> dynamic_events +! grep -q latency2 dynamic_events + +echo > dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/event/event-enable.tc b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc index 9daf034186f5..dfb0d5122f7b 100644 --- a/tools/testing/selftests/ftrace/test.d/event/event-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc @@ -9,23 +9,15 @@ do_reset() { } fail() { #msg - do_reset echo $1 exit_fail } -yield() { - ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1 -} - if [ ! -f set_event -o ! -d events/sched ]; then echo "event tracing is not supported" exit_unsupported fi -reset_tracer -do_reset - echo 'sched:sched_switch' > set_event yield @@ -57,6 +49,4 @@ if [ $count -ne 0 ]; then fail "sched_switch events should not be recorded" fi -do_reset - exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/event/event-pid.tc b/tools/testing/selftests/ftrace/test.d/event/event-pid.tc index 132478b305c2..f9cb214220b1 100644 --- a/tools/testing/selftests/ftrace/test.d/event/event-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/event/event-pid.tc @@ -16,10 +16,6 @@ fail() { #msg exit_fail } -yield() { - ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1 -} - if [ ! -f set_event -o ! -d events/sched ]; then echo "event tracing is not supported" exit_unsupported @@ -30,8 +26,7 @@ if [ ! -f set_event_pid ]; then exit_unsupported fi -reset_tracer -do_reset +echo 0 > options/event-fork echo 1 > events/sched/sched_switch/enable @@ -47,6 +42,7 @@ do_reset read mypid rest < /proc/self/stat echo $mypid > set_event_pid +grep -q $mypid set_event_pid echo 'sched:sched_switch' > set_event yield diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc index 6a37a8642ee6..83a8c571e93a 100644 --- a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc @@ -9,23 +9,15 @@ do_reset() { } fail() { #msg - do_reset echo $1 exit_fail } -yield() { - ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1 -} - if [ ! -f set_event -o ! -d events/sched ]; then echo "event tracing is not supported" exit_unsupported fi -reset_tracer -do_reset - echo 'sched:*' > set_event yield @@ -57,6 +49,4 @@ if [ $count -ne 0 ]; then fail "any of scheduler events should not be recorded" fi -do_reset - exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc index 4e9b6e2c0219..84d7bda08d2a 100644 --- a/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc @@ -8,23 +8,15 @@ do_reset() { } fail() { #msg - do_reset echo $1 exit_fail } -yield() { - ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1 -} - if [ ! -f available_events -o ! -f set_event -o ! -d events ]; then echo "event tracing is not supported" exit_unsupported fi -reset_tracer -do_reset - echo '*:*' > set_event yield @@ -60,6 +52,4 @@ if [ $count -ne 0 ]; then fail "any of events should not be recorded" fi -do_reset - exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/event/trace_printk.tc b/tools/testing/selftests/ftrace/test.d/event/trace_printk.tc new file mode 100644 index 000000000000..b02550b42be9 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/event/trace_printk.tc @@ -0,0 +1,27 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Test trace_printk from module + +rmmod trace-printk ||: +if ! modprobe trace-printk ; then + echo "No trace-printk sample module - please make CONFIG_SAMPLE_TRACE_PRINTK=m" + exit_unresolved; +fi + +echo "Waiting for irq work" +sleep 1 + +grep -q ": This .* trace_bputs" trace +grep -q ": This .* trace_puts" trace +grep -q ": This .* trace_bprintk" trace +grep -q ": This .* trace_printk" trace + +grep -q ": (irq) .* trace_bputs" trace +grep -q ": (irq) .* trace_puts" trace +grep -q ": (irq) .* trace_bprintk" trace +grep -q ": (irq) .* trace_printk" trace + +grep -q "This is a %s that will use trace_bprintk" printk_formats +grep -q "(irq) This is a static string that will use trace_bputs" printk_formats + +rmmod trace-printk ||: diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc index 1aec99d108eb..aefab0c66d54 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc @@ -16,13 +16,9 @@ if [ ! -f set_ftrace_filter ]; then fi do_reset() { - reset_tracer if [ -e /proc/sys/kernel/stack_tracer_enabled ]; then echo 0 > /proc/sys/kernel/stack_tracer_enabled fi - enable_tracing - clear_trace - echo > set_ftrace_filter } fail() { # msg diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc index 9f8d27ca39cf..c8a5209f2119 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc @@ -9,14 +9,7 @@ if ! grep -q function_graph available_tracers; then exit_unsupported fi -do_reset() { - reset_tracer - enable_tracing - clear_trace -} - fail() { # msg - do_reset echo $1 exit_fail } @@ -48,6 +41,4 @@ if [ $count -eq 0 ]; then fail "No schedule traces found?" fi -do_reset - exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc index 524ce24b3c22..64cfcc75e3c1 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc @@ -35,12 +35,6 @@ if [ $do_function_fork -eq 1 ]; then fi do_reset() { - reset_tracer - clear_trace - enable_tracing - echo > set_ftrace_filter - echo > set_ftrace_pid - if [ $do_function_fork -eq 0 ]; then return fi @@ -54,10 +48,6 @@ fail() { # msg exit_fail } -yield() { - ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1 -} - do_test() { disable_tracing diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc new file mode 100644 index 000000000000..36fb59f886ea --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc @@ -0,0 +1,12 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: ftrace - stacktrace filter command +# flags: instance + +echo _do_fork:stacktrace >> set_ftrace_filter + +grep -q "_do_fork:stacktrace:unlimited" set_ftrace_filter + +(echo "forked"; sleep 1) + +grep -q "<stack trace>" trace diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc new file mode 100644 index 000000000000..86a1f07ef2ca --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc @@ -0,0 +1,42 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: ftrace - function trace with cpumask + +if ! which nproc ; then + nproc() { + ls -d /sys/devices/system/cpu/cpu[0-9]* | wc -l + } +fi + +NP=`nproc` + +if [ $NP -eq 1 ] ;then + echo "We can not test cpumask on UP environment" + exit_unresolved +fi + +ORIG_CPUMASK=`cat tracing_cpumask` + +do_reset() { + echo $ORIG_CPUMASK > tracing_cpumask +} + +echo 0 > tracing_on +echo > trace +: "Bitmask only record on CPU1" +echo 2 > tracing_cpumask +MASK=0x`cat tracing_cpumask` +test `printf "%d" $MASK` -eq 2 || do_reset + +echo function > current_tracer +echo 1 > tracing_on +(echo "forked") +echo 0 > tracing_on + +: "Check CPU1 events are recorded" +grep -q -e "\[001\]" trace || do_reset + +: "There should be No other cpu events" +! grep -qv -e "\[001\]" -e "^#" trace || do_reset + +do_reset diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc index 6fed4cf2db81..ca2ffd7957f9 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc @@ -25,15 +25,12 @@ do_reset() { } fail() { # mesg - do_reset echo $1 exit_fail } SLEEP_TIME=".1" -do_reset - echo "Testing function probes with events:" EVENT="sched:sched_switch" diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc new file mode 100644 index 000000000000..9330c873f9fe --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc @@ -0,0 +1,24 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: ftrace - function trace on module + +[ ! -f set_ftrace_filter ] && exit_unsupported + +: "mod: allows to filter a non exist function" +echo 'non_exist_func:mod:non_exist_module' > set_ftrace_filter +grep -q "non_exist_func" set_ftrace_filter + +: "mod: on exist module" +echo '*:mod:trace_printk' > set_ftrace_filter +if ! modprobe trace-printk ; then + echo "No trace-printk sample module - please make CONFIG_SAMPLE_TRACE_PRINTK= +m" + exit_unresolved; +fi + +: "Wildcard should be resolved after loading module" +grep -q "trace_printk_irq_work" set_ftrace_filter + +: "After removing the filter becomes empty" +rmmod trace_printk +test `cat set_ftrace_filter | wc -l` -eq 0 diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_profile_stat.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_profile_stat.tc new file mode 100644 index 000000000000..0d501058aa75 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_profile_stat.tc @@ -0,0 +1,22 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: ftrace - function profiling + +[ ! -f function_profile_enabled ] && exit_unsupported + +: "Enable function profile" +echo 1 > function_profile_enabled + +: "Profile must be updated" +cp trace_stat/function0 $TMPDIR/ +( echo "forked"; sleep 1 ) +: "diff returns 0 if there is no difference" +! diff trace_stat/function0 $TMPDIR/function0 + +echo 0 > function_profile_enabled + +: "Profile must NOT be updated" +cp trace_stat/function0 $TMPDIR/ +( echo "forked"; sleep 1 ) +: "diff returns 0 if there is no difference" +diff trace_stat/function0 $TMPDIR/function0 diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc index b2d5a8febfe8..dfbae637c60c 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc @@ -29,8 +29,6 @@ if [ ! -f function_profile_enabled ]; then fi fail() { # mesg - reset_tracer - echo > set_ftrace_filter echo $1 exit_fail } @@ -76,6 +74,4 @@ if ! grep -v -e '^#' -e 'schedule' trace > /dev/null; then fail "no other functions besides schedule was found" fi -reset_tracer - exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc index 68e7a48f5828..51f6e6146bd9 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc @@ -15,22 +15,11 @@ if [ ! -f set_ftrace_filter ]; then exit_unsupported fi -do_reset() { - reset_tracer - reset_ftrace_filter - disable_events - clear_trace - enable_tracing -} - fail() { # mesg - do_reset echo $1 exit_fail } -do_reset - FILTER=set_ftrace_filter FUNC1="schedule" FUNC2="do_softirq" @@ -165,6 +154,4 @@ test_actual rm $TMPDIR/expected rm $TMPDIR/actual -do_reset - exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc new file mode 100644 index 000000000000..b414f0e3c646 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc @@ -0,0 +1,39 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: ftrace - Max stack tracer +# Test the basic function of max-stack usage tracing + +if [ ! -f stack_trace ]; then + echo "Max stack tracer is not supported - please make CONFIG_STACK_TRACER=y" + exit_unsupported +fi + +echo > stack_trace_filter +echo 0 > stack_max_size +echo 1 > /proc/sys/kernel/stack_tracer_enabled + +: "Fork and wait for the first entry become !lock" +timeout=10 +while [ $timeout -ne 0 ]; do + ( echo "forked" ) + FL=`grep " 0)" stack_trace` + echo $FL | grep -q "lock" || break; + timeout=$((timeout - 1)) +done +echo 0 > /proc/sys/kernel/stack_tracer_enabled + +echo '*lock*' > stack_trace_filter +test `cat stack_trace_filter | wc -l` -eq `grep lock stack_trace_filter | wc -l` + +echo 0 > stack_max_size +echo 1 > /proc/sys/kernel/stack_tracer_enabled + +: "Fork and always the first entry including lock" +timeout=10 +while [ $timeout -ne 0 ]; do + ( echo "forked" ) + FL=`grep " 0)" stack_trace` + echo $FL | grep -q "lock" + timeout=$((timeout - 1)) +done +echo 0 > /proc/sys/kernel/stack_tracer_enabled diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc index f6d9ac73268a..0c04282d33dd 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc @@ -16,24 +16,13 @@ if [ ! -f set_ftrace_filter ]; then exit_unsupported fi -do_reset() { - reset_ftrace_filter - reset_tracer - disable_events - clear_trace - enable_tracing -} - fail() { # mesg - do_reset echo $1 exit_fail } SLEEP_TIME=".1" -do_reset - echo "Testing function probes with enabling disabling tracing:" cnt_trace() { diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index e4645d5e3126..7b96e80e6b8a 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -89,12 +89,23 @@ initialize_ftrace() { # Reset ftrace to initial-state reset_tracer reset_trigger reset_events_filter + reset_ftrace_filter disable_events echo > set_event_pid # event tracer is always on + echo > set_ftrace_pid [ -f set_ftrace_filter ] && echo | tee set_ftrace_* [ -f set_graph_function ] && echo | tee set_graph_* [ -f stack_trace_filter ] && echo > stack_trace_filter [ -f kprobe_events ] && echo > kprobe_events [ -f uprobe_events ] && echo > uprobe_events + [ -f synthetic_events ] && echo > synthetic_events + [ -f snapshot ] && echo 0 > snapshot + clear_trace enable_tracing } + +LOCALHOST=127.0.0.1 + +yield() { + ping $LOCALHOST -c 1 || sleep .001 || usleep 1 || sleep 1 +} diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc index 4604d2103c89..bb1eb5a7c64e 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc @@ -4,10 +4,7 @@ [ -f kprobe_events ] || exit_unsupported # this is configurable -echo 0 > events/enable -echo > kprobe_events echo p:myevent _do_fork > kprobe_events grep myevent kprobe_events test -d events/kprobes/myevent echo > kprobe_events -clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc index bbc443a9190c..442c1a8c5edf 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc @@ -4,12 +4,9 @@ [ -f kprobe_events ] || exit_unsupported -echo 0 > events/enable -echo > kprobe_events echo p:myevent _do_fork > kprobe_events test -d events/kprobes/myevent echo 1 > events/kprobes/myevent/enable echo > kprobe_events && exit_fail # this must fail echo 0 > events/kprobes/myevent/enable echo > kprobe_events # this must succeed -clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc index 8b43c6804fc3..bcdecf80a8f1 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc @@ -4,13 +4,15 @@ [ -f kprobe_events ] || exit_unsupported # this is configurable -echo 0 > events/enable -echo > kprobe_events echo 'p:testprobe _do_fork $stack $stack0 +0($stack)' > kprobe_events -grep testprobe kprobe_events +grep testprobe kprobe_events | grep -q 'arg1=\$stack arg2=\$stack0 arg3=+0(\$stack)' test -d events/kprobes/testprobe + echo 1 > events/kprobes/testprobe/enable ( echo "forked") +grep testprobe trace | grep '_do_fork' | \ + grep -q 'arg1=0x[[:xdigit:]]* arg2=0x[[:xdigit:]]* arg3=0x[[:xdigit:]]*$' + echo 0 > events/kprobes/testprobe/enable echo "-:testprobe" >> kprobe_events clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc new file mode 100644 index 000000000000..15c1f70fcaf9 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc @@ -0,0 +1,17 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Kprobe event with comm arguments + +[ -f kprobe_events ] || exit_unsupported # this is configurable + +grep -A1 "fetcharg:" README | grep -q "\$comm" || exit_unsupported # this is too old + +echo 'p:testprobe _do_fork comm=$comm ' > kprobe_events +grep testprobe kprobe_events | grep -q 'comm=$comm' +test -d events/kprobes/testprobe + +echo 1 > events/kprobes/testprobe/enable +( echo "forked") +grep testprobe trace | grep -q 'comm=".*"' + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc index 1ad70cdaf442..46e7744f8358 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc @@ -4,9 +4,6 @@ [ -f kprobe_events ] || exit_unsupported # this is configurable -echo 0 > events/enable -echo > kprobe_events - case `uname -m` in x86_64) ARG1=%di @@ -44,5 +41,3 @@ echo 1 > events/kprobes/testprobe/enable echo "p:test _do_fork" >> kprobe_events grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace -echo 0 > events/enable -echo > kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc new file mode 100644 index 000000000000..2b6dd33f9076 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc @@ -0,0 +1,39 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Kprobe event symbol argument + +[ -f kprobe_events ] || exit_unsupported # this is configurable + +SYMBOL="linux_proc_banner" + +if [ ! -f /proc/kallsyms ]; then + echo "Can not check the target symbol - please enable CONFIG_KALLSYMS" + exit_unresolved +elif ! grep "$SYMBOL\$" /proc/kallsyms; then + echo "Linux banner is not exported - please enable CONFIG_KALLSYMS_ALL" + exit_unresolved +fi + +: "Test get basic types symbol argument" +echo "p:testprobe_u _do_fork arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events +echo "p:testprobe_s _do_fork arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events +if grep -q "x8/16/32/64" README; then + echo "p:testprobe_x _do_fork arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events +fi +echo "p:testprobe_bf _do_fork arg1=@linux_proc_banner:b8@4/32" >> kprobe_events +echo 1 > events/kprobes/enable +(echo "forked") +echo 0 > events/kprobes/enable +grep "testprobe_[usx]:.* arg1=.* arg2=.* arg3=.* arg4=.*" trace +grep "testprobe_bf:.* arg1=.*" trace + +: "Test get string symbol argument" +echo "p:testprobe_str _do_fork arg1=@linux_proc_banner:string" > kprobe_events +echo 1 > events/kprobes/enable +(echo "forked") +echo 0 > events/kprobes/enable +RESULT=`grep "testprobe_str" trace | sed -e 's/.* arg1=\(.*\)/\1/'` + +RESULT=`echo $RESULT | sed -e 's/.* \((.*)\) \((.*)\) .*/\1 \2/'` +ORIG=`cat /proc/version | sed -e 's/.* \((.*)\) \((.*)\) .*/\1 \2/'` +test "$RESULT" = "$ORIG" diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc index d026ff4e562f..6f0f19953193 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc @@ -6,9 +6,6 @@ grep "x8/16/32/64" README > /dev/null || exit_unsupported # version issue -echo 0 > events/enable -echo > kprobe_events - PROBEFUNC="vfs_read" GOODREG= BADREG= @@ -78,8 +75,11 @@ test_badarg "\$stackp" "\$stack0+10" "\$stack1-10" echo "r ${PROBEFUNC} \$retval" > kprobe_events ! echo "p ${PROBEFUNC} \$retval" > kprobe_events +# $comm was introduced in 4.8, older kernels reject it. +if grep -A1 "fetcharg:" README | grep -q '\$comm' ; then : "Comm access" test_goodarg "\$comm" +fi : "Indirect memory access" test_goodarg "+0(${GOODREG})" "-0(${GOODREG})" "+10(\$stack)" \ @@ -100,5 +100,3 @@ test_badarg "${GOODREG}::${GOODTYPE}" "${GOODREG}:${BADTYPE}" \ test_goodarg "\$comm:string" "+0(\$stack):string" test_badarg "\$comm:x64" "\$stack:string" "${GOODREG}:string" - -echo > kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc index 2a1755bfc290..1bcb67dcae26 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc @@ -6,33 +6,45 @@ grep "x8/16/32/64" README > /dev/null || exit_unsupported # version issue -echo 0 > events/enable -echo > kprobe_events -enable_tracing - -echo 'p:testprobe _do_fork $stack0:s32 $stack0:u32 $stack0:x32 $stack0:b8@4/32' > kprobe_events -grep testprobe kprobe_events -test -d events/kprobes/testprobe - -echo 1 > events/kprobes/testprobe/enable -( echo "forked") -echo 0 > events/kprobes/testprobe/enable -ARGS=`tail -n 1 trace | sed -e 's/.* arg1=\(.*\) arg2=\(.*\) arg3=\(.*\) arg4=\(.*\)/\1 \2 \3 \4/'` +gen_event() { # Bitsize + echo "p:testprobe _do_fork \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1" +} -check_types() { - X1=`printf "%x" $1 | tail -c 8` +check_types() { # s-type u-type x-type bf-type width + test $# -eq 5 + CW=$5 + CW=$((CW / 4)) + X1=`printf "%x" $1 | tail -c ${CW}` X2=`printf "%x" $2` X3=`printf "%x" $3` test $X1 = $X2 test $X2 = $X3 test 0x$X3 = $3 - B4=`printf "%02x" $4` - B3=`echo -n $X3 | tail -c 3 | head -c 2` + B4=`printf "%1x" $4` + B3=`printf "%03x" 0x$X3 | tail -c 2 | head -c 1` test $B3 = $B4 } -check_types $ARGS -echo "-:testprobe" >> kprobe_events -clear_trace -test -d events/kprobes/testprobe && exit_fail || exit_pass +for width in 64 32 16 8; do + : "Add new event with basic types" + gen_event $width > kprobe_events + grep testprobe kprobe_events + test -d events/kprobes/testprobe + + : "Trace the event" + echo 1 > events/kprobes/testprobe/enable + ( echo "forked") + echo 0 > events/kprobes/testprobe/enable + + : "Confirm the arguments is recorded in given types correctly" + ARGS=`grep "testprobe" trace | sed -e 's/.* arg1=\(.*\) arg2=\(.*\) arg3=\(.*\) arg4=\(.*\)/\1 \2 \3 \4/'` + check_types $ARGS $width + + : "Clear event for next loop" + echo "-:testprobe" >> kprobe_events + clear_trace + +done + +exit_pass diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc index 2724a1068cb1..3fb70e01b1fe 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc @@ -4,9 +4,6 @@ [ -f kprobe_events ] || exit_unsupported # this is configurable -disable_events -echo > kprobe_events - :;: "Add an event on function without name" ;: FUNC=`grep " [tT] .*vfs_read$" /proc/kallsyms | tail -n 1 | cut -f 3 -d " "` @@ -33,5 +30,3 @@ echo "p $FUNC" > kprobe_events EVENT=`grep $FUNC kprobe_events | cut -f 1 -d " " | cut -f 2 -d:` [ "x" != "x$EVENT" ] || exit_failure test -d events/$EVENT || exit_failure - -echo > kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc index cc4cac0e60f2..492426e95e09 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc @@ -8,8 +8,6 @@ grep function available_tracers || exit_unsupported # this is configurable # prepare echo nop > current_tracer echo _do_fork > set_ftrace_filter -echo 0 > events/enable -echo > kprobe_events echo 'p:testprobe _do_fork' > kprobe_events # kprobe on / ftrace off @@ -47,10 +45,3 @@ echo > trace ( echo "forked") grep testprobe trace ! grep '_do_fork <-' trace - -# cleanup -echo nop > current_tracer -echo > set_ftrace_filter -echo 0 > events/kprobes/testprobe/enable -echo > kprobe_events -echo > trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc index 1e9f75f7a30f..d861bd776c5e 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc @@ -4,14 +4,18 @@ [ -f kprobe_events ] || exit_unsupported # this is configurable -disable_events -echo > kprobe_events +rmmod trace-printk ||: +if ! modprobe trace-printk ; then + echo "No trace-printk sample module - please make CONFIG_SAMPLE_TRACE_PRINTK= +m" + exit_unresolved; +fi + +MOD=trace_printk +FUNC=trace_printk_irq_work :;: "Add an event on a module function without specifying event name" ;: -MOD=`lsmod | head -n 2 | tail -n 1 | cut -f1 -d" "` -FUNC=`grep -m 1 ".* t .*\\[$MOD\\]" /proc/kallsyms | xargs | cut -f3 -d" "` -[ "x" != "x$MOD" -a "y" != "y$FUNC" ] || exit_unresolved echo "p $MOD:$FUNC" > kprobe_events PROBE_NAME=`echo $MOD:$FUNC | tr ".:" "_"` test -d events/kprobes/p_${PROBE_NAME}_0 || exit_failure @@ -26,4 +30,24 @@ test -d events/kprobes/event1 || exit_failure echo "p:kprobes1/event1 $MOD:$FUNC" > kprobe_events test -d events/kprobes1/event1 || exit_failure -echo > kprobe_events +:;: "Remove target module, but event still be there" ;: +if ! rmmod trace-printk ; then + echo "Failed to unload module - please enable CONFIG_MODULE_UNLOAD" + exit_unresolved; +fi +test -d events/kprobes1/event1 + +:;: "Check posibility to defining events on unloaded module";: +echo "p:event2 $MOD:$FUNC" >> kprobe_events + +:;: "Target is gone, but we can prepare for next time";: +echo 1 > events/kprobes1/event1/enable + +:;: "Load module again, which means the event1 should be recorded";: +modprobe trace-printk +grep "event1:" trace + +:;: "Remove the module again and check the event is not locked" +rmmod trace-printk +echo 0 > events/kprobes1/event1/enable +echo "-:kprobes1/event1" >> kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc index 321954683aaa..ac9ab4a12e53 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc @@ -4,13 +4,16 @@ [ -f kprobe_events ] || exit_unsupported # this is configurable -echo 0 > events/enable -echo > kprobe_events +# Add new kretprobe event echo 'r:testprobe2 _do_fork $retval' > kprobe_events -grep testprobe2 kprobe_events +grep testprobe2 kprobe_events | grep -q 'arg1=\$retval' test -d events/kprobes/testprobe2 + echo 1 > events/kprobes/testprobe2/enable ( echo "forked") + +cat trace | grep testprobe2 | grep -q '<- _do_fork' + echo 0 > events/kprobes/testprobe2/enable echo '-:testprobe2' >> kprobe_events clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc index 7c0290684c43..8e05b178519a 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc @@ -5,8 +5,6 @@ [ -f kprobe_events ] || exit_unsupported # this is configurable grep -q 'r\[maxactive\]' README || exit_unsupported # this is older version -echo > kprobe_events - # Test if we successfully reject unknown messages if echo 'a:myprobeaccept inet_csk_accept' > kprobe_events; then false; else true; fi @@ -37,5 +35,3 @@ echo > kprobe_events echo 'r10 inet_csk_accept' > kprobe_events grep inet_csk_accept kprobe_events echo > kprobe_events - -clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc index ce361b9d62cf..5862eee91e1d 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc @@ -12,11 +12,6 @@ case `uname -m` in *) OFFS=0;; esac -if [ -d events/kprobes ]; then - echo 0 > events/kprobes/enable - echo > kprobe_events -fi - N=0 echo "Setup up kprobes on first available 256 text symbols" grep -i " t " /proc/kallsyms | cut -f3 -d" " | grep -v .*\\..* | \ diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc index 519d2763f5d2..a902aa0aaabc 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc @@ -30,8 +30,6 @@ if [ `printf "%x" -1 | wc -c` != 9 ]; then UINT_TEST=yes fi -echo 0 > events/enable -echo > kprobe_events echo "p:testprobe ${TARGET_FUNC}" > kprobe_events echo "p:testprobe ${TARGET}" > kprobe_events echo "p:testprobe ${TARGET_FUNC}${NEXT}" > kprobe_events @@ -39,5 +37,3 @@ echo "p:testprobe ${TARGET_FUNC}${NEXT}" > kprobe_events if [ "${UINT_TEST}" = yes ]; then ! echo "p:testprobe ${TARGET_FUNC}${OVERFLOW}" > kprobe_events fi -echo > kprobe_events -clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc new file mode 100644 index 000000000000..0384b525cdee --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc @@ -0,0 +1,15 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Kprobe dynamic event - adding and removing + +[ -f kprobe_events ] || exit_unsupported # this is configurable + +! grep -q 'myevent' kprobe_profile +echo p:myevent _do_fork > kprobe_events +grep -q 'myevent[[:space:]]*0[[:space:]]*0$' kprobe_profile +echo 1 > events/kprobes/myevent/enable +( echo "forked" ) +grep -q 'myevent[[:space:]]*[[:digit:]]*[[:space:]]*0$' kprobe_profile +echo 0 > events/kprobes/myevent/enable +echo > kprobe_events +! grep -q 'myevent' kprobe_profile diff --git a/tools/testing/selftests/ftrace/test.d/template b/tools/testing/selftests/ftrace/test.d/template index 5c39ceb18a0d..e1a5d14c4eaf 100644 --- a/tools/testing/selftests/ftrace/test.d/template +++ b/tools/testing/selftests/ftrace/test.d/template @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: GPL-2.0 # description: %HERE DESCRIBE WHAT THIS DOES% # you have to add ".tc" extention for your testcase file # Note that all tests are run with "errexit" option. diff --git a/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc b/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc new file mode 100644 index 000000000000..b0893d7edda3 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc @@ -0,0 +1,25 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Test wakeup tracer + +if ! which chrt ; then + echo "chrt is not found. This test requires nice command." + exit_unresolved +fi + +if ! grep -wq "wakeup" available_tracers ; then + echo "wakeup tracer is not supported" + exit_unsupported +fi + +echo wakeup > current_tracer +echo 1 > tracing_on +echo 0 > tracing_max_latency + +: "Wakeup higher priority task" +chrt -f 5 sleep 1 + +echo 0 > tracing_on +grep '+ \[[[:digit:]]*\]' trace +grep '==> \[[[:digit:]]*\]' trace + diff --git a/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc b/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc new file mode 100644 index 000000000000..b9b6669a623b --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc @@ -0,0 +1,25 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Test wakeup RT tracer + +if ! which chrt ; then + echo "chrt is not found. This test requires chrt command." + exit_unresolved +fi + +if ! grep -wq "wakeup_rt" available_tracers ; then + echo "wakeup_rt tracer is not supported" + exit_unsupported +fi + +echo wakeup_rt > current_tracer +echo 1 > tracing_on +echo 0 > tracing_max_latency + +: "Wakeup a realtime task" +chrt -f 5 sleep 1 + +echo 0 > tracing_on +grep "+ \[[[:digit:]]*\]" trace +grep "==> \[[[:digit:]]*\]" trace + diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc index 2aabab363cfb..401104344593 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc @@ -2,14 +2,7 @@ # description: event trigger - test extended error support -do_reset() { - reset_trigger - echo > set_event - clear_trace -} - fail() { #msg - do_reset echo $1 exit_fail } @@ -24,9 +17,6 @@ if [ ! -f synthetic_events ]; then exit_unsupported fi -reset_tracer -do_reset - echo "Test extended error support" echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger ! echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger 2> /dev/null @@ -34,6 +24,4 @@ if ! grep -q "ERROR:" events/sched/sched_wakeup/hist; then fail "Failed to generate extended error in histogram" fi -do_reset - exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc index 7fd5b4a8f060..f59b2a9a1f22 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc @@ -1,14 +1,7 @@ #!/bin/sh # description: event trigger - test field variable support -do_reset() { - reset_trigger - echo > set_event - clear_trace -} - fail() { #msg - do_reset echo $1 exit_fail } @@ -23,10 +16,6 @@ if [ ! -f synthetic_events ]; then exit_unsupported fi -clear_synthetic_events -reset_tracer -do_reset - echo "Test field variable support" echo 'wakeup_latency u64 lat; pid_t pid; int prio; char comm[16]' > synthetic_events @@ -34,7 +23,7 @@ echo 'hist:keys=comm:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/ echo 'hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger echo 'hist:keys=pid,prio,comm:vals=lat:sort=pid,prio' > events/synthetic/wakeup_latency/trigger -ping localhost -c 3 +ping $LOCALHOST -c 3 if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then fail "Failed to create inter-event histogram" fi @@ -49,6 +38,4 @@ if grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then fail "Failed to remove histogram with field variable" fi -do_reset - exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc index c93dbe38b5df..524d9ce361e2 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc @@ -1,14 +1,7 @@ #!/bin/sh # description: event trigger - test inter-event combined histogram trigger -do_reset() { - reset_trigger - echo > set_event - clear_trace -} - fail() { #msg - do_reset echo $1 exit_fail } @@ -23,10 +16,6 @@ if [ ! -f synthetic_events ]; then exit_unsupported fi -reset_tracer -do_reset -clear_synthetic_events - echo "Test create synthetic event" echo 'waking_latency u64 lat pid_t pid' > synthetic_events @@ -48,11 +37,9 @@ echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger -ping localhost -c 3 +ping $LOCALHOST -c 3 if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then fail "Failed to create combined histogram" fi -do_reset - exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc index c193dce611a2..4ddc546771b5 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc @@ -1,15 +1,7 @@ #!/bin/sh # description: event trigger - test multiple actions on hist trigger - -do_reset() { - reset_trigger - echo > set_event - clear_trace -} - fail() { #msg - do_reset echo $1 exit_fail } @@ -24,10 +16,6 @@ if [ ! -f synthetic_events ]; then exit_unsupported fi -clear_synthetic_events -reset_tracer -do_reset - echo "Test multiple actions on hist trigger" echo 'wakeup_latency u64 lat; pid_t pid' >> synthetic_events TRIGGER1=events/sched/sched_wakeup/trigger @@ -39,6 +27,4 @@ echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_ echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,prev_pid) if next_comm=="cyclictest"' >> $TRIGGER2 echo 'hist:keys=next_pid if next_comm=="cyclictest"' >> $TRIGGER2 -do_reset - exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc index e84e7d048566..39fb65b0cd9f 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc @@ -1,14 +1,7 @@ #!/bin/sh # description: event trigger - test inter-event histogram trigger onmatch action -do_reset() { - reset_trigger - echo > set_event - clear_trace -} - fail() { #msg - do_reset echo $1 exit_fail } @@ -23,10 +16,6 @@ if [ ! -f synthetic_events ]; then exit_unsupported fi -clear_synthetic_events -reset_tracer -do_reset - echo "Test create synthetic event" echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events @@ -40,11 +29,10 @@ echo "Test histogram variables,simple expression support and onmatch action" echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger -ping localhost -c 5 + +ping $LOCALHOST -c 5 if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then fail "Failed to create onmatch action inter-event histogram" fi -do_reset - exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc index 7907d8aacde3..81ab3939c96a 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc @@ -1,14 +1,7 @@ #!/bin/sh # description: event trigger - test inter-event histogram trigger onmatch-onmax action -do_reset() { - reset_trigger - echo > set_event - clear_trace -} - fail() { #msg - do_reset echo $1 exit_fail } @@ -23,10 +16,6 @@ if [ ! -f synthetic_events ]; then exit_unsupported fi -clear_synthetic_events -reset_tracer -do_reset - echo "Test create synthetic event" echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events @@ -40,11 +29,10 @@ echo "Test histogram variables,simple expression support and onmatch-onmax actio echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm):onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger -ping localhost -c 5 + +ping $LOCALHOST -c 5 if [ ! grep -q "ping" events/synthetic/wakeup_latency/hist -o ! grep -q "max:" events/sched/sched_switch/hist]; then fail "Failed to create onmatch-onmax action inter-event histogram" fi -do_reset - exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc index 38b7ed6242b2..1180ab5f0845 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc @@ -1,14 +1,7 @@ #!/bin/sh # description: event trigger - test inter-event histogram trigger onmax action -do_reset() { - reset_trigger - echo > set_event - clear_trace -} - fail() { #msg - do_reset echo $1 exit_fail } @@ -23,10 +16,6 @@ if [ ! -f synthetic_events ]; then exit_unsupported fi -clear_synthetic_events -reset_tracer -do_reset - echo "Test create synthetic event" echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events @@ -38,11 +27,10 @@ echo "Test onmax action" echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_waking/trigger echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger -ping localhost -c 3 + +ping $LOCALHOST -c 3 if ! grep -q "max:" events/sched/sched_switch/hist; then fail "Failed to create onmax action inter-event histogram" fi -do_reset - exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc index c604438df13b..41128219231a 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc @@ -1,13 +1,7 @@ #!/bin/sh # description: event trigger - test synthetic event create remove -do_reset() { - reset_trigger - echo > set_event - clear_trace -} fail() { #msg - do_reset echo $1 exit_fail } @@ -22,10 +16,6 @@ if [ ! -f synthetic_events ]; then exit_unsupported fi -clear_synthetic_events -reset_tracer -do_reset - echo "Test create synthetic event" echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events @@ -49,6 +39,4 @@ if [ -d events/synthetic/wakeup_latency ]; then fail "Created wakeup_latency synthetic event with an invalid format" fi -do_reset - exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc index 28cc355a3a7b..eddb51e1fbf7 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc @@ -3,14 +3,7 @@ # description: event trigger - test event enable/disable trigger # flags: instance -do_reset() { - reset_trigger - echo > set_event - clear_trace -} - fail() { #msg - do_reset echo $1 exit_fail } @@ -25,9 +18,6 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then exit_unsupported fi -reset_tracer -do_reset - FEATURE=`grep enable_event events/sched/sched_process_fork/trigger` if [ -z "$FEATURE" ]; then echo "event enable/disable trigger is not supported" @@ -61,6 +51,4 @@ echo 'enable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger ! echo 'enable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger ! echo 'disable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger -do_reset - exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc index a48e23eb8a8b..2dcc2296ebdd 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc @@ -3,14 +3,7 @@ # description: event trigger - test trigger filter # flags: instance -do_reset() { - reset_trigger - echo > set_event - clear_trace -} - fail() { #msg - do_reset echo $1 exit_fail } @@ -25,9 +18,6 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then exit_unsupported fi -reset_tracer -do_reset - echo "Test trigger filter" echo 1 > tracing_on echo 'traceoff if child_pid == 0' > events/sched/sched_process_fork/trigger @@ -54,8 +44,4 @@ echo '!traceoff' > events/sched/sched_process_fork/trigger echo 'traceoff if parent_pid >= 0 || child_pid >= 0' > events/sched/sched_process_fork/trigger echo '!traceoff' > events/sched/sched_process_fork/trigger - - -do_reset - exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc index 8da80efc44d8..fab4431639d3 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc @@ -3,14 +3,7 @@ # description: event trigger - test histogram modifiers # flags: instance -do_reset() { - reset_trigger - echo > set_event - clear_trace -} - fail() { #msg - do_reset echo $1 exit_fail } @@ -30,9 +23,6 @@ if [ ! -f events/sched/sched_process_fork/hist ]; then exit_unsupported fi -reset_tracer -do_reset - echo "Test histogram with execname modifier" echo 'hist:keys=common_pid.execname' > events/sched/sched_process_fork/trigger @@ -71,6 +61,4 @@ for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done grep 'bytes_req: ~ 2^[0-9]*' events/kmem/kmalloc/hist > /dev/null || \ fail "log2 modifier on kmem/kmalloc did not work" -do_reset - exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc index 449fe9ff91a2..177e8d4c4744 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc @@ -3,14 +3,7 @@ # description: event trigger - test histogram trigger # flags: instance -do_reset() { - reset_trigger - echo > set_event - clear_trace -} - fail() { #msg - do_reset echo $1 exit_fail } @@ -30,9 +23,6 @@ if [ ! -f events/sched/sched_process_fork/hist ]; then exit_unsupported fi -reset_tracer -do_reset - echo "Test histogram basic tigger" echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger @@ -79,6 +69,4 @@ check_inc `grep -o "child_pid:[[:space:]]*[[:digit:]]*" \ events/sched/sched_process_fork/hist | cut -d: -f2 ` || fail "sort param on sched_process_fork did not work" -do_reset - exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc index c5ef8b9d02b3..18fdaab9f570 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc @@ -3,14 +3,7 @@ # description: event trigger - test multiple histogram triggers # flags: instance -do_reset() { - reset_trigger - echo > set_event - clear_trace -} - fail() { #msg - do_reset echo $1 exit_fail } @@ -30,11 +23,6 @@ if [ ! -f events/sched/sched_process_fork/hist ]; then exit_unsupported fi -reset_tracer -do_reset - -reset_trigger - echo "Test histogram multiple tiggers" echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger @@ -67,8 +55,4 @@ grep test_hist events/sched/sched_process_exit/hist > /dev/null || \ diffs=`diff events/sched/sched_process_exit/hist events/sched/sched_process_fork/hist | wc -l` test $diffs -eq 0 || fail "Same name histograms are not same" -reset_trigger - -do_reset - exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc index ed38f0050d77..7717c0a09686 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc @@ -2,14 +2,7 @@ # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test snapshot-trigger -do_reset() { - reset_trigger - echo > set_event - clear_trace -} - fail() { #msg - do_reset echo $1 exit_fail } @@ -29,9 +22,6 @@ if [ ! -f snapshot ]; then exit_unsupported fi -reset_tracer -do_reset - FEATURE=`grep snapshot events/sched/sched_process_fork/trigger` if [ -z "$FEATURE" ]; then echo "snapshot trigger is not supported" @@ -57,6 +47,4 @@ echo "Test snapshot semantic errors" echo "snapshot" > events/sched/sched_process_fork/trigger ! echo "snapshot" > events/sched/sched_process_fork/trigger -do_reset - exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc index 3121d795a868..398c05c4d2a7 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc @@ -2,14 +2,7 @@ # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test stacktrace-trigger -do_reset() { - reset_trigger - echo > set_event - clear_trace -} - fail() { #msg - do_reset echo $1 exit_fail } @@ -24,9 +17,6 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then exit_unsupported fi -reset_tracer -do_reset - FEATURE=`grep stacktrace events/sched/sched_process_fork/trigger` if [ -z "$FEATURE" ]; then echo "stacktrace trigger is not supported" @@ -49,6 +39,4 @@ echo "Test stacktrace semantic errors" echo "stacktrace" > events/sched/sched_process_fork/trigger ! echo "stacktrace" > events/sched/sched_process_fork/trigger -do_reset - exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc index 2acbfe2c0c0c..ab6bedb25736 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc @@ -3,14 +3,7 @@ # description: trace_marker trigger - test histogram trigger # flags: instance -do_reset() { - reset_trigger - echo > set_event - clear_trace -} - fail() { #msg - do_reset echo $1 exit_fail } @@ -35,8 +28,6 @@ if [ ! -f events/ftrace/print/hist ]; then exit_unsupported fi -do_reset - echo "Test histogram trace_marker tigger" echo 'hist:keys=common_pid' > events/ftrace/print/trigger @@ -44,6 +35,4 @@ for i in `seq 1 10` ; do echo "hello" > trace_marker; done grep 'hitcount: *10$' events/ftrace/print/hist > /dev/null || \ fail "hist trigger did not trigger correct times on trace_marker" -do_reset - exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc index 6748e8cb42d0..df246e505af7 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc @@ -3,15 +3,7 @@ # description: trace_marker trigger - test snapshot trigger # flags: instance -do_reset() { - reset_trigger - echo > set_event - echo 0 > snapshot - clear_trace -} - fail() { #msg - do_reset echo $1 exit_fail } @@ -47,15 +39,13 @@ test_trace() { fi echo "testing $line for >$x<" match=`echo $line | sed -e "s/>$x<//"` - if [ "$line" == "$match" ]; then + if [ "$line" = "$match" ]; then fail "$line does not have >$x< in it" fi - let x=$x+2 + x=$((x+2)) done } -do_reset - echo "Test snapshot trace_marker tigger" echo 'snapshot' > events/ftrace/print/trigger @@ -69,6 +59,4 @@ for i in `seq 1 10` ; do echo "hello >$i<" > trace_marker; done test_trace trace 1 test_trace snapshot 2 -do_reset - exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc index 0a69c5d1cda8..18b4d1c2807e 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc @@ -3,15 +3,7 @@ # description: trace_marker trigger - test histogram with synthetic event against kernel event # flags: -do_reset() { - reset_trigger - echo > set_event - echo > synthetic_events - clear_trace -} - fail() { #msg - do_reset echo $1 exit_fail } @@ -46,8 +38,6 @@ if [ ! -f events/ftrace/print/hist ]; then exit_unsupported fi -do_reset - echo "Test histogram kernel event to trace_marker latency histogram trigger" echo 'latency u64 lat' > synthetic_events @@ -63,6 +53,4 @@ grep 'hitcount: *1$' events/ftrace/print/hist > /dev/null || \ grep 'hitcount: *1$' events/synthetic/latency/hist > /dev/null || \ fail "hist trigger did not trigger " -do_reset - exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc index 3666dd6ab02a..dd262d6d0db6 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc @@ -3,15 +3,7 @@ # description: trace_marker trigger - test histogram with synthetic event # flags: -do_reset() { - reset_trigger - echo > set_event - echo > synthetic_events - clear_trace -} - fail() { #msg - do_reset echo $1 exit_fail } @@ -41,8 +33,6 @@ if [ ! -f events/ftrace/print/hist ]; then exit_unsupported fi -do_reset - echo "Test histogram trace_marker to trace_marker latency histogram trigger" echo 'latency u64 lat' > synthetic_events @@ -61,6 +51,4 @@ fi grep 'hitcount: *1$' events/synthetic/latency/hist > /dev/null || \ fail "hist trigger did not trigger " -do_reset - exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc index c59d9eb546da..d5d2dcbc9cab 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc @@ -2,14 +2,7 @@ # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test traceon/off trigger -do_reset() { - reset_trigger - echo > set_event - clear_trace -} - fail() { #msg - do_reset echo $1 exit_fail } @@ -24,9 +17,6 @@ if [ ! -f events/sched/sched_process_fork/trigger ]; then exit_unsupported fi -reset_tracer -do_reset - echo "Test traceoff trigger" echo 1 > tracing_on echo 'traceoff' > events/sched/sched_process_fork/trigger @@ -54,6 +44,4 @@ echo 'traceon' > events/sched/sched_process_fork/trigger ! echo 'traceon' > events/sched/sched_process_fork/trigger ! echo 'traceoff' > events/sched/sched_process_fork/trigger -do_reset - exit 0 diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile index ad1eeb14fda7..30996306cabc 100644 --- a/tools/testing/selftests/futex/functional/Makefile +++ b/tools/testing/selftests/futex/functional/Makefile @@ -19,6 +19,7 @@ TEST_GEN_FILES := \ TEST_PROGS := run.sh top_srcdir = ../../../../.. +KSFT_KHDR_INSTALL := 1 include ../../lib.mk $(TEST_GEN_FILES): $(HEADERS) diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile index 4665cdbf1a8d..0bb80619db58 100644 --- a/tools/testing/selftests/gpio/Makefile +++ b/tools/testing/selftests/gpio/Makefile @@ -1,28 +1,32 @@ # SPDX-License-Identifier: GPL-2.0 +MOUNT_CFLAGS := $(shell pkg-config --cflags mount 2>/dev/null) +MOUNT_LDLIBS := $(shell pkg-config --libs mount 2>/dev/null) +ifeq ($(MOUNT_LDLIBS),) +MOUNT_LDLIBS := -lmount -I/usr/include/libmount +endif + +CFLAGS += -O2 -g -std=gnu99 -Wall -I../../../../usr/include/ $(MOUNT_CFLAGS) +LDLIBS += $(MOUNT_LDLIBS) + TEST_PROGS := gpio-mockup.sh -TEST_FILES := gpio-mockup-sysfs.sh $(BINARIES) -BINARIES := gpio-mockup-chardev -EXTRA_PROGS := ../gpiogpio-event-mon ../gpiogpio-hammer ../gpiolsgpio -EXTRA_DIRS := ../gpioinclude/ -EXTRA_OBJS := ../gpiogpio-event-mon-in.o ../gpiogpio-event-mon.o -EXTRA_OBJS += ../gpiogpio-hammer-in.o ../gpiogpio-utils.o ../gpiolsgpio-in.o -EXTRA_OBJS += ../gpiolsgpio.o +TEST_FILES := gpio-mockup-sysfs.sh +TEST_PROGS_EXTENDED := gpio-mockup-chardev -include ../lib.mk +GPIODIR := $(realpath ../../../gpio) +GPIOOBJ := gpio-utils.o -all: $(BINARIES) +all: $(TEST_PROGS_EXTENDED) override define CLEAN - $(RM) $(BINARIES) $(EXTRA_PROGS) $(EXTRA_OBJS) - $(RM) -r $(EXTRA_DIRS) + $(RM) $(TEST_PROGS_EXTENDED) + $(MAKE) -C $(GPIODIR) OUTPUT=$(GPIODIR)/ clean endef -CFLAGS += -O2 -g -std=gnu99 -Wall -I../../../../usr/include/ -LDLIBS += -lmount -I/usr/include/libmount +KSFT_KHDR_INSTALL := 1 +include ../lib.mk -$(BINARIES):| khdr -$(BINARIES): ../../../gpio/gpio-utils.o +$(TEST_PROGS_EXTENDED): $(GPIODIR)/$(GPIOOBJ) -../../../gpio/gpio-utils.o: - make ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) -C ../../../gpio +$(GPIODIR)/$(GPIOOBJ): + $(MAKE) OUTPUT=$(GPIODIR)/ -C $(GPIODIR) diff --git a/tools/testing/selftests/gpio/gpio-mockup-chardev.c b/tools/testing/selftests/gpio/gpio-mockup-chardev.c index f8d468f54e98..aaa1e9f083c3 100644 --- a/tools/testing/selftests/gpio/gpio-mockup-chardev.c +++ b/tools/testing/selftests/gpio/gpio-mockup-chardev.c @@ -37,7 +37,7 @@ static int get_debugfs(char **path) struct libmnt_table *tb; struct libmnt_iter *itr = NULL; struct libmnt_fs *fs; - int found = 0; + int found = 0, ret; cxt = mnt_new_context(); if (!cxt) @@ -58,8 +58,11 @@ static int get_debugfs(char **path) break; } } - if (found) - asprintf(path, "%s/gpio", mnt_fs_get_target(fs)); + if (found) { + ret = asprintf(path, "%s/gpio", mnt_fs_get_target(fs)); + if (ret < 0) + err(EXIT_FAILURE, "failed to format string"); + } mnt_free_iter(itr); mnt_free_context(cxt); diff --git a/tools/testing/selftests/ima/Makefile b/tools/testing/selftests/ima/Makefile new file mode 100644 index 000000000000..0b3adf5444b6 --- /dev/null +++ b/tools/testing/selftests/ima/Makefile @@ -0,0 +1,11 @@ +# Makefile for kexec_load + +uname_M := $(shell uname -m 2>/dev/null || echo not) +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) + +ifeq ($(ARCH),x86) +TEST_PROGS := test_kexec_load.sh + +include ../lib.mk + +endif diff --git a/tools/testing/selftests/ima/config b/tools/testing/selftests/ima/config new file mode 100644 index 000000000000..6bc86d4d9bb4 --- /dev/null +++ b/tools/testing/selftests/ima/config @@ -0,0 +1,4 @@ +CONFIG_IMA_APPRAISE +CONFIG_IMA_ARCH_POLICY +CONFIG_SECURITYFS +CONFIG_KEXEC_VERIFY_SIG diff --git a/tools/testing/selftests/ima/test_kexec_load.sh b/tools/testing/selftests/ima/test_kexec_load.sh new file mode 100755 index 000000000000..1c10093fb526 --- /dev/null +++ b/tools/testing/selftests/ima/test_kexec_load.sh @@ -0,0 +1,54 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0+ +# Loading a kernel image via the kexec_load syscall should fail +# when the kerne is CONFIG_KEXEC_VERIFY_SIG enabled and the system +# is booted in secureboot mode. + +TEST="$0" +EFIVARFS="/sys/firmware/efi/efivars" +rc=0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +# kexec requires root privileges +if [ $UID != 0 ]; then + echo "$TEST: must be run as root" >&2 + exit $ksft_skip +fi + +# Make sure that efivars is mounted in the normal location +if ! grep -q "^\S\+ $EFIVARFS efivarfs" /proc/mounts; then + echo "$TEST: efivars is not mounted on $EFIVARFS" >&2 + exit $ksft_skip +fi + +# Get secureboot mode +file="$EFIVARFS/SecureBoot-*" +if [ ! -e $file ]; then + echo "$TEST: unknown secureboot mode" >&2 + exit $ksft_skip +fi +secureboot=`hexdump $file | awk '{print substr($4,length($4),1)}'` + +# kexec_load should fail in secure boot mode +KERNEL_IMAGE="/boot/vmlinuz-`uname -r`" +kexec -l $KERNEL_IMAGE &>> /dev/null +if [ $? == 0 ]; then + kexec -u + if [ "$secureboot" == "1" ]; then + echo "$TEST: kexec_load succeeded [FAIL]" + rc=1 + else + echo "$TEST: kexec_load succeeded [PASS]" + fi +else + if [ "$secureboot" == "1" ]; then + echo "$TEST: kexec_load failed [PASS]" + else + echo "$TEST: kexec_load failed [FAIL]" + rc=1 + fi +fi + +exit $rc diff --git a/tools/testing/selftests/ir/.gitignore b/tools/testing/selftests/ir/.gitignore new file mode 100644 index 000000000000..070ea0c75fb8 --- /dev/null +++ b/tools/testing/selftests/ir/.gitignore @@ -0,0 +1 @@ +ir_loopback diff --git a/tools/testing/selftests/ir/Makefile b/tools/testing/selftests/ir/Makefile new file mode 100644 index 000000000000..ad06489c22a5 --- /dev/null +++ b/tools/testing/selftests/ir/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +TEST_PROGS := ir_loopback.sh +TEST_GEN_PROGS_EXTENDED := ir_loopback +APIDIR := ../../../include/uapi +CFLAGS += -Wall -O2 -I$(APIDIR) + +include ../lib.mk diff --git a/tools/testing/selftests/ir/ir_loopback.c b/tools/testing/selftests/ir/ir_loopback.c new file mode 100644 index 000000000000..858c19caf224 --- /dev/null +++ b/tools/testing/selftests/ir/ir_loopback.c @@ -0,0 +1,199 @@ +// SPDX-License-Identifier: GPL-2.0 +// test ir decoder +// +// Copyright (C) 2018 Sean Young <sean@mess.org> + +// When sending LIRC_MODE_SCANCODE, the IR will be encoded. rc-loopback +// will send this IR to the receiver side, where we try to read the decoded +// IR. Decoding happens in a separate kernel thread, so we will need to +// wait until that is scheduled, hence we use poll to check for read +// readiness. + +#include <linux/lirc.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <unistd.h> +#include <poll.h> +#include <time.h> +#include <sys/types.h> +#include <sys/ioctl.h> +#include <dirent.h> +#include <sys/stat.h> +#include <fcntl.h> +#include "../kselftest.h" + +#define TEST_SCANCODES 10 +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +static const struct { + enum rc_proto proto; + const char *name; + unsigned int mask; + const char *decoder; +} protocols[] = { + { RC_PROTO_RC5, "rc-5", 0x1f7f, "rc-5" }, + { RC_PROTO_RC5X_20, "rc-5x-20", 0x1f7f3f, "rc-5" }, + { RC_PROTO_RC5_SZ, "rc-5-sz", 0x2fff, "rc-5-sz" }, + { RC_PROTO_JVC, "jvc", 0xffff, "jvc" }, + { RC_PROTO_SONY12, "sony-12", 0x1f007f, "sony" }, + { RC_PROTO_SONY15, "sony-15", 0xff007f, "sony" }, + { RC_PROTO_SONY20, "sony-20", 0x1fff7f, "sony" }, + { RC_PROTO_NEC, "nec", 0xffff, "nec" }, + { RC_PROTO_NECX, "nec-x", 0xffffff, "nec" }, + { RC_PROTO_NEC32, "nec-32", 0xffffffff, "nec" }, + { RC_PROTO_SANYO, "sanyo", 0x1fffff, "sanyo" }, + { RC_PROTO_RC6_0, "rc-6-0", 0xffff, "rc-6" }, + { RC_PROTO_RC6_6A_20, "rc-6-6a-20", 0xfffff, "rc-6" }, + { RC_PROTO_RC6_6A_24, "rc-6-6a-24", 0xffffff, "rc-6" }, + { RC_PROTO_RC6_6A_32, "rc-6-6a-32", 0xffffffff, "rc-6" }, + { RC_PROTO_RC6_MCE, "rc-6-mce", 0x00007fff, "rc-6" }, + { RC_PROTO_SHARP, "sharp", 0x1fff, "sharp" }, +}; + +int lirc_open(const char *rc) +{ + struct dirent *dent; + char buf[100]; + DIR *d; + int fd; + + snprintf(buf, sizeof(buf), "/sys/class/rc/%s", rc); + + d = opendir(buf); + if (!d) + ksft_exit_fail_msg("cannot open %s: %m\n", buf); + + while ((dent = readdir(d)) != NULL) { + if (!strncmp(dent->d_name, "lirc", 4)) { + snprintf(buf, sizeof(buf), "/dev/%s", dent->d_name); + break; + } + } + + if (!dent) + ksft_exit_fail_msg("cannot find lirc device for %s\n", rc); + + closedir(d); + + fd = open(buf, O_RDWR | O_NONBLOCK); + if (fd == -1) + ksft_exit_fail_msg("cannot open: %s: %m\n", buf); + + return fd; +} + +int main(int argc, char **argv) +{ + unsigned int mode; + char buf[100]; + int rlircfd, wlircfd, protocolfd, i, n; + + srand(time(NULL)); + + if (argc != 3) + ksft_exit_fail_msg("Usage: %s <write rcN> <read rcN>\n", + argv[0]); + + rlircfd = lirc_open(argv[2]); + mode = LIRC_MODE_SCANCODE; + if (ioctl(rlircfd, LIRC_SET_REC_MODE, &mode)) + ksft_exit_fail_msg("failed to set scancode rec mode %s: %m\n", + argv[2]); + + wlircfd = lirc_open(argv[1]); + if (ioctl(wlircfd, LIRC_SET_SEND_MODE, &mode)) + ksft_exit_fail_msg("failed to set scancode send mode %s: %m\n", + argv[1]); + + snprintf(buf, sizeof(buf), "/sys/class/rc/%s/protocols", argv[2]); + protocolfd = open(buf, O_WRONLY); + if (protocolfd == -1) + ksft_exit_fail_msg("failed to open %s: %m\n", buf); + + printf("Sending IR on %s and receiving IR on %s.\n", argv[1], argv[2]); + + for (i = 0; i < ARRAY_SIZE(protocols); i++) { + if (write(protocolfd, protocols[i].decoder, + strlen(protocols[i].decoder)) == -1) + ksft_exit_fail_msg("failed to set write decoder\n"); + + printf("Testing protocol %s for decoder %s (%d/%d)...\n", + protocols[i].name, protocols[i].decoder, + i + 1, (int)ARRAY_SIZE(protocols)); + + for (n = 0; n < TEST_SCANCODES; n++) { + unsigned int scancode = rand() & protocols[i].mask; + unsigned int rc_proto = protocols[i].proto; + + if (rc_proto == RC_PROTO_RC6_MCE) + scancode |= 0x800f0000; + + if (rc_proto == RC_PROTO_NECX && + (((scancode >> 16) ^ ~(scancode >> 8)) & 0xff) == 0) + continue; + + if (rc_proto == RC_PROTO_NEC32 && + (((scancode >> 8) ^ ~scancode) & 0xff) == 0) + continue; + + struct lirc_scancode lsc = { + .rc_proto = rc_proto, + .scancode = scancode + }; + + printf("Testing scancode:%x\n", scancode); + + while (write(wlircfd, &lsc, sizeof(lsc)) < 0) { + if (errno == EINTR) + continue; + + ksft_exit_fail_msg("failed to send ir: %m\n"); + } + + struct pollfd pfd = { .fd = rlircfd, .events = POLLIN }; + struct lirc_scancode lsc2; + + poll(&pfd, 1, 1000); + + bool decoded = true; + + while (read(rlircfd, &lsc2, sizeof(lsc2)) < 0) { + if (errno == EINTR) + continue; + + ksft_test_result_error("no scancode decoded: %m\n"); + decoded = false; + break; + } + + if (!decoded) + continue; + + if (lsc.rc_proto != lsc2.rc_proto) + ksft_test_result_error("decoded protocol is different: %d\n", + lsc2.rc_proto); + + else if (lsc.scancode != lsc2.scancode) + ksft_test_result_error("decoded scancode is different: %llx\n", + lsc2.scancode); + else + ksft_inc_pass_cnt(); + } + + printf("OK\n"); + } + + close(rlircfd); + close(wlircfd); + close(protocolfd); + + if (ksft_get_fail_cnt() > 0) + ksft_exit_fail(); + else + ksft_exit_pass(); + + return 0; +} diff --git a/tools/testing/selftests/ir/ir_loopback.sh b/tools/testing/selftests/ir/ir_loopback.sh new file mode 100755 index 000000000000..0a0b8dfa39be --- /dev/null +++ b/tools/testing/selftests/ir/ir_loopback.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +if ! /sbin/modprobe -q -n rc-loopback; then + echo "ir_loopback: module rc-loopback is not found [SKIP]" + exit $ksft_skip +fi + +/sbin/modprobe rc-loopback +if [ $? -ne 0 ]; then + exit +fi + +RCDEV=$(grep -l DRV_NAME=rc-loopback /sys/class/rc/rc*/uevent | grep -o 'rc[0-9]\+') + +./ir_loopback $RCDEV $RCDEV +exit diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 6ae3730c4ee3..76d654ef3234 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -354,7 +354,7 @@ * ASSERT_EQ(expected, measured): expected == measured */ #define ASSERT_EQ(expected, seen) \ - __EXPECT(expected, seen, ==, 1) + __EXPECT(expected, #expected, seen, #seen, ==, 1) /** * ASSERT_NE(expected, seen) @@ -365,7 +365,7 @@ * ASSERT_NE(expected, measured): expected != measured */ #define ASSERT_NE(expected, seen) \ - __EXPECT(expected, seen, !=, 1) + __EXPECT(expected, #expected, seen, #seen, !=, 1) /** * ASSERT_LT(expected, seen) @@ -376,7 +376,7 @@ * ASSERT_LT(expected, measured): expected < measured */ #define ASSERT_LT(expected, seen) \ - __EXPECT(expected, seen, <, 1) + __EXPECT(expected, #expected, seen, #seen, <, 1) /** * ASSERT_LE(expected, seen) @@ -387,7 +387,7 @@ * ASSERT_LE(expected, measured): expected <= measured */ #define ASSERT_LE(expected, seen) \ - __EXPECT(expected, seen, <=, 1) + __EXPECT(expected, #expected, seen, #seen, <=, 1) /** * ASSERT_GT(expected, seen) @@ -398,7 +398,7 @@ * ASSERT_GT(expected, measured): expected > measured */ #define ASSERT_GT(expected, seen) \ - __EXPECT(expected, seen, >, 1) + __EXPECT(expected, #expected, seen, #seen, >, 1) /** * ASSERT_GE(expected, seen) @@ -409,7 +409,7 @@ * ASSERT_GE(expected, measured): expected >= measured */ #define ASSERT_GE(expected, seen) \ - __EXPECT(expected, seen, >=, 1) + __EXPECT(expected, #expected, seen, #seen, >=, 1) /** * ASSERT_NULL(seen) @@ -419,7 +419,7 @@ * ASSERT_NULL(measured): NULL == measured */ #define ASSERT_NULL(seen) \ - __EXPECT(NULL, seen, ==, 1) + __EXPECT(NULL, "NULL", seen, #seen, ==, 1) /** * ASSERT_TRUE(seen) @@ -429,7 +429,7 @@ * ASSERT_TRUE(measured): measured != 0 */ #define ASSERT_TRUE(seen) \ - ASSERT_NE(0, seen) + __EXPECT(0, "0", seen, #seen, !=, 1) /** * ASSERT_FALSE(seen) @@ -439,7 +439,7 @@ * ASSERT_FALSE(measured): measured == 0 */ #define ASSERT_FALSE(seen) \ - ASSERT_EQ(0, seen) + __EXPECT(0, "0", seen, #seen, ==, 1) /** * ASSERT_STREQ(expected, seen) @@ -472,7 +472,7 @@ * EXPECT_EQ(expected, measured): expected == measured */ #define EXPECT_EQ(expected, seen) \ - __EXPECT(expected, seen, ==, 0) + __EXPECT(expected, #expected, seen, #seen, ==, 0) /** * EXPECT_NE(expected, seen) @@ -483,7 +483,7 @@ * EXPECT_NE(expected, measured): expected != measured */ #define EXPECT_NE(expected, seen) \ - __EXPECT(expected, seen, !=, 0) + __EXPECT(expected, #expected, seen, #seen, !=, 0) /** * EXPECT_LT(expected, seen) @@ -494,7 +494,7 @@ * EXPECT_LT(expected, measured): expected < measured */ #define EXPECT_LT(expected, seen) \ - __EXPECT(expected, seen, <, 0) + __EXPECT(expected, #expected, seen, #seen, <, 0) /** * EXPECT_LE(expected, seen) @@ -505,7 +505,7 @@ * EXPECT_LE(expected, measured): expected <= measured */ #define EXPECT_LE(expected, seen) \ - __EXPECT(expected, seen, <=, 0) + __EXPECT(expected, #expected, seen, #seen, <=, 0) /** * EXPECT_GT(expected, seen) @@ -516,7 +516,7 @@ * EXPECT_GT(expected, measured): expected > measured */ #define EXPECT_GT(expected, seen) \ - __EXPECT(expected, seen, >, 0) + __EXPECT(expected, #expected, seen, #seen, >, 0) /** * EXPECT_GE(expected, seen) @@ -527,7 +527,7 @@ * EXPECT_GE(expected, measured): expected >= measured */ #define EXPECT_GE(expected, seen) \ - __EXPECT(expected, seen, >=, 0) + __EXPECT(expected, #expected, seen, #seen, >=, 0) /** * EXPECT_NULL(seen) @@ -537,7 +537,7 @@ * EXPECT_NULL(measured): NULL == measured */ #define EXPECT_NULL(seen) \ - __EXPECT(NULL, seen, ==, 0) + __EXPECT(NULL, "NULL", seen, #seen, ==, 0) /** * EXPECT_TRUE(seen) @@ -547,7 +547,7 @@ * EXPECT_TRUE(measured): 0 != measured */ #define EXPECT_TRUE(seen) \ - EXPECT_NE(0, seen) + __EXPECT(0, "0", seen, #seen, !=, 0) /** * EXPECT_FALSE(seen) @@ -557,7 +557,7 @@ * EXPECT_FALSE(measured): 0 == measured */ #define EXPECT_FALSE(seen) \ - EXPECT_EQ(0, seen) + __EXPECT(0, "0", seen, #seen, ==, 0) /** * EXPECT_STREQ(expected, seen) @@ -597,7 +597,7 @@ if (_metadata->passed && _metadata->step < 255) \ _metadata->step++; -#define __EXPECT(_expected, _seen, _t, _assert) do { \ +#define __EXPECT(_expected, _expected_str, _seen, _seen_str, _t, _assert) do { \ /* Avoid multiple evaluation of the cases */ \ __typeof__(_expected) __exp = (_expected); \ __typeof__(_seen) __seen = (_seen); \ @@ -606,8 +606,8 @@ unsigned long long __exp_print = (uintptr_t)__exp; \ unsigned long long __seen_print = (uintptr_t)__seen; \ __TH_LOG("Expected %s (%llu) %s %s (%llu)", \ - #_expected, __exp_print, #_t, \ - #_seen, __seen_print); \ + _expected_str, __exp_print, #_t, \ + _seen_str, __seen_print); \ _metadata->passed = 0; \ /* Ensure the optional handler is triggered */ \ _metadata->trigger = 1; \ diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 5c34752e1cff..6210ba41c29e 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,6 +1,8 @@ -cr4_cpuid_sync_test -platform_info_test -set_sregs_test -sync_regs_test -vmx_tsc_adjust_test -state_test +/x86_64/cr4_cpuid_sync_test +/x86_64/evmcs_test +/x86_64/platform_info_test +/x86_64/set_sregs_test +/x86_64/sync_regs_test +/x86_64/vmx_tsc_adjust_test +/x86_64/state_test +/dirty_log_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index ec32dad3c3f0..f9a0e9938480 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -1,26 +1,34 @@ all: -top_srcdir = ../../../../ +top_srcdir = ../../../.. +KSFT_KHDR_INSTALL := 1 UNAME_M := $(shell uname -m) -LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c -LIBKVM_x86_64 = lib/x86.c lib/vmx.c - -TEST_GEN_PROGS_x86_64 = platform_info_test -TEST_GEN_PROGS_x86_64 += set_sregs_test -TEST_GEN_PROGS_x86_64 += sync_regs_test -TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test -TEST_GEN_PROGS_x86_64 += cr4_cpuid_sync_test -TEST_GEN_PROGS_x86_64 += state_test +LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/ucall.c lib/sparsebit.c +LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c +LIBKVM_aarch64 = lib/aarch64/processor.c + +TEST_GEN_PROGS_x86_64 = x86_64/platform_info_test +TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test +TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test +TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test +TEST_GEN_PROGS_x86_64 += x86_64/state_test +TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test +TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid TEST_GEN_PROGS_x86_64 += dirty_log_test +TEST_GEN_PROGS_x86_64 += clear_dirty_log_test + +TEST_GEN_PROGS_aarch64 += dirty_log_test +TEST_GEN_PROGS_aarch64 += clear_dirty_log_test TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) LIBKVM += $(LIBKVM_$(UNAME_M)) INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ -LINUX_TOOL_INCLUDE = $(top_srcdir)tools/include -CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -I.. +LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include +CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(UNAME_M) -I.. LDFLAGS += -pthread # After inclusion, $(OUTPUT) is defined and @@ -29,7 +37,7 @@ include ../lib.mk STATIC_LIBS := $(OUTPUT)/libkvm.a LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM)) -EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS) +EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS) cscope.* x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ)))) $(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c @@ -40,4 +48,12 @@ $(OUTPUT)/libkvm.a: $(LIBKVM_OBJ) all: $(STATIC_LIBS) $(TEST_GEN_PROGS): $(STATIC_LIBS) -$(STATIC_LIBS):| khdr + +cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib .. +cscope: + $(RM) cscope.* + (find $(include_paths) -name '*.h' \ + -exec realpath --relative-base=$(PWD) {} \;; \ + find . -name '*.c' \ + -exec realpath --relative-base=$(PWD) {} \;) | sort -u > cscope.files + cscope -b diff --git a/tools/testing/selftests/kvm/clear_dirty_log_test.c b/tools/testing/selftests/kvm/clear_dirty_log_test.c new file mode 100644 index 000000000000..749336937d37 --- /dev/null +++ b/tools/testing/selftests/kvm/clear_dirty_log_test.c @@ -0,0 +1,2 @@ +#define USE_CLEAR_DIRTY_LOG +#include "dirty_log_test.c" diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 0c2cdc105f96..4715cfba20dc 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -5,6 +5,8 @@ * Copyright (C) 2018, Red Hat, Inc. */ +#define _GNU_SOURCE /* for program_invocation_name */ + #include <stdio.h> #include <stdlib.h> #include <unistd.h> @@ -15,76 +17,85 @@ #include "test_util.h" #include "kvm_util.h" +#include "processor.h" + +#define DEBUG printf -#define DEBUG printf +#define VCPU_ID 1 -#define VCPU_ID 1 /* The memory slot index to track dirty pages */ -#define TEST_MEM_SLOT_INDEX 1 -/* - * GPA offset of the testing memory slot. Must be bigger than the - * default vm mem slot, which is DEFAULT_GUEST_PHY_PAGES. - */ -#define TEST_MEM_OFFSET (1ULL << 30) /* 1G */ -/* Size of the testing memory slot */ -#define TEST_MEM_PAGES (1ULL << 18) /* 1G for 4K pages */ +#define TEST_MEM_SLOT_INDEX 1 + +/* Default guest test memory offset, 1G */ +#define DEFAULT_GUEST_TEST_MEM 0x40000000 + /* How many pages to dirty for each guest loop */ -#define TEST_PAGES_PER_LOOP 1024 +#define TEST_PAGES_PER_LOOP 1024 + /* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */ -#define TEST_HOST_LOOP_N 32 +#define TEST_HOST_LOOP_N 32UL + /* Interval for each host loop (ms) */ -#define TEST_HOST_LOOP_INTERVAL 10 +#define TEST_HOST_LOOP_INTERVAL 10UL + +/* + * Guest/Host shared variables. Ensure addr_gva2hva() and/or + * sync_global_to/from_guest() are used when accessing from + * the host. READ/WRITE_ONCE() should also be used with anything + * that may change. + */ +static uint64_t host_page_size; +static uint64_t guest_page_size; +static uint64_t guest_num_pages; +static uint64_t random_array[TEST_PAGES_PER_LOOP]; +static uint64_t iteration; /* - * Guest variables. We use these variables to share data between host - * and guest. There are two copies of the variables, one in host memory - * (which is unused) and one in guest memory. When the host wants to - * access these variables, it needs to call addr_gva2hva() to access the - * guest copy. + * Guest physical memory offset of the testing memory slot. + * This will be set to the topmost valid physical address minus + * the test memory size. */ -uint64_t guest_random_array[TEST_PAGES_PER_LOOP]; -uint64_t guest_iteration; -uint64_t guest_page_size; +static uint64_t guest_test_phys_mem; /* - * Writes to the first byte of a random page within the testing memory - * region continuously. + * Guest virtual memory offset of the testing memory slot. + * Must not conflict with identity mapped test code. */ -void guest_code(void) +static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; + +/* + * Continuously write to the first 8 bytes of a random pages within + * the testing memory region. + */ +static void guest_code(void) { - int i = 0; - uint64_t volatile *array = guest_random_array; - uint64_t volatile *guest_addr; + int i; while (true) { for (i = 0; i < TEST_PAGES_PER_LOOP; i++) { - /* - * Write to the first 8 bytes of a random page - * on the testing memory region. - */ - guest_addr = (uint64_t *) - (TEST_MEM_OFFSET + - (array[i] % TEST_MEM_PAGES) * guest_page_size); - *guest_addr = guest_iteration; + uint64_t addr = guest_test_virt_mem; + addr += (READ_ONCE(random_array[i]) % guest_num_pages) + * guest_page_size; + addr &= ~(host_page_size - 1); + *(uint64_t *)addr = READ_ONCE(iteration); } + /* Tell the host that we need more random numbers */ GUEST_SYNC(1); } } -/* - * Host variables. These variables should only be used by the host - * rather than the guest. - */ -bool host_quit; +/* Host variables */ +static bool host_quit; /* Points to the test VM memory region on which we track dirty logs */ -void *host_test_mem; +static void *host_test_mem; +static uint64_t host_num_pages; /* For statistics only */ -uint64_t host_dirty_count; -uint64_t host_clear_count; -uint64_t host_track_next_count; +static uint64_t host_dirty_count; +static uint64_t host_clear_count; +static uint64_t host_track_next_count; /* * We use this bitmap to track some pages that should have its dirty @@ -93,40 +104,34 @@ uint64_t host_track_next_count; * page bit is cleared in the latest bitmap, then the system must * report that write in the next get dirty log call. */ -unsigned long *host_bmap_track; +static unsigned long *host_bmap_track; -void generate_random_array(uint64_t *guest_array, uint64_t size) +static void generate_random_array(uint64_t *guest_array, uint64_t size) { uint64_t i; - for (i = 0; i < size; i++) { + for (i = 0; i < size; i++) guest_array[i] = random(); - } } -void *vcpu_worker(void *data) +static void *vcpu_worker(void *data) { int ret; - uint64_t loops, *guest_array, pages_count = 0; struct kvm_vm *vm = data; + uint64_t *guest_array; + uint64_t pages_count = 0; struct kvm_run *run; - struct guest_args args; + struct ucall uc; run = vcpu_state(vm, VCPU_ID); - /* Retrieve the guest random array pointer and cache it */ - guest_array = addr_gva2hva(vm, (vm_vaddr_t)guest_random_array); - - DEBUG("VCPU starts\n"); - + guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array); generate_random_array(guest_array, TEST_PAGES_PER_LOOP); while (!READ_ONCE(host_quit)) { - /* Let the guest to dirty these random pages */ + /* Let the guest dirty the random pages */ ret = _vcpu_run(vm, VCPU_ID); - guest_args_read(vm, VCPU_ID, &args); - if (run->exit_reason == KVM_EXIT_IO && - args.port == GUEST_PORT_SYNC) { + if (get_ucall(vm, VCPU_ID, &uc) == UCALL_SYNC) { pages_count += TEST_PAGES_PER_LOOP; generate_random_array(guest_array, TEST_PAGES_PER_LOOP); } else { @@ -137,18 +142,20 @@ void *vcpu_worker(void *data) } } - DEBUG("VCPU exits, dirtied %"PRIu64" pages\n", pages_count); + DEBUG("Dirtied %"PRIu64" pages\n", pages_count); return NULL; } -void vm_dirty_log_verify(unsigned long *bmap, uint64_t iteration) +static void vm_dirty_log_verify(unsigned long *bmap) { uint64_t page; - uint64_t volatile *value_ptr; + uint64_t *value_ptr; + uint64_t step = host_page_size >= guest_page_size ? 1 : + guest_page_size / host_page_size; - for (page = 0; page < TEST_MEM_PAGES; page++) { - value_ptr = host_test_mem + page * getpagesize(); + for (page = 0; page < host_num_pages; page += step) { + value_ptr = host_test_mem + page * host_page_size; /* If this is a special page that we were tracking... */ if (test_and_clear_bit(page, host_bmap_track)) { @@ -208,88 +215,155 @@ void vm_dirty_log_verify(unsigned long *bmap, uint64_t iteration) } } -void help(char *name) +static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, + uint64_t extra_mem_pages, void *guest_code, + unsigned long type) { - puts(""); - printf("usage: %s [-i iterations] [-I interval] [-h]\n", name); - puts(""); - printf(" -i: specify iteration counts (default: %"PRIu64")\n", - TEST_HOST_LOOP_N); - printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n", - TEST_HOST_LOOP_INTERVAL); - puts(""); - exit(0); + struct kvm_vm *vm; + uint64_t extra_pg_pages = extra_mem_pages / 512 * 2; + + vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, + O_RDWR, type); + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); +#ifdef __x86_64__ + vm_create_irqchip(vm); +#endif + vm_vcpu_add_default(vm, vcpuid, guest_code); + return vm; } -int main(int argc, char *argv[]) +static void run_test(enum vm_guest_mode mode, unsigned long iterations, + unsigned long interval, uint64_t phys_offset) { + unsigned int guest_pa_bits, guest_page_shift; pthread_t vcpu_thread; struct kvm_vm *vm; - uint64_t volatile *psize, *iteration; - unsigned long *bmap, iterations = TEST_HOST_LOOP_N, - interval = TEST_HOST_LOOP_INTERVAL; - int opt; + uint64_t max_gfn; + unsigned long *bmap; + unsigned long type = 0; + + switch (mode) { + case VM_MODE_P52V48_4K: + guest_pa_bits = 52; + guest_page_shift = 12; + break; + case VM_MODE_P52V48_64K: + guest_pa_bits = 52; + guest_page_shift = 16; + break; + case VM_MODE_P48V48_4K: + guest_pa_bits = 48; + guest_page_shift = 12; + break; + case VM_MODE_P48V48_64K: + guest_pa_bits = 48; + guest_page_shift = 16; + break; + case VM_MODE_P40V48_4K: + guest_pa_bits = 40; + guest_page_shift = 12; + break; + case VM_MODE_P40V48_64K: + guest_pa_bits = 40; + guest_page_shift = 16; + break; + default: + TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode); + } - while ((opt = getopt(argc, argv, "hi:I:")) != -1) { - switch (opt) { - case 'i': - iterations = strtol(optarg, NULL, 10); - break; - case 'I': - interval = strtol(optarg, NULL, 10); - break; - case 'h': - default: - help(argv[0]); - break; - } + DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + +#ifdef __x86_64__ + /* + * FIXME + * The x86_64 kvm selftests framework currently only supports a + * single PML4 which restricts the number of physical address + * bits we can change to 39. + */ + guest_pa_bits = 39; +#endif +#ifdef __aarch64__ + if (guest_pa_bits != 40) + type = KVM_VM_TYPE_ARM_IPA_SIZE(guest_pa_bits); +#endif + max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1; + guest_page_size = (1ul << guest_page_shift); + /* 1G of guest page sized pages */ + guest_num_pages = (1ul << (30 - guest_page_shift)); + host_page_size = getpagesize(); + host_num_pages = (guest_num_pages * guest_page_size) / host_page_size + + !!((guest_num_pages * guest_page_size) % host_page_size); + + if (!phys_offset) { + guest_test_phys_mem = (max_gfn - guest_num_pages) * guest_page_size; + guest_test_phys_mem &= ~(host_page_size - 1); + } else { + guest_test_phys_mem = phys_offset; } - TEST_ASSERT(iterations > 2, "Iteration must be bigger than zero\n"); - TEST_ASSERT(interval > 0, "Interval must be bigger than zero"); + DEBUG("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); - DEBUG("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n", - iterations, interval); + bmap = bitmap_alloc(host_num_pages); + host_bmap_track = bitmap_alloc(host_num_pages); - srandom(time(0)); + vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code, type); - bmap = bitmap_alloc(TEST_MEM_PAGES); - host_bmap_track = bitmap_alloc(TEST_MEM_PAGES); +#ifdef USE_CLEAR_DIRTY_LOG + struct kvm_enable_cap cap = {}; - vm = vm_create_default(VCPU_ID, TEST_MEM_PAGES, guest_code); + cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT; + cap.args[0] = 1; + vm_enable_cap(vm, &cap); +#endif /* Add an extra memory slot for testing dirty logging */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - TEST_MEM_OFFSET, + guest_test_phys_mem, TEST_MEM_SLOT_INDEX, - TEST_MEM_PAGES, + guest_num_pages, KVM_MEM_LOG_DIRTY_PAGES); - /* Cache the HVA pointer of the region */ - host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)TEST_MEM_OFFSET); - /* Do 1:1 mapping for the dirty track memory slot */ - virt_map(vm, TEST_MEM_OFFSET, TEST_MEM_OFFSET, - TEST_MEM_PAGES * getpagesize(), 0); + /* Do mapping for the dirty track memory slot */ + virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, + guest_num_pages * guest_page_size, 0); + /* Cache the HVA pointer of the region */ + host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem); + +#ifdef __x86_64__ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); +#endif +#ifdef __aarch64__ + ucall_init(vm, UCALL_MMIO, NULL); +#endif - /* Tell the guest about the page size on the system */ - psize = addr_gva2hva(vm, (vm_vaddr_t)&guest_page_size); - *psize = getpagesize(); + /* Export the shared variables to the guest */ + sync_global_to_guest(vm, host_page_size); + sync_global_to_guest(vm, guest_page_size); + sync_global_to_guest(vm, guest_test_virt_mem); + sync_global_to_guest(vm, guest_num_pages); /* Start the iterations */ - iteration = addr_gva2hva(vm, (vm_vaddr_t)&guest_iteration); - *iteration = 1; + iteration = 1; + sync_global_to_guest(vm, iteration); + host_quit = false; + host_dirty_count = 0; + host_clear_count = 0; + host_track_next_count = 0; - /* Start dirtying pages */ pthread_create(&vcpu_thread, NULL, vcpu_worker, vm); - while (*iteration < iterations) { + while (iteration < iterations) { /* Give the vcpu thread some time to dirty some pages */ usleep(interval * 1000); kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); - vm_dirty_log_verify(bmap, *iteration); - (*iteration)++; +#ifdef USE_CLEAR_DIRTY_LOG + kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, + DIV_ROUND_UP(host_num_pages, 64) * 64); +#endif + vm_dirty_log_verify(bmap); + iteration++; + sync_global_to_guest(vm, iteration); } /* Tell the vcpu thread to quit */ @@ -302,7 +376,124 @@ int main(int argc, char *argv[]) free(bmap); free(host_bmap_track); + ucall_uninit(vm); kvm_vm_free(vm); +} + +struct vm_guest_mode_params { + bool supported; + bool enabled; +}; +struct vm_guest_mode_params vm_guest_mode_params[NUM_VM_MODES]; + +#define vm_guest_mode_params_init(mode, supported, enabled) \ +({ \ + vm_guest_mode_params[mode] = (struct vm_guest_mode_params){ supported, enabled }; \ +}) + +static void help(char *name) +{ + int i; + + puts(""); + printf("usage: %s [-h] [-i iterations] [-I interval] " + "[-p offset] [-m mode]\n", name); + puts(""); + printf(" -i: specify iteration counts (default: %"PRIu64")\n", + TEST_HOST_LOOP_N); + printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n", + TEST_HOST_LOOP_INTERVAL); + printf(" -p: specify guest physical test memory offset\n" + " Warning: a low offset can conflict with the loaded test code.\n"); + printf(" -m: specify the guest mode ID to test " + "(default: test all supported modes)\n" + " This option may be used multiple times.\n" + " Guest mode IDs:\n"); + for (i = 0; i < NUM_VM_MODES; ++i) { + printf(" %d: %s%s\n", i, vm_guest_mode_string(i), + vm_guest_mode_params[i].supported ? " (supported)" : ""); + } + puts(""); + exit(0); +} + +int main(int argc, char *argv[]) +{ + unsigned long iterations = TEST_HOST_LOOP_N; + unsigned long interval = TEST_HOST_LOOP_INTERVAL; + bool mode_selected = false; + uint64_t phys_offset = 0; + unsigned int mode, host_ipa_limit; + int opt, i; + +#ifdef USE_CLEAR_DIRTY_LOG + if (!kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT)) { + fprintf(stderr, "KVM_CLEAR_DIRTY_LOG not available, skipping tests\n"); + exit(KSFT_SKIP); + } +#endif + +#ifdef __x86_64__ + vm_guest_mode_params_init(VM_MODE_P52V48_4K, true, true); +#endif +#ifdef __aarch64__ + vm_guest_mode_params_init(VM_MODE_P40V48_4K, true, true); + vm_guest_mode_params_init(VM_MODE_P40V48_64K, true, true); + + host_ipa_limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); + if (host_ipa_limit >= 52) + vm_guest_mode_params_init(VM_MODE_P52V48_64K, true, true); + if (host_ipa_limit >= 48) { + vm_guest_mode_params_init(VM_MODE_P48V48_4K, true, true); + vm_guest_mode_params_init(VM_MODE_P48V48_64K, true, true); + } +#endif + + while ((opt = getopt(argc, argv, "hi:I:p:m:")) != -1) { + switch (opt) { + case 'i': + iterations = strtol(optarg, NULL, 10); + break; + case 'I': + interval = strtol(optarg, NULL, 10); + break; + case 'p': + phys_offset = strtoull(optarg, NULL, 0); + break; + case 'm': + if (!mode_selected) { + for (i = 0; i < NUM_VM_MODES; ++i) + vm_guest_mode_params[i].enabled = false; + mode_selected = true; + } + mode = strtoul(optarg, NULL, 10); + TEST_ASSERT(mode < NUM_VM_MODES, + "Guest mode ID %d too big", mode); + vm_guest_mode_params[mode].enabled = true; + break; + case 'h': + default: + help(argv[0]); + break; + } + } + + TEST_ASSERT(iterations > 2, "Iterations must be greater than two"); + TEST_ASSERT(interval > 0, "Interval must be greater than zero"); + + DEBUG("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n", + iterations, interval); + + srandom(time(0)); + + for (i = 0; i < NUM_VM_MODES; ++i) { + if (!vm_guest_mode_params[i].enabled) + continue; + TEST_ASSERT(vm_guest_mode_params[i].supported, + "Guest mode ID %d (%s) not supported.", + i, vm_guest_mode_string(i)); + run_test(i, iterations, interval, phys_offset); + } return 0; } diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h new file mode 100644 index 000000000000..9ef2ab1a0c08 --- /dev/null +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AArch64 processor specific defines + * + * Copyright (C) 2018, Red Hat, Inc. + */ +#ifndef SELFTEST_KVM_PROCESSOR_H +#define SELFTEST_KVM_PROCESSOR_H + +#include "kvm_util.h" + + +#define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ + KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x)) + +#define CPACR_EL1 3, 0, 1, 0, 2 +#define TCR_EL1 3, 0, 2, 0, 2 +#define MAIR_EL1 3, 0, 10, 2, 0 +#define TTBR0_EL1 3, 0, 2, 0, 0 +#define SCTLR_EL1 3, 0, 1, 0, 0 + +/* + * Default MAIR + * index attribute + * DEVICE_nGnRnE 0 0000:0000 + * DEVICE_nGnRE 1 0000:0100 + * DEVICE_GRE 2 0000:1100 + * NORMAL_NC 3 0100:0100 + * NORMAL 4 1111:1111 + * NORMAL_WT 5 1011:1011 + */ +#define DEFAULT_MAIR_EL1 ((0x00ul << (0 * 8)) | \ + (0x04ul << (1 * 8)) | \ + (0x0cul << (2 * 8)) | \ + (0x44ul << (3 * 8)) | \ + (0xfful << (4 * 8)) | \ + (0xbbul << (5 * 8))) + +static inline void get_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint64_t *addr) +{ + struct kvm_one_reg reg; + reg.id = id; + reg.addr = (uint64_t)addr; + vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, ®); +} + +static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint64_t val) +{ + struct kvm_one_reg reg; + reg.id = id; + reg.addr = (uint64_t)&val; + vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, ®); +} + +#endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/evmcs.h b/tools/testing/selftests/kvm/include/evmcs.h new file mode 100644 index 000000000000..4059014d93ea --- /dev/null +++ b/tools/testing/selftests/kvm/include/evmcs.h @@ -0,0 +1,1098 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * tools/testing/selftests/kvm/include/vmx.h + * + * Copyright (C) 2018, Red Hat, Inc. + * + */ + +#ifndef SELFTEST_KVM_EVMCS_H +#define SELFTEST_KVM_EVMCS_H + +#include <stdint.h> +#include "vmx.h" + +#define u16 uint16_t +#define u32 uint32_t +#define u64 uint64_t + +extern bool enable_evmcs; + +struct hv_vp_assist_page { + __u32 apic_assist; + __u32 reserved; + __u64 vtl_control[2]; + __u64 nested_enlightenments_control[2]; + __u32 enlighten_vmentry; + __u64 current_nested_vmcs; +}; + +struct hv_enlightened_vmcs { + u32 revision_id; + u32 abort; + + u16 host_es_selector; + u16 host_cs_selector; + u16 host_ss_selector; + u16 host_ds_selector; + u16 host_fs_selector; + u16 host_gs_selector; + u16 host_tr_selector; + + u64 host_ia32_pat; + u64 host_ia32_efer; + + u64 host_cr0; + u64 host_cr3; + u64 host_cr4; + + u64 host_ia32_sysenter_esp; + u64 host_ia32_sysenter_eip; + u64 host_rip; + u32 host_ia32_sysenter_cs; + + u32 pin_based_vm_exec_control; + u32 vm_exit_controls; + u32 secondary_vm_exec_control; + + u64 io_bitmap_a; + u64 io_bitmap_b; + u64 msr_bitmap; + + u16 guest_es_selector; + u16 guest_cs_selector; + u16 guest_ss_selector; + u16 guest_ds_selector; + u16 guest_fs_selector; + u16 guest_gs_selector; + u16 guest_ldtr_selector; + u16 guest_tr_selector; + + u32 guest_es_limit; + u32 guest_cs_limit; + u32 guest_ss_limit; + u32 guest_ds_limit; + u32 guest_fs_limit; + u32 guest_gs_limit; + u32 guest_ldtr_limit; + u32 guest_tr_limit; + u32 guest_gdtr_limit; + u32 guest_idtr_limit; + + u32 guest_es_ar_bytes; + u32 guest_cs_ar_bytes; + u32 guest_ss_ar_bytes; + u32 guest_ds_ar_bytes; + u32 guest_fs_ar_bytes; + u32 guest_gs_ar_bytes; + u32 guest_ldtr_ar_bytes; + u32 guest_tr_ar_bytes; + + u64 guest_es_base; + u64 guest_cs_base; + u64 guest_ss_base; + u64 guest_ds_base; + u64 guest_fs_base; + u64 guest_gs_base; + u64 guest_ldtr_base; + u64 guest_tr_base; + u64 guest_gdtr_base; + u64 guest_idtr_base; + + u64 padding64_1[3]; + + u64 vm_exit_msr_store_addr; + u64 vm_exit_msr_load_addr; + u64 vm_entry_msr_load_addr; + + u64 cr3_target_value0; + u64 cr3_target_value1; + u64 cr3_target_value2; + u64 cr3_target_value3; + + u32 page_fault_error_code_mask; + u32 page_fault_error_code_match; + + u32 cr3_target_count; + u32 vm_exit_msr_store_count; + u32 vm_exit_msr_load_count; + u32 vm_entry_msr_load_count; + + u64 tsc_offset; + u64 virtual_apic_page_addr; + u64 vmcs_link_pointer; + + u64 guest_ia32_debugctl; + u64 guest_ia32_pat; + u64 guest_ia32_efer; + + u64 guest_pdptr0; + u64 guest_pdptr1; + u64 guest_pdptr2; + u64 guest_pdptr3; + + u64 guest_pending_dbg_exceptions; + u64 guest_sysenter_esp; + u64 guest_sysenter_eip; + + u32 guest_activity_state; + u32 guest_sysenter_cs; + + u64 cr0_guest_host_mask; + u64 cr4_guest_host_mask; + u64 cr0_read_shadow; + u64 cr4_read_shadow; + u64 guest_cr0; + u64 guest_cr3; + u64 guest_cr4; + u64 guest_dr7; + + u64 host_fs_base; + u64 host_gs_base; + u64 host_tr_base; + u64 host_gdtr_base; + u64 host_idtr_base; + u64 host_rsp; + + u64 ept_pointer; + + u16 virtual_processor_id; + u16 padding16[3]; + + u64 padding64_2[5]; + u64 guest_physical_address; + + u32 vm_instruction_error; + u32 vm_exit_reason; + u32 vm_exit_intr_info; + u32 vm_exit_intr_error_code; + u32 idt_vectoring_info_field; + u32 idt_vectoring_error_code; + u32 vm_exit_instruction_len; + u32 vmx_instruction_info; + + u64 exit_qualification; + u64 exit_io_instruction_ecx; + u64 exit_io_instruction_esi; + u64 exit_io_instruction_edi; + u64 exit_io_instruction_eip; + + u64 guest_linear_address; + u64 guest_rsp; + u64 guest_rflags; + + u32 guest_interruptibility_info; + u32 cpu_based_vm_exec_control; + u32 exception_bitmap; + u32 vm_entry_controls; + u32 vm_entry_intr_info_field; + u32 vm_entry_exception_error_code; + u32 vm_entry_instruction_len; + u32 tpr_threshold; + + u64 guest_rip; + + u32 hv_clean_fields; + u32 hv_padding_32; + u32 hv_synthetic_controls; + struct { + u32 nested_flush_hypercall:1; + u32 msr_bitmap:1; + u32 reserved:30; + } hv_enlightenments_control; + u32 hv_vp_id; + + u64 hv_vm_id; + u64 partition_assist_page; + u64 padding64_4[4]; + u64 guest_bndcfgs; + u64 padding64_5[7]; + u64 xss_exit_bitmap; + u64 padding64_6[7]; +}; + +#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073 +#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001 +#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12 +#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \ + (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) + +struct hv_enlightened_vmcs *current_evmcs; +struct hv_vp_assist_page *current_vp_assist; + +static inline int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist) +{ + u64 val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) | + HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; + + wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val); + + current_vp_assist = vp_assist; + + enable_evmcs = true; + + return 0; +} + +static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs) +{ + current_vp_assist->current_nested_vmcs = vmcs_pa; + current_vp_assist->enlighten_vmentry = 1; + + current_evmcs = vmcs; + + return 0; +} + +static inline int evmcs_vmptrst(uint64_t *value) +{ + *value = current_vp_assist->current_nested_vmcs & + ~HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; + + return 0; +} + +static inline int evmcs_vmread(uint64_t encoding, uint64_t *value) +{ + switch (encoding) { + case GUEST_RIP: + *value = current_evmcs->guest_rip; + break; + case GUEST_RSP: + *value = current_evmcs->guest_rsp; + break; + case GUEST_RFLAGS: + *value = current_evmcs->guest_rflags; + break; + case HOST_IA32_PAT: + *value = current_evmcs->host_ia32_pat; + break; + case HOST_IA32_EFER: + *value = current_evmcs->host_ia32_efer; + break; + case HOST_CR0: + *value = current_evmcs->host_cr0; + break; + case HOST_CR3: + *value = current_evmcs->host_cr3; + break; + case HOST_CR4: + *value = current_evmcs->host_cr4; + break; + case HOST_IA32_SYSENTER_ESP: + *value = current_evmcs->host_ia32_sysenter_esp; + break; + case HOST_IA32_SYSENTER_EIP: + *value = current_evmcs->host_ia32_sysenter_eip; + break; + case HOST_RIP: + *value = current_evmcs->host_rip; + break; + case IO_BITMAP_A: + *value = current_evmcs->io_bitmap_a; + break; + case IO_BITMAP_B: + *value = current_evmcs->io_bitmap_b; + break; + case MSR_BITMAP: + *value = current_evmcs->msr_bitmap; + break; + case GUEST_ES_BASE: + *value = current_evmcs->guest_es_base; + break; + case GUEST_CS_BASE: + *value = current_evmcs->guest_cs_base; + break; + case GUEST_SS_BASE: + *value = current_evmcs->guest_ss_base; + break; + case GUEST_DS_BASE: + *value = current_evmcs->guest_ds_base; + break; + case GUEST_FS_BASE: + *value = current_evmcs->guest_fs_base; + break; + case GUEST_GS_BASE: + *value = current_evmcs->guest_gs_base; + break; + case GUEST_LDTR_BASE: + *value = current_evmcs->guest_ldtr_base; + break; + case GUEST_TR_BASE: + *value = current_evmcs->guest_tr_base; + break; + case GUEST_GDTR_BASE: + *value = current_evmcs->guest_gdtr_base; + break; + case GUEST_IDTR_BASE: + *value = current_evmcs->guest_idtr_base; + break; + case TSC_OFFSET: + *value = current_evmcs->tsc_offset; + break; + case VIRTUAL_APIC_PAGE_ADDR: + *value = current_evmcs->virtual_apic_page_addr; + break; + case VMCS_LINK_POINTER: + *value = current_evmcs->vmcs_link_pointer; + break; + case GUEST_IA32_DEBUGCTL: + *value = current_evmcs->guest_ia32_debugctl; + break; + case GUEST_IA32_PAT: + *value = current_evmcs->guest_ia32_pat; + break; + case GUEST_IA32_EFER: + *value = current_evmcs->guest_ia32_efer; + break; + case GUEST_PDPTR0: + *value = current_evmcs->guest_pdptr0; + break; + case GUEST_PDPTR1: + *value = current_evmcs->guest_pdptr1; + break; + case GUEST_PDPTR2: + *value = current_evmcs->guest_pdptr2; + break; + case GUEST_PDPTR3: + *value = current_evmcs->guest_pdptr3; + break; + case GUEST_PENDING_DBG_EXCEPTIONS: + *value = current_evmcs->guest_pending_dbg_exceptions; + break; + case GUEST_SYSENTER_ESP: + *value = current_evmcs->guest_sysenter_esp; + break; + case GUEST_SYSENTER_EIP: + *value = current_evmcs->guest_sysenter_eip; + break; + case CR0_GUEST_HOST_MASK: + *value = current_evmcs->cr0_guest_host_mask; + break; + case CR4_GUEST_HOST_MASK: + *value = current_evmcs->cr4_guest_host_mask; + break; + case CR0_READ_SHADOW: + *value = current_evmcs->cr0_read_shadow; + break; + case CR4_READ_SHADOW: + *value = current_evmcs->cr4_read_shadow; + break; + case GUEST_CR0: + *value = current_evmcs->guest_cr0; + break; + case GUEST_CR3: + *value = current_evmcs->guest_cr3; + break; + case GUEST_CR4: + *value = current_evmcs->guest_cr4; + break; + case GUEST_DR7: + *value = current_evmcs->guest_dr7; + break; + case HOST_FS_BASE: + *value = current_evmcs->host_fs_base; + break; + case HOST_GS_BASE: + *value = current_evmcs->host_gs_base; + break; + case HOST_TR_BASE: + *value = current_evmcs->host_tr_base; + break; + case HOST_GDTR_BASE: + *value = current_evmcs->host_gdtr_base; + break; + case HOST_IDTR_BASE: + *value = current_evmcs->host_idtr_base; + break; + case HOST_RSP: + *value = current_evmcs->host_rsp; + break; + case EPT_POINTER: + *value = current_evmcs->ept_pointer; + break; + case GUEST_BNDCFGS: + *value = current_evmcs->guest_bndcfgs; + break; + case XSS_EXIT_BITMAP: + *value = current_evmcs->xss_exit_bitmap; + break; + case GUEST_PHYSICAL_ADDRESS: + *value = current_evmcs->guest_physical_address; + break; + case EXIT_QUALIFICATION: + *value = current_evmcs->exit_qualification; + break; + case GUEST_LINEAR_ADDRESS: + *value = current_evmcs->guest_linear_address; + break; + case VM_EXIT_MSR_STORE_ADDR: + *value = current_evmcs->vm_exit_msr_store_addr; + break; + case VM_EXIT_MSR_LOAD_ADDR: + *value = current_evmcs->vm_exit_msr_load_addr; + break; + case VM_ENTRY_MSR_LOAD_ADDR: + *value = current_evmcs->vm_entry_msr_load_addr; + break; + case CR3_TARGET_VALUE0: + *value = current_evmcs->cr3_target_value0; + break; + case CR3_TARGET_VALUE1: + *value = current_evmcs->cr3_target_value1; + break; + case CR3_TARGET_VALUE2: + *value = current_evmcs->cr3_target_value2; + break; + case CR3_TARGET_VALUE3: + *value = current_evmcs->cr3_target_value3; + break; + case TPR_THRESHOLD: + *value = current_evmcs->tpr_threshold; + break; + case GUEST_INTERRUPTIBILITY_INFO: + *value = current_evmcs->guest_interruptibility_info; + break; + case CPU_BASED_VM_EXEC_CONTROL: + *value = current_evmcs->cpu_based_vm_exec_control; + break; + case EXCEPTION_BITMAP: + *value = current_evmcs->exception_bitmap; + break; + case VM_ENTRY_CONTROLS: + *value = current_evmcs->vm_entry_controls; + break; + case VM_ENTRY_INTR_INFO_FIELD: + *value = current_evmcs->vm_entry_intr_info_field; + break; + case VM_ENTRY_EXCEPTION_ERROR_CODE: + *value = current_evmcs->vm_entry_exception_error_code; + break; + case VM_ENTRY_INSTRUCTION_LEN: + *value = current_evmcs->vm_entry_instruction_len; + break; + case HOST_IA32_SYSENTER_CS: + *value = current_evmcs->host_ia32_sysenter_cs; + break; + case PIN_BASED_VM_EXEC_CONTROL: + *value = current_evmcs->pin_based_vm_exec_control; + break; + case VM_EXIT_CONTROLS: + *value = current_evmcs->vm_exit_controls; + break; + case SECONDARY_VM_EXEC_CONTROL: + *value = current_evmcs->secondary_vm_exec_control; + break; + case GUEST_ES_LIMIT: + *value = current_evmcs->guest_es_limit; + break; + case GUEST_CS_LIMIT: + *value = current_evmcs->guest_cs_limit; + break; + case GUEST_SS_LIMIT: + *value = current_evmcs->guest_ss_limit; + break; + case GUEST_DS_LIMIT: + *value = current_evmcs->guest_ds_limit; + break; + case GUEST_FS_LIMIT: + *value = current_evmcs->guest_fs_limit; + break; + case GUEST_GS_LIMIT: + *value = current_evmcs->guest_gs_limit; + break; + case GUEST_LDTR_LIMIT: + *value = current_evmcs->guest_ldtr_limit; + break; + case GUEST_TR_LIMIT: + *value = current_evmcs->guest_tr_limit; + break; + case GUEST_GDTR_LIMIT: + *value = current_evmcs->guest_gdtr_limit; + break; + case GUEST_IDTR_LIMIT: + *value = current_evmcs->guest_idtr_limit; + break; + case GUEST_ES_AR_BYTES: + *value = current_evmcs->guest_es_ar_bytes; + break; + case GUEST_CS_AR_BYTES: + *value = current_evmcs->guest_cs_ar_bytes; + break; + case GUEST_SS_AR_BYTES: + *value = current_evmcs->guest_ss_ar_bytes; + break; + case GUEST_DS_AR_BYTES: + *value = current_evmcs->guest_ds_ar_bytes; + break; + case GUEST_FS_AR_BYTES: + *value = current_evmcs->guest_fs_ar_bytes; + break; + case GUEST_GS_AR_BYTES: + *value = current_evmcs->guest_gs_ar_bytes; + break; + case GUEST_LDTR_AR_BYTES: + *value = current_evmcs->guest_ldtr_ar_bytes; + break; + case GUEST_TR_AR_BYTES: + *value = current_evmcs->guest_tr_ar_bytes; + break; + case GUEST_ACTIVITY_STATE: + *value = current_evmcs->guest_activity_state; + break; + case GUEST_SYSENTER_CS: + *value = current_evmcs->guest_sysenter_cs; + break; + case VM_INSTRUCTION_ERROR: + *value = current_evmcs->vm_instruction_error; + break; + case VM_EXIT_REASON: + *value = current_evmcs->vm_exit_reason; + break; + case VM_EXIT_INTR_INFO: + *value = current_evmcs->vm_exit_intr_info; + break; + case VM_EXIT_INTR_ERROR_CODE: + *value = current_evmcs->vm_exit_intr_error_code; + break; + case IDT_VECTORING_INFO_FIELD: + *value = current_evmcs->idt_vectoring_info_field; + break; + case IDT_VECTORING_ERROR_CODE: + *value = current_evmcs->idt_vectoring_error_code; + break; + case VM_EXIT_INSTRUCTION_LEN: + *value = current_evmcs->vm_exit_instruction_len; + break; + case VMX_INSTRUCTION_INFO: + *value = current_evmcs->vmx_instruction_info; + break; + case PAGE_FAULT_ERROR_CODE_MASK: + *value = current_evmcs->page_fault_error_code_mask; + break; + case PAGE_FAULT_ERROR_CODE_MATCH: + *value = current_evmcs->page_fault_error_code_match; + break; + case CR3_TARGET_COUNT: + *value = current_evmcs->cr3_target_count; + break; + case VM_EXIT_MSR_STORE_COUNT: + *value = current_evmcs->vm_exit_msr_store_count; + break; + case VM_EXIT_MSR_LOAD_COUNT: + *value = current_evmcs->vm_exit_msr_load_count; + break; + case VM_ENTRY_MSR_LOAD_COUNT: + *value = current_evmcs->vm_entry_msr_load_count; + break; + case HOST_ES_SELECTOR: + *value = current_evmcs->host_es_selector; + break; + case HOST_CS_SELECTOR: + *value = current_evmcs->host_cs_selector; + break; + case HOST_SS_SELECTOR: + *value = current_evmcs->host_ss_selector; + break; + case HOST_DS_SELECTOR: + *value = current_evmcs->host_ds_selector; + break; + case HOST_FS_SELECTOR: + *value = current_evmcs->host_fs_selector; + break; + case HOST_GS_SELECTOR: + *value = current_evmcs->host_gs_selector; + break; + case HOST_TR_SELECTOR: + *value = current_evmcs->host_tr_selector; + break; + case GUEST_ES_SELECTOR: + *value = current_evmcs->guest_es_selector; + break; + case GUEST_CS_SELECTOR: + *value = current_evmcs->guest_cs_selector; + break; + case GUEST_SS_SELECTOR: + *value = current_evmcs->guest_ss_selector; + break; + case GUEST_DS_SELECTOR: + *value = current_evmcs->guest_ds_selector; + break; + case GUEST_FS_SELECTOR: + *value = current_evmcs->guest_fs_selector; + break; + case GUEST_GS_SELECTOR: + *value = current_evmcs->guest_gs_selector; + break; + case GUEST_LDTR_SELECTOR: + *value = current_evmcs->guest_ldtr_selector; + break; + case GUEST_TR_SELECTOR: + *value = current_evmcs->guest_tr_selector; + break; + case VIRTUAL_PROCESSOR_ID: + *value = current_evmcs->virtual_processor_id; + break; + default: return 1; + } + + return 0; +} + +static inline int evmcs_vmwrite(uint64_t encoding, uint64_t value) +{ + switch (encoding) { + case GUEST_RIP: + current_evmcs->guest_rip = value; + break; + case GUEST_RSP: + current_evmcs->guest_rsp = value; + break; + case GUEST_RFLAGS: + current_evmcs->guest_rflags = value; + break; + case HOST_IA32_PAT: + current_evmcs->host_ia32_pat = value; + break; + case HOST_IA32_EFER: + current_evmcs->host_ia32_efer = value; + break; + case HOST_CR0: + current_evmcs->host_cr0 = value; + break; + case HOST_CR3: + current_evmcs->host_cr3 = value; + break; + case HOST_CR4: + current_evmcs->host_cr4 = value; + break; + case HOST_IA32_SYSENTER_ESP: + current_evmcs->host_ia32_sysenter_esp = value; + break; + case HOST_IA32_SYSENTER_EIP: + current_evmcs->host_ia32_sysenter_eip = value; + break; + case HOST_RIP: + current_evmcs->host_rip = value; + break; + case IO_BITMAP_A: + current_evmcs->io_bitmap_a = value; + break; + case IO_BITMAP_B: + current_evmcs->io_bitmap_b = value; + break; + case MSR_BITMAP: + current_evmcs->msr_bitmap = value; + break; + case GUEST_ES_BASE: + current_evmcs->guest_es_base = value; + break; + case GUEST_CS_BASE: + current_evmcs->guest_cs_base = value; + break; + case GUEST_SS_BASE: + current_evmcs->guest_ss_base = value; + break; + case GUEST_DS_BASE: + current_evmcs->guest_ds_base = value; + break; + case GUEST_FS_BASE: + current_evmcs->guest_fs_base = value; + break; + case GUEST_GS_BASE: + current_evmcs->guest_gs_base = value; + break; + case GUEST_LDTR_BASE: + current_evmcs->guest_ldtr_base = value; + break; + case GUEST_TR_BASE: + current_evmcs->guest_tr_base = value; + break; + case GUEST_GDTR_BASE: + current_evmcs->guest_gdtr_base = value; + break; + case GUEST_IDTR_BASE: + current_evmcs->guest_idtr_base = value; + break; + case TSC_OFFSET: + current_evmcs->tsc_offset = value; + break; + case VIRTUAL_APIC_PAGE_ADDR: + current_evmcs->virtual_apic_page_addr = value; + break; + case VMCS_LINK_POINTER: + current_evmcs->vmcs_link_pointer = value; + break; + case GUEST_IA32_DEBUGCTL: + current_evmcs->guest_ia32_debugctl = value; + break; + case GUEST_IA32_PAT: + current_evmcs->guest_ia32_pat = value; + break; + case GUEST_IA32_EFER: + current_evmcs->guest_ia32_efer = value; + break; + case GUEST_PDPTR0: + current_evmcs->guest_pdptr0 = value; + break; + case GUEST_PDPTR1: + current_evmcs->guest_pdptr1 = value; + break; + case GUEST_PDPTR2: + current_evmcs->guest_pdptr2 = value; + break; + case GUEST_PDPTR3: + current_evmcs->guest_pdptr3 = value; + break; + case GUEST_PENDING_DBG_EXCEPTIONS: + current_evmcs->guest_pending_dbg_exceptions = value; + break; + case GUEST_SYSENTER_ESP: + current_evmcs->guest_sysenter_esp = value; + break; + case GUEST_SYSENTER_EIP: + current_evmcs->guest_sysenter_eip = value; + break; + case CR0_GUEST_HOST_MASK: + current_evmcs->cr0_guest_host_mask = value; + break; + case CR4_GUEST_HOST_MASK: + current_evmcs->cr4_guest_host_mask = value; + break; + case CR0_READ_SHADOW: + current_evmcs->cr0_read_shadow = value; + break; + case CR4_READ_SHADOW: + current_evmcs->cr4_read_shadow = value; + break; + case GUEST_CR0: + current_evmcs->guest_cr0 = value; + break; + case GUEST_CR3: + current_evmcs->guest_cr3 = value; + break; + case GUEST_CR4: + current_evmcs->guest_cr4 = value; + break; + case GUEST_DR7: + current_evmcs->guest_dr7 = value; + break; + case HOST_FS_BASE: + current_evmcs->host_fs_base = value; + break; + case HOST_GS_BASE: + current_evmcs->host_gs_base = value; + break; + case HOST_TR_BASE: + current_evmcs->host_tr_base = value; + break; + case HOST_GDTR_BASE: + current_evmcs->host_gdtr_base = value; + break; + case HOST_IDTR_BASE: + current_evmcs->host_idtr_base = value; + break; + case HOST_RSP: + current_evmcs->host_rsp = value; + break; + case EPT_POINTER: + current_evmcs->ept_pointer = value; + break; + case GUEST_BNDCFGS: + current_evmcs->guest_bndcfgs = value; + break; + case XSS_EXIT_BITMAP: + current_evmcs->xss_exit_bitmap = value; + break; + case GUEST_PHYSICAL_ADDRESS: + current_evmcs->guest_physical_address = value; + break; + case EXIT_QUALIFICATION: + current_evmcs->exit_qualification = value; + break; + case GUEST_LINEAR_ADDRESS: + current_evmcs->guest_linear_address = value; + break; + case VM_EXIT_MSR_STORE_ADDR: + current_evmcs->vm_exit_msr_store_addr = value; + break; + case VM_EXIT_MSR_LOAD_ADDR: + current_evmcs->vm_exit_msr_load_addr = value; + break; + case VM_ENTRY_MSR_LOAD_ADDR: + current_evmcs->vm_entry_msr_load_addr = value; + break; + case CR3_TARGET_VALUE0: + current_evmcs->cr3_target_value0 = value; + break; + case CR3_TARGET_VALUE1: + current_evmcs->cr3_target_value1 = value; + break; + case CR3_TARGET_VALUE2: + current_evmcs->cr3_target_value2 = value; + break; + case CR3_TARGET_VALUE3: + current_evmcs->cr3_target_value3 = value; + break; + case TPR_THRESHOLD: + current_evmcs->tpr_threshold = value; + break; + case GUEST_INTERRUPTIBILITY_INFO: + current_evmcs->guest_interruptibility_info = value; + break; + case CPU_BASED_VM_EXEC_CONTROL: + current_evmcs->cpu_based_vm_exec_control = value; + break; + case EXCEPTION_BITMAP: + current_evmcs->exception_bitmap = value; + break; + case VM_ENTRY_CONTROLS: + current_evmcs->vm_entry_controls = value; + break; + case VM_ENTRY_INTR_INFO_FIELD: + current_evmcs->vm_entry_intr_info_field = value; + break; + case VM_ENTRY_EXCEPTION_ERROR_CODE: + current_evmcs->vm_entry_exception_error_code = value; + break; + case VM_ENTRY_INSTRUCTION_LEN: + current_evmcs->vm_entry_instruction_len = value; + break; + case HOST_IA32_SYSENTER_CS: + current_evmcs->host_ia32_sysenter_cs = value; + break; + case PIN_BASED_VM_EXEC_CONTROL: + current_evmcs->pin_based_vm_exec_control = value; + break; + case VM_EXIT_CONTROLS: + current_evmcs->vm_exit_controls = value; + break; + case SECONDARY_VM_EXEC_CONTROL: + current_evmcs->secondary_vm_exec_control = value; + break; + case GUEST_ES_LIMIT: + current_evmcs->guest_es_limit = value; + break; + case GUEST_CS_LIMIT: + current_evmcs->guest_cs_limit = value; + break; + case GUEST_SS_LIMIT: + current_evmcs->guest_ss_limit = value; + break; + case GUEST_DS_LIMIT: + current_evmcs->guest_ds_limit = value; + break; + case GUEST_FS_LIMIT: + current_evmcs->guest_fs_limit = value; + break; + case GUEST_GS_LIMIT: + current_evmcs->guest_gs_limit = value; + break; + case GUEST_LDTR_LIMIT: + current_evmcs->guest_ldtr_limit = value; + break; + case GUEST_TR_LIMIT: + current_evmcs->guest_tr_limit = value; + break; + case GUEST_GDTR_LIMIT: + current_evmcs->guest_gdtr_limit = value; + break; + case GUEST_IDTR_LIMIT: + current_evmcs->guest_idtr_limit = value; + break; + case GUEST_ES_AR_BYTES: + current_evmcs->guest_es_ar_bytes = value; + break; + case GUEST_CS_AR_BYTES: + current_evmcs->guest_cs_ar_bytes = value; + break; + case GUEST_SS_AR_BYTES: + current_evmcs->guest_ss_ar_bytes = value; + break; + case GUEST_DS_AR_BYTES: + current_evmcs->guest_ds_ar_bytes = value; + break; + case GUEST_FS_AR_BYTES: + current_evmcs->guest_fs_ar_bytes = value; + break; + case GUEST_GS_AR_BYTES: + current_evmcs->guest_gs_ar_bytes = value; + break; + case GUEST_LDTR_AR_BYTES: + current_evmcs->guest_ldtr_ar_bytes = value; + break; + case GUEST_TR_AR_BYTES: + current_evmcs->guest_tr_ar_bytes = value; + break; + case GUEST_ACTIVITY_STATE: + current_evmcs->guest_activity_state = value; + break; + case GUEST_SYSENTER_CS: + current_evmcs->guest_sysenter_cs = value; + break; + case VM_INSTRUCTION_ERROR: + current_evmcs->vm_instruction_error = value; + break; + case VM_EXIT_REASON: + current_evmcs->vm_exit_reason = value; + break; + case VM_EXIT_INTR_INFO: + current_evmcs->vm_exit_intr_info = value; + break; + case VM_EXIT_INTR_ERROR_CODE: + current_evmcs->vm_exit_intr_error_code = value; + break; + case IDT_VECTORING_INFO_FIELD: + current_evmcs->idt_vectoring_info_field = value; + break; + case IDT_VECTORING_ERROR_CODE: + current_evmcs->idt_vectoring_error_code = value; + break; + case VM_EXIT_INSTRUCTION_LEN: + current_evmcs->vm_exit_instruction_len = value; + break; + case VMX_INSTRUCTION_INFO: + current_evmcs->vmx_instruction_info = value; + break; + case PAGE_FAULT_ERROR_CODE_MASK: + current_evmcs->page_fault_error_code_mask = value; + break; + case PAGE_FAULT_ERROR_CODE_MATCH: + current_evmcs->page_fault_error_code_match = value; + break; + case CR3_TARGET_COUNT: + current_evmcs->cr3_target_count = value; + break; + case VM_EXIT_MSR_STORE_COUNT: + current_evmcs->vm_exit_msr_store_count = value; + break; + case VM_EXIT_MSR_LOAD_COUNT: + current_evmcs->vm_exit_msr_load_count = value; + break; + case VM_ENTRY_MSR_LOAD_COUNT: + current_evmcs->vm_entry_msr_load_count = value; + break; + case HOST_ES_SELECTOR: + current_evmcs->host_es_selector = value; + break; + case HOST_CS_SELECTOR: + current_evmcs->host_cs_selector = value; + break; + case HOST_SS_SELECTOR: + current_evmcs->host_ss_selector = value; + break; + case HOST_DS_SELECTOR: + current_evmcs->host_ds_selector = value; + break; + case HOST_FS_SELECTOR: + current_evmcs->host_fs_selector = value; + break; + case HOST_GS_SELECTOR: + current_evmcs->host_gs_selector = value; + break; + case HOST_TR_SELECTOR: + current_evmcs->host_tr_selector = value; + break; + case GUEST_ES_SELECTOR: + current_evmcs->guest_es_selector = value; + break; + case GUEST_CS_SELECTOR: + current_evmcs->guest_cs_selector = value; + break; + case GUEST_SS_SELECTOR: + current_evmcs->guest_ss_selector = value; + break; + case GUEST_DS_SELECTOR: + current_evmcs->guest_ds_selector = value; + break; + case GUEST_FS_SELECTOR: + current_evmcs->guest_fs_selector = value; + break; + case GUEST_GS_SELECTOR: + current_evmcs->guest_gs_selector = value; + break; + case GUEST_LDTR_SELECTOR: + current_evmcs->guest_ldtr_selector = value; + break; + case GUEST_TR_SELECTOR: + current_evmcs->guest_tr_selector = value; + break; + case VIRTUAL_PROCESSOR_ID: + current_evmcs->virtual_processor_id = value; + break; + default: return 1; + } + + return 0; +} + +static inline int evmcs_vmlaunch(void) +{ + int ret; + + current_evmcs->hv_clean_fields = 0; + + __asm__ __volatile__("push %%rbp;" + "push %%rcx;" + "push %%rdx;" + "push %%rsi;" + "push %%rdi;" + "push $0;" + "mov %%rsp, (%[host_rsp]);" + "lea 1f(%%rip), %%rax;" + "mov %%rax, (%[host_rip]);" + "vmlaunch;" + "incq (%%rsp);" + "1: pop %%rax;" + "pop %%rdi;" + "pop %%rsi;" + "pop %%rdx;" + "pop %%rcx;" + "pop %%rbp;" + : [ret]"=&a"(ret) + : [host_rsp]"r" + ((uint64_t)¤t_evmcs->host_rsp), + [host_rip]"r" + ((uint64_t)¤t_evmcs->host_rip) + : "memory", "cc", "rbx", "r8", "r9", "r10", + "r11", "r12", "r13", "r14", "r15"); + return ret; +} + +/* + * No guest state (e.g. GPRs) is established by this vmresume. + */ +static inline int evmcs_vmresume(void) +{ + int ret; + + current_evmcs->hv_clean_fields = 0; + + __asm__ __volatile__("push %%rbp;" + "push %%rcx;" + "push %%rdx;" + "push %%rsi;" + "push %%rdi;" + "push $0;" + "mov %%rsp, (%[host_rsp]);" + "lea 1f(%%rip), %%rax;" + "mov %%rax, (%[host_rip]);" + "vmresume;" + "incq (%%rsp);" + "1: pop %%rax;" + "pop %%rdi;" + "pop %%rsi;" + "pop %%rdx;" + "pop %%rcx;" + "pop %%rbp;" + : [ret]"=&a"(ret) + : [host_rsp]"r" + ((uint64_t)¤t_evmcs->host_rsp), + [host_rip]"r" + ((uint64_t)¤t_evmcs->host_rip) + : "memory", "cc", "rbx", "r8", "r9", "r10", + "r11", "r12", "r13", "r14", "r15"); + return ret; +} + +#endif /* !SELFTEST_KVM_EVMCS_H */ diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 3acf9a91704c..a84785b02557 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -7,7 +7,7 @@ * */ #ifndef SELFTEST_KVM_UTIL_H -#define SELFTEST_KVM_UTIL_H 1 +#define SELFTEST_KVM_UTIL_H #include "test_util.h" @@ -17,12 +17,6 @@ #include "sparsebit.h" -/* - * Memslots can't cover the gfn starting at this gpa otherwise vCPUs can't be - * created. Only applies to VMs using EPT. - */ -#define KVM_DEFAULT_IDENTITY_MAP_ADDRESS 0xfffbc000ul - /* Callers of kvm_util only have an incomplete/opaque description of the * structure kvm_util is using to maintain the state of a VM. @@ -33,16 +27,25 @@ typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ /* Minimum allocated guest virtual and physical addresses */ -#define KVM_UTIL_MIN_VADDR 0x2000 +#define KVM_UTIL_MIN_VADDR 0x2000 #define DEFAULT_GUEST_PHY_PAGES 512 #define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000 -#define DEFAULT_STACK_PGS 5 +#define DEFAULT_STACK_PGS 5 enum vm_guest_mode { - VM_MODE_FLAT48PG, + VM_MODE_P52V48_4K, + VM_MODE_P52V48_64K, + VM_MODE_P48V48_4K, + VM_MODE_P48V48_64K, + VM_MODE_P40V48_4K, + VM_MODE_P40V48_64K, + NUM_VM_MODES, }; +#define vm_guest_mode_string(m) vm_guest_mode_string[m] +extern const char * const vm_guest_mode_string[]; + enum vm_mem_backing_src_type { VM_MEM_SRC_ANONYMOUS, VM_MEM_SRC_ANONYMOUS_THP, @@ -53,20 +56,24 @@ int kvm_check_cap(long cap); int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap); struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); +struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, + int perm, unsigned long type); void kvm_vm_free(struct kvm_vm *vmp); void kvm_vm_restart(struct kvm_vm *vmp, int perm); void kvm_vm_release(struct kvm_vm *vmp); void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log); +void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, + uint64_t first_page, uint32_t num_pages); -int kvm_memcmp_hva_gva(void *hva, - struct kvm_vm *vm, const vm_vaddr_t gva, size_t len); +int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva, + size_t len); void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename, - uint32_t data_memslot, uint32_t pgd_memslot); + uint32_t data_memslot, uint32_t pgd_memslot); void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); -void vcpu_dump(FILE *stream, struct kvm_vm *vm, - uint32_t vcpuid, uint8_t indent); +void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, + uint8_t indent); void vm_create_irqchip(struct kvm_vm *vm); @@ -75,13 +82,16 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, uint64_t guest_paddr, uint32_t slot, uint64_t npages, uint32_t flags); -void vcpu_ioctl(struct kvm_vm *vm, - uint32_t vcpuid, unsigned long ioctl, void *arg); +void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, + void *arg); +int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, + void *arg); void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); -void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_memslot); +void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, + int gdt_memslot); vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, - uint32_t data_memslot, uint32_t pgd_memslot); + uint32_t data_memslot, uint32_t pgd_memslot); void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, size_t size, uint32_t pgd_memslot); void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa); @@ -93,56 +103,35 @@ struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_mp_state *mp_state); -void vcpu_regs_get(struct kvm_vm *vm, - uint32_t vcpuid, struct kvm_regs *regs); -void vcpu_regs_set(struct kvm_vm *vm, - uint32_t vcpuid, struct kvm_regs *regs); + struct kvm_mp_state *mp_state); +void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); +void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...); -void vcpu_sregs_get(struct kvm_vm *vm, - uint32_t vcpuid, struct kvm_sregs *sregs); -void vcpu_sregs_set(struct kvm_vm *vm, - uint32_t vcpuid, struct kvm_sregs *sregs); -int _vcpu_sregs_set(struct kvm_vm *vm, - uint32_t vcpuid, struct kvm_sregs *sregs); +void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_sregs *sregs); +void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_sregs *sregs); +int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_sregs *sregs); void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_vcpu_events *events); + struct kvm_vcpu_events *events); void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_vcpu_events *events); -uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index); -void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, - uint64_t msr_value); + struct kvm_vcpu_events *events); const char *exit_reason_str(unsigned int exit_reason); void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot); void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - uint32_t pgd_memslot); -vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, - vm_paddr_t paddr_min, uint32_t memslot); - -struct kvm_cpuid2 *kvm_get_supported_cpuid(void); -void vcpu_set_cpuid( - struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid); - -struct kvm_cpuid_entry2 * -kvm_get_supported_cpuid_index(uint32_t function, uint32_t index); - -static inline struct kvm_cpuid_entry2 * -kvm_get_supported_cpuid_entry(uint32_t function) -{ - return kvm_get_supported_cpuid_index(function, 0); -} + uint32_t pgd_memslot); +vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, + uint32_t memslot); +vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, + vm_paddr_t paddr_min, uint32_t memslot); struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_size, void *guest_code); void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); -typedef void (*vmx_guest_code_t)(vm_vaddr_t vmxon_vaddr, - vm_paddr_t vmxon_paddr, - vm_vaddr_t vmcs_vaddr, - vm_paddr_t vmcs_paddr); - struct kvm_userspace_memory_region * kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end); @@ -152,43 +141,49 @@ allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region); int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd); -#define GUEST_PORT_SYNC 0x1000 -#define GUEST_PORT_ABORT 0x1001 -#define GUEST_PORT_DONE 0x1002 - -static inline void __exit_to_l0(uint16_t port, uint64_t arg0, uint64_t arg1) -{ - __asm__ __volatile__("in %[port], %%al" - : - : [port]"d"(port), "D"(arg0), "S"(arg1) - : "rax"); -} - -/* - * Allows to pass three arguments to the host: port is 16bit wide, - * arg0 & arg1 are 64bit wide - */ -#define GUEST_SYNC_ARGS(_port, _arg0, _arg1) \ - __exit_to_l0(_port, (uint64_t) (_arg0), (uint64_t) (_arg1)) - -#define GUEST_ASSERT(_condition) do { \ - if (!(_condition)) \ - GUEST_SYNC_ARGS(GUEST_PORT_ABORT, \ - "Failed guest assert: " \ - #_condition, __LINE__); \ - } while (0) - -#define GUEST_SYNC(stage) GUEST_SYNC_ARGS(GUEST_PORT_SYNC, "hello", stage) +#define sync_global_to_guest(vm, g) ({ \ + typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + memcpy(_p, &(g), sizeof(g)); \ +}) + +#define sync_global_from_guest(vm, g) ({ \ + typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + memcpy(&(g), _p, sizeof(g)); \ +}) + +/* ucall implementation types */ +typedef enum { + UCALL_PIO, + UCALL_MMIO, +} ucall_type_t; + +/* Common ucalls */ +enum { + UCALL_NONE, + UCALL_SYNC, + UCALL_ABORT, + UCALL_DONE, +}; -#define GUEST_DONE() GUEST_SYNC_ARGS(GUEST_PORT_DONE, 0, 0) +#define UCALL_MAX_ARGS 6 -struct guest_args { - uint64_t arg0; - uint64_t arg1; - uint16_t port; -} __attribute__ ((packed)); +struct ucall { + uint64_t cmd; + uint64_t args[UCALL_MAX_ARGS]; +}; -void guest_args_read(struct kvm_vm *vm, uint32_t vcpu_id, - struct guest_args *args); +void ucall_init(struct kvm_vm *vm, ucall_type_t type, void *arg); +void ucall_uninit(struct kvm_vm *vm); +void ucall(uint64_t cmd, int nargs, ...); +uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc); + +#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage) +#define GUEST_DONE() ucall(UCALL_DONE, 0) +#define GUEST_ASSERT(_condition) do { \ + if (!(_condition)) \ + ucall(UCALL_ABORT, 2, \ + "Failed guest assert: " \ + #_condition, __LINE__); \ +} while (0) #endif /* SELFTEST_KVM_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/sparsebit.h b/tools/testing/selftests/kvm/include/sparsebit.h index 54cfeb6568d3..31e030915c1f 100644 --- a/tools/testing/selftests/kvm/include/sparsebit.h +++ b/tools/testing/selftests/kvm/include/sparsebit.h @@ -15,8 +15,8 @@ * even in the case where most bits are set. */ -#ifndef _TEST_SPARSEBIT_H_ -#define _TEST_SPARSEBIT_H_ +#ifndef SELFTEST_KVM_SPARSEBIT_H +#define SELFTEST_KVM_SPARSEBIT_H #include <stdbool.h> #include <stdint.h> @@ -72,4 +72,4 @@ void sparsebit_validate_internal(struct sparsebit *sbit); } #endif -#endif /* _TEST_SPARSEBIT_H_ */ +#endif /* SELFTEST_KVM_SPARSEBIT_H */ diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 73c3933436ec..c7dafe8bd02c 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -7,8 +7,8 @@ * */ -#ifndef TEST_UTIL_H -#define TEST_UTIL_H 1 +#ifndef SELFTEST_KVM_TEST_UTIL_H +#define SELFTEST_KVM_TEST_UTIL_H #include <stdlib.h> #include <stdarg.h> @@ -41,4 +41,4 @@ void test_assert(bool exp, const char *exp_str, #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \ } while (0) -#endif /* TEST_UTIL_H */ +#endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/x86.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 42c3596815b8..e2884c2b81ff 100644 --- a/tools/testing/selftests/kvm/include/x86.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -1,5 +1,5 @@ /* - * tools/testing/selftests/kvm/include/x86.h + * tools/testing/selftests/kvm/include/x86_64/processor.h * * Copyright (C) 2018, Google LLC. * @@ -7,8 +7,8 @@ * */ -#ifndef SELFTEST_KVM_X86_H -#define SELFTEST_KVM_X86_H +#ifndef SELFTEST_KVM_PROCESSOR_H +#define SELFTEST_KVM_PROCESSOR_H #include <assert.h> #include <stdint.h> @@ -305,7 +305,25 @@ static inline unsigned long get_xmm(int n) struct kvm_x86_state; struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid); -void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state); +void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_x86_state *state); + +struct kvm_cpuid2 *kvm_get_supported_cpuid(void); +void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_cpuid2 *cpuid); + +struct kvm_cpuid_entry2 * +kvm_get_supported_cpuid_index(uint32_t function, uint32_t index); + +static inline struct kvm_cpuid_entry2 * +kvm_get_supported_cpuid_entry(uint32_t function) +{ + return kvm_get_supported_cpuid_index(function, 0); +} + +uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index); +void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, + uint64_t msr_value); /* * Basic CPU control in CR0 @@ -1044,4 +1062,4 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s #define MSR_VM_IGNNE 0xc0010115 #define MSR_VM_HSAVE_PA 0xc0010117 -#endif /* !SELFTEST_KVM_X86_H */ +#endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index b9ffe1024d3a..c9bd935b939c 100644 --- a/tools/testing/selftests/kvm/include/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -1,5 +1,5 @@ /* - * tools/testing/selftests/kvm/include/vmx.h + * tools/testing/selftests/kvm/include/x86_64/vmx.h * * Copyright (C) 2018, Google LLC. * @@ -11,7 +11,7 @@ #define SELFTEST_KVM_VMX_H #include <stdint.h> -#include "x86.h" +#include "processor.h" #define CPUID_VMX_BIT 5 @@ -339,6 +339,8 @@ struct vmx_msr_entry { uint64_t value; } __attribute__ ((aligned(16))); +#include "evmcs.h" + static inline int vmxon(uint64_t phys) { uint8_t ret; @@ -372,6 +374,9 @@ static inline int vmptrld(uint64_t vmcs_pa) { uint8_t ret; + if (enable_evmcs) + return -1; + __asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]" : [ret]"=rm"(ret) : [pa]"m"(vmcs_pa) @@ -385,6 +390,9 @@ static inline int vmptrst(uint64_t *value) uint64_t tmp; uint8_t ret; + if (enable_evmcs) + return evmcs_vmptrst(value); + __asm__ __volatile__("vmptrst %[value]; setna %[ret]" : [value]"=m"(tmp), [ret]"=rm"(ret) : : "cc", "memory"); @@ -411,6 +419,9 @@ static inline int vmlaunch(void) { int ret; + if (enable_evmcs) + return evmcs_vmlaunch(); + __asm__ __volatile__("push %%rbp;" "push %%rcx;" "push %%rdx;" @@ -443,6 +454,9 @@ static inline int vmresume(void) { int ret; + if (enable_evmcs) + return evmcs_vmresume(); + __asm__ __volatile__("push %%rbp;" "push %%rcx;" "push %%rdx;" @@ -482,6 +496,9 @@ static inline int vmread(uint64_t encoding, uint64_t *value) uint64_t tmp; uint8_t ret; + if (enable_evmcs) + return evmcs_vmread(encoding, value); + __asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]" : [value]"=rm"(tmp), [ret]"=rm"(ret) : [encoding]"r"(encoding) @@ -506,6 +523,9 @@ static inline int vmwrite(uint64_t encoding, uint64_t value) { uint8_t ret; + if (enable_evmcs) + return evmcs_vmwrite(encoding, value); + __asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]" : [ret]"=rm"(ret) : [value]"rm"(value), [encoding]"r"(encoding) @@ -543,10 +563,19 @@ struct vmx_pages { void *vmwrite_hva; uint64_t vmwrite_gpa; void *vmwrite; + + void *vp_assist_hva; + uint64_t vp_assist_gpa; + void *vp_assist; + + void *enlightened_vmcs_hva; + uint64_t enlightened_vmcs_gpa; + void *enlightened_vmcs; }; struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva); bool prepare_for_vmx_operation(struct vmx_pages *vmx); void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp); +bool load_vmcs(struct vmx_pages *vmx); -#endif /* !SELFTEST_KVM_VMX_H */ +#endif /* SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c new file mode 100644 index 000000000000..e8c42506a09d --- /dev/null +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -0,0 +1,317 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * AArch64 code + * + * Copyright (C) 2018, Red Hat, Inc. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include "kvm_util.h" +#include "../kvm_util_internal.h" +#include "processor.h" + +#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 +#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000 + +static uint64_t page_align(struct kvm_vm *vm, uint64_t v) +{ + return (v + vm->page_size) & ~(vm->page_size - 1); +} + +static uint64_t pgd_index(struct kvm_vm *vm, vm_vaddr_t gva) +{ + unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; + uint64_t mask = (1UL << (vm->va_bits - shift)) - 1; + + return (gva >> shift) & mask; +} + +static uint64_t pud_index(struct kvm_vm *vm, vm_vaddr_t gva) +{ + unsigned int shift = 2 * (vm->page_shift - 3) + vm->page_shift; + uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; + + TEST_ASSERT(vm->pgtable_levels == 4, + "Mode %d does not have 4 page table levels", vm->mode); + + return (gva >> shift) & mask; +} + +static uint64_t pmd_index(struct kvm_vm *vm, vm_vaddr_t gva) +{ + unsigned int shift = (vm->page_shift - 3) + vm->page_shift; + uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; + + TEST_ASSERT(vm->pgtable_levels >= 3, + "Mode %d does not have >= 3 page table levels", vm->mode); + + return (gva >> shift) & mask; +} + +static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva) +{ + uint64_t mask = (1UL << (vm->page_shift - 3)) - 1; + return (gva >> vm->page_shift) & mask; +} + +static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry) +{ + uint64_t mask = ((1UL << (vm->va_bits - vm->page_shift)) - 1) << vm->page_shift; + return entry & mask; +} + +static uint64_t ptrs_per_pgd(struct kvm_vm *vm) +{ + unsigned int shift = (vm->pgtable_levels - 1) * (vm->page_shift - 3) + vm->page_shift; + return 1 << (vm->va_bits - shift); +} + +static uint64_t ptrs_per_pte(struct kvm_vm *vm) +{ + return 1 << (vm->page_shift - 3); +} + +void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot) +{ + int rc; + + if (!vm->pgd_created) { + vm_paddr_t paddr = vm_phy_pages_alloc(vm, + page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot); + vm->pgd = paddr; + vm->pgd_created = true; + } +} + +void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, + uint32_t pgd_memslot, uint64_t flags) +{ + uint8_t attr_idx = flags & 7; + uint64_t *ptep; + + TEST_ASSERT((vaddr % vm->page_size) == 0, + "Virtual address not on page boundary,\n" + " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, + (vaddr >> vm->page_shift)), + "Invalid virtual address, vaddr: 0x%lx", vaddr); + TEST_ASSERT((paddr % vm->page_size) == 0, + "Physical address not on page boundary,\n" + " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size); + TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + + ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8; + if (!*ptep) { + *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot); + *ptep |= 3; + } + + switch (vm->pgtable_levels) { + case 4: + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8; + if (!*ptep) { + *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot); + *ptep |= 3; + } + /* fall through */ + case 3: + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8; + if (!*ptep) { + *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot); + *ptep |= 3; + } + /* fall through */ + case 2: + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8; + break; + default: + TEST_ASSERT(false, "Page table levels must be 2, 3, or 4"); + } + + *ptep = paddr | 3; + *ptep |= (attr_idx << 2) | (1 << 10) /* Access Flag */; +} + +void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, + uint32_t pgd_memslot) +{ + uint64_t attr_idx = 4; /* NORMAL (See DEFAULT_MAIR_EL1) */ + + _virt_pg_map(vm, vaddr, paddr, pgd_memslot, attr_idx); +} + +vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +{ + uint64_t *ptep; + + if (!vm->pgd_created) + goto unmapped_gva; + + ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, gva) * 8; + if (!ptep) + goto unmapped_gva; + + switch (vm->pgtable_levels) { + case 4: + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, gva) * 8; + if (!ptep) + goto unmapped_gva; + /* fall through */ + case 3: + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, gva) * 8; + if (!ptep) + goto unmapped_gva; + /* fall through */ + case 2: + ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, gva) * 8; + if (!ptep) + goto unmapped_gva; + break; + default: + TEST_ASSERT(false, "Page table levels must be 2, 3, or 4"); + } + + return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1)); + +unmapped_gva: + TEST_ASSERT(false, "No mapping for vm virtual address, " + "gva: 0x%lx", gva); +} + +static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level) +{ +#ifdef DEBUG_VM + static const char * const type[] = { "", "pud", "pmd", "pte" }; + uint64_t pte, *ptep; + + if (level == 4) + return; + + for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) { + ptep = addr_gpa2hva(vm, pte); + if (!*ptep) + continue; + printf("%*s%s: %lx: %lx at %p\n", indent, "", type[level], pte, *ptep, ptep); + pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level + 1); + } +#endif +} + +void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +{ + int level = 4 - (vm->pgtable_levels - 1); + uint64_t pgd, *ptep; + + if (!vm->pgd_created) + return; + + for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pgd(vm) * 8; pgd += 8) { + ptep = addr_gpa2hva(vm, pgd); + if (!*ptep) + continue; + printf("%*spgd: %lx: %lx at %p\n", indent, "", pgd, *ptep, ptep); + pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level); + } +} + +struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, + void *guest_code) +{ + uint64_t ptrs_per_4k_pte = 512; + uint64_t extra_pg_pages = (extra_mem_pages / ptrs_per_4k_pte) * 2; + struct kvm_vm *vm; + + vm = vm_create(VM_MODE_P52V48_4K, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); + + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); + vm_vcpu_add_default(vm, vcpuid, guest_code); + + return vm; +} + +void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) +{ + size_t stack_size = vm->page_size == 4096 ? + DEFAULT_STACK_PGS * vm->page_size : + vm->page_size; + uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size, + DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, 0, 0); + + vm_vcpu_add(vm, vcpuid, 0, 0); + + set_reg(vm, vcpuid, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size); + set_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code); +} + +void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot) +{ + struct kvm_vcpu_init init; + uint64_t sctlr_el1, tcr_el1; + + memset(&init, 0, sizeof(init)); + init.target = KVM_ARM_TARGET_GENERIC_V8; + vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_INIT, &init); + + /* + * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15 + * registers, which the variable argument list macros do. + */ + set_reg(vm, vcpuid, ARM64_SYS_REG(CPACR_EL1), 3 << 20); + + get_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), &sctlr_el1); + get_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), &tcr_el1); + + switch (vm->mode) { + case VM_MODE_P52V48_4K: + TEST_ASSERT(false, "AArch64 does not support 4K sized pages " + "with 52-bit physical address ranges"); + case VM_MODE_P52V48_64K: + tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ + tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ + break; + case VM_MODE_P48V48_4K: + tcr_el1 |= 0ul << 14; /* TG0 = 4KB */ + tcr_el1 |= 5ul << 32; /* IPS = 48 bits */ + break; + case VM_MODE_P48V48_64K: + tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ + tcr_el1 |= 5ul << 32; /* IPS = 48 bits */ + break; + case VM_MODE_P40V48_4K: + tcr_el1 |= 0ul << 14; /* TG0 = 4KB */ + tcr_el1 |= 2ul << 32; /* IPS = 40 bits */ + break; + case VM_MODE_P40V48_64K: + tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ + tcr_el1 |= 2ul << 32; /* IPS = 40 bits */ + break; + default: + TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode); + } + + sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */; + /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */; + tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12); + tcr_el1 |= (64 - vm->va_bits) /* T0SZ */; + + set_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), sctlr_el1); + set_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), tcr_el1); + set_reg(vm, vcpuid, ARM64_SYS_REG(MAIR_EL1), DEFAULT_MAIR_EL1); + set_reg(vm, vcpuid, ARM64_SYS_REG(TTBR0_EL1), vm->pgd); +} + +void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) +{ + uint64_t pstate, pc; + + get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pstate), &pstate); + get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), &pc); + + fprintf(stream, "%*spstate: 0x%.16llx pc: 0x%.16llx\n", + indent, "", pstate, pc); +} diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c index cd01144d27c8..6398efe67885 100644 --- a/tools/testing/selftests/kvm/lib/assert.c +++ b/tools/testing/selftests/kvm/lib/assert.c @@ -13,7 +13,7 @@ #include <execinfo.h> #include <sys/syscall.h> -#include "../../kselftest.h" +#include "kselftest.h" /* Dumps the current stack trace to stderr. */ static void __attribute__((noinline)) test_dump_stack(void); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 6fd8c089cafc..b52cfdefecbf 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -16,10 +16,8 @@ #include <sys/stat.h> #include <linux/kernel.h> -#define KVM_DEV_PATH "/dev/kvm" - #define KVM_UTIL_PGS_PER_HUGEPG 512 -#define KVM_UTIL_MIN_PADDR 0x2000 +#define KVM_UTIL_MIN_PFN 2 /* Aligns x up to the next multiple of size. Size must be a power of 2. */ static void *align(void *x, size_t size) @@ -30,7 +28,8 @@ static void *align(void *x, size_t size) return (void *) (((size_t) x + mask) & ~mask); } -/* Capability +/* + * Capability * * Input Args: * cap - Capability @@ -86,22 +85,33 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap) return ret; } -static void vm_open(struct kvm_vm *vm, int perm) +static void vm_open(struct kvm_vm *vm, int perm, unsigned long type) { vm->kvm_fd = open(KVM_DEV_PATH, perm); if (vm->kvm_fd < 0) exit(KSFT_SKIP); - /* Create VM. */ - vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, NULL); + vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, type); TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, " "rc: %i errno: %i", vm->fd, errno); } -/* VM Create +const char * const vm_guest_mode_string[] = { + "PA-bits:52, VA-bits:48, 4K pages", + "PA-bits:52, VA-bits:48, 64K pages", + "PA-bits:48, VA-bits:48, 4K pages", + "PA-bits:48, VA-bits:48, 64K pages", + "PA-bits:40, VA-bits:48, 4K pages", + "PA-bits:40, VA-bits:48, 64K pages", +}; +_Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES, + "Missing new mode strings?"); + +/* + * VM Create * * Input Args: - * mode - VM Mode (e.g. VM_MODE_FLAT48PG) + * mode - VM Mode (e.g. VM_MODE_P52V48_4K) * phy_pages - Physical memory pages * perm - permission * @@ -110,46 +120,84 @@ static void vm_open(struct kvm_vm *vm, int perm) * Return: * Pointer to opaque structure that describes the created VM. * - * Creates a VM with the mode specified by mode (e.g. VM_MODE_FLAT48PG). + * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K). * When phy_pages is non-zero, a memory region of phy_pages physical pages * is created and mapped starting at guest physical address 0. The file * descriptor to control the created VM is created with the permissions * given by perm (e.g. O_RDWR). */ -struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) +struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, + int perm, unsigned long type) { struct kvm_vm *vm; int kvm_fd; - /* Allocate memory. */ vm = calloc(1, sizeof(*vm)); - TEST_ASSERT(vm != NULL, "Insufficent Memory"); + TEST_ASSERT(vm != NULL, "Insufficient Memory"); vm->mode = mode; - vm_open(vm, perm); + vm->type = type; + vm_open(vm, perm, type); /* Setup mode specific traits. */ switch (vm->mode) { - case VM_MODE_FLAT48PG: + case VM_MODE_P52V48_4K: + vm->pgtable_levels = 4; + vm->pa_bits = 52; + vm->va_bits = 48; vm->page_size = 0x1000; vm->page_shift = 12; - - /* Limit to 48-bit canonical virtual addresses. */ - vm->vpages_valid = sparsebit_alloc(); - sparsebit_set_num(vm->vpages_valid, - 0, (1ULL << (48 - 1)) >> vm->page_shift); - sparsebit_set_num(vm->vpages_valid, - (~((1ULL << (48 - 1)) - 1)) >> vm->page_shift, - (1ULL << (48 - 1)) >> vm->page_shift); - - /* Limit physical addresses to 52-bits. */ - vm->max_gfn = ((1ULL << 52) >> vm->page_shift) - 1; break; - + case VM_MODE_P52V48_64K: + vm->pgtable_levels = 3; + vm->pa_bits = 52; + vm->va_bits = 48; + vm->page_size = 0x10000; + vm->page_shift = 16; + break; + case VM_MODE_P48V48_4K: + vm->pgtable_levels = 4; + vm->pa_bits = 48; + vm->va_bits = 48; + vm->page_size = 0x1000; + vm->page_shift = 12; + break; + case VM_MODE_P48V48_64K: + vm->pgtable_levels = 3; + vm->pa_bits = 48; + vm->va_bits = 48; + vm->page_size = 0x10000; + vm->page_shift = 16; + break; + case VM_MODE_P40V48_4K: + vm->pgtable_levels = 4; + vm->pa_bits = 40; + vm->va_bits = 48; + vm->page_size = 0x1000; + vm->page_shift = 12; + break; + case VM_MODE_P40V48_64K: + vm->pgtable_levels = 3; + vm->pa_bits = 40; + vm->va_bits = 48; + vm->page_size = 0x10000; + vm->page_shift = 16; + break; default: TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode); } + /* Limit to VA-bit canonical virtual addresses. */ + vm->vpages_valid = sparsebit_alloc(); + sparsebit_set_num(vm->vpages_valid, + 0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift); + sparsebit_set_num(vm->vpages_valid, + (~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift, + (1ULL << (vm->va_bits - 1)) >> vm->page_shift); + + /* Limit physical addresses to PA-bits. */ + vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1; + /* Allocate and setup memory for guest. */ vm->vpages_mapped = sparsebit_alloc(); if (phy_pages != 0) @@ -159,7 +207,13 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) return vm; } -/* VM Restart +struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) +{ + return _vm_create(mode, phy_pages, perm, 0); +} + +/* + * VM Restart * * Input Args: * vm - VM that has been released before @@ -175,7 +229,7 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm) { struct userspace_mem_region *region; - vm_open(vmp, perm); + vm_open(vmp, perm, vmp->type); if (vmp->has_irqchip) vm_create_irqchip(vmp); @@ -186,7 +240,8 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm) " rc: %i errno: %i\n" " slot: %u flags: 0x%x\n" " guest_phys_addr: 0x%lx size: 0x%lx", - ret, errno, region->region.slot, region->region.flags, + ret, errno, region->region.slot, + region->region.flags, region->region.guest_phys_addr, region->region.memory_size); } @@ -202,7 +257,21 @@ void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log) strerror(-ret)); } -/* Userspace Memory Region Find +void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, + uint64_t first_page, uint32_t num_pages) +{ + struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot, + .first_page = first_page, + .num_pages = num_pages }; + int ret; + + ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args); + TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s", + strerror(-ret)); +} + +/* + * Userspace Memory Region Find * * Input Args: * vm - Virtual Machine @@ -220,8 +289,8 @@ void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log) * of the regions is returned. Null is returned only when no overlapping * region exists. */ -static struct userspace_mem_region *userspace_mem_region_find( - struct kvm_vm *vm, uint64_t start, uint64_t end) +static struct userspace_mem_region * +userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end) { struct userspace_mem_region *region; @@ -237,7 +306,8 @@ static struct userspace_mem_region *userspace_mem_region_find( return NULL; } -/* KVM Userspace Memory Region Find +/* + * KVM Userspace Memory Region Find * * Input Args: * vm - Virtual Machine @@ -265,7 +335,8 @@ kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, return ®ion->region; } -/* VCPU Find +/* + * VCPU Find * * Input Args: * vm - Virtual Machine @@ -280,8 +351,7 @@ kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, * returns a pointer to it. Returns NULL if the VM doesn't contain a VCPU * for the specified vcpuid. */ -struct vcpu *vcpu_find(struct kvm_vm *vm, - uint32_t vcpuid) +struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid) { struct vcpu *vcpup; @@ -293,7 +363,8 @@ struct vcpu *vcpu_find(struct kvm_vm *vm, return NULL; } -/* VM VCPU Remove +/* + * VM VCPU Remove * * Input Args: * vm - Virtual Machine @@ -330,11 +401,9 @@ void kvm_vm_release(struct kvm_vm *vmp) { int ret; - /* Free VCPUs. */ while (vmp->vcpu_head) vm_vcpu_rm(vmp, vmp->vcpu_head->id); - /* Close file descriptor for the VM. */ ret = close(vmp->fd); TEST_ASSERT(ret == 0, "Close of vm fd failed,\n" " vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno); @@ -344,7 +413,8 @@ void kvm_vm_release(struct kvm_vm *vmp) " vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno); } -/* Destroys and frees the VM pointed to by vmp. +/* + * Destroys and frees the VM pointed to by vmp. */ void kvm_vm_free(struct kvm_vm *vmp) { @@ -383,7 +453,8 @@ void kvm_vm_free(struct kvm_vm *vmp) free(vmp); } -/* Memory Compare, host virtual to guest virtual +/* + * Memory Compare, host virtual to guest virtual * * Input Args: * hva - Starting host virtual address @@ -405,23 +476,25 @@ void kvm_vm_free(struct kvm_vm *vmp) * a length of len, to the guest bytes starting at the guest virtual * address given by gva. */ -int kvm_memcmp_hva_gva(void *hva, - struct kvm_vm *vm, vm_vaddr_t gva, size_t len) +int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len) { size_t amt; - /* Compare a batch of bytes until either a match is found + /* + * Compare a batch of bytes until either a match is found * or all the bytes have been compared. */ for (uintptr_t offset = 0; offset < len; offset += amt) { uintptr_t ptr1 = (uintptr_t)hva + offset; - /* Determine host address for guest virtual address + /* + * Determine host address for guest virtual address * at offset. */ uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset); - /* Determine amount to compare on this pass. + /* + * Determine amount to compare on this pass. * Don't allow the comparsion to cross a page boundary. */ amt = len - offset; @@ -433,7 +506,8 @@ int kvm_memcmp_hva_gva(void *hva, assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift)); assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift)); - /* Perform the comparison. If there is a difference + /* + * Perform the comparison. If there is a difference * return that result to the caller, otherwise need * to continue on looking for a mismatch. */ @@ -442,109 +516,15 @@ int kvm_memcmp_hva_gva(void *hva, return ret; } - /* No mismatch found. Let the caller know the two memory + /* + * No mismatch found. Let the caller know the two memory * areas are equal. */ return 0; } -/* Allocate an instance of struct kvm_cpuid2 - * - * Input Args: None - * - * Output Args: None - * - * Return: A pointer to the allocated struct. The caller is responsible - * for freeing this struct. - * - * Since kvm_cpuid2 uses a 0-length array to allow a the size of the - * array to be decided at allocation time, allocation is slightly - * complicated. This function uses a reasonable default length for - * the array and performs the appropriate allocation. - */ -static struct kvm_cpuid2 *allocate_kvm_cpuid2(void) -{ - struct kvm_cpuid2 *cpuid; - int nent = 100; - size_t size; - - size = sizeof(*cpuid); - size += nent * sizeof(struct kvm_cpuid_entry2); - cpuid = malloc(size); - if (!cpuid) { - perror("malloc"); - abort(); - } - - cpuid->nent = nent; - - return cpuid; -} - -/* KVM Supported CPUID Get - * - * Input Args: None - * - * Output Args: - * - * Return: The supported KVM CPUID - * - * Get the guest CPUID supported by KVM. - */ -struct kvm_cpuid2 *kvm_get_supported_cpuid(void) -{ - static struct kvm_cpuid2 *cpuid; - int ret; - int kvm_fd; - - if (cpuid) - return cpuid; - - cpuid = allocate_kvm_cpuid2(); - kvm_fd = open(KVM_DEV_PATH, O_RDONLY); - if (kvm_fd < 0) - exit(KSFT_SKIP); - - ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid); - TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n", - ret, errno); - - close(kvm_fd); - return cpuid; -} - -/* Locate a cpuid entry. - * - * Input Args: - * cpuid: The cpuid. - * function: The function of the cpuid entry to find. - * - * Output Args: None - * - * Return: A pointer to the cpuid entry. Never returns NULL. - */ -struct kvm_cpuid_entry2 * -kvm_get_supported_cpuid_index(uint32_t function, uint32_t index) -{ - struct kvm_cpuid2 *cpuid; - struct kvm_cpuid_entry2 *entry = NULL; - int i; - - cpuid = kvm_get_supported_cpuid(); - for (i = 0; i < cpuid->nent; i++) { - if (cpuid->entries[i].function == function && - cpuid->entries[i].index == index) { - entry = &cpuid->entries[i]; - break; - } - } - - TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).", - function, index); - return entry; -} - -/* VM Userspace Memory Region Add +/* + * VM Userspace Memory Region Add * * Input Args: * vm - Virtual Machine @@ -586,11 +566,12 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, " vm->max_gfn: 0x%lx vm->page_size: 0x%x", guest_paddr, npages, vm->max_gfn, vm->page_size); - /* Confirm a mem region with an overlapping address doesn't + /* + * Confirm a mem region with an overlapping address doesn't * already exist. */ region = (struct userspace_mem_region *) userspace_mem_region_find( - vm, guest_paddr, guest_paddr + npages * vm->page_size); + vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1); if (region != NULL) TEST_ASSERT(false, "overlapping userspace_mem_region already " "exists\n" @@ -606,15 +587,10 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, region = region->next) { if (region->region.slot == slot) break; - if ((guest_paddr <= (region->region.guest_phys_addr - + region->region.memory_size)) - && ((guest_paddr + npages * vm->page_size) - >= region->region.guest_phys_addr)) - break; } if (region != NULL) TEST_ASSERT(false, "A mem region with the requested slot " - "or overlapping physical memory range already exists.\n" + "already exists.\n" " requested slot: %u paddr: 0x%lx npages: 0x%lx\n" " existing slot: %u paddr: 0x%lx size: 0x%lx", slot, guest_paddr, npages, @@ -677,7 +653,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, vm->userspace_mem_region_head = region; } -/* Memslot to region +/* + * Memslot to region * * Input Args: * vm - Virtual Machine @@ -691,8 +668,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, * on error (e.g. currently no memory region using memslot as a KVM * memory slot ID). */ -static struct userspace_mem_region *memslot2region(struct kvm_vm *vm, - uint32_t memslot) +static struct userspace_mem_region * +memslot2region(struct kvm_vm *vm, uint32_t memslot) { struct userspace_mem_region *region; @@ -712,7 +689,8 @@ static struct userspace_mem_region *memslot2region(struct kvm_vm *vm, return region; } -/* VM Memory Region Flags Set +/* + * VM Memory Region Flags Set * * Input Args: * vm - Virtual Machine @@ -730,7 +708,6 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) int ret; struct userspace_mem_region *region; - /* Locate memory region. */ region = memslot2region(vm, slot); region->region.flags = flags; @@ -742,7 +719,8 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) ret, errno, slot, flags); } -/* VCPU mmap Size +/* + * VCPU mmap Size * * Input Args: None * @@ -772,7 +750,8 @@ static int vcpu_mmap_sz(void) return ret; } -/* VM VCPU Add +/* + * VM VCPU Add * * Input Args: * vm - Virtual Machine @@ -785,7 +764,8 @@ static int vcpu_mmap_sz(void) * Creates and adds to the VM specified by vm and virtual CPU with * the ID given by vcpuid. */ -void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_memslot) +void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, + int gdt_memslot) { struct vcpu *vcpu; @@ -823,7 +803,8 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_me vcpu_setup(vm, vcpuid, pgd_memslot, gdt_memslot); } -/* VM Virtual Address Unused Gap +/* + * VM Virtual Address Unused Gap * * Input Args: * vm - Virtual Machine @@ -843,14 +824,14 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_me * sz unallocated bytes >= vaddr_min is available. */ static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, - vm_vaddr_t vaddr_min) + vm_vaddr_t vaddr_min) { uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift; /* Determine lowest permitted virtual page index. */ uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift; if ((pgidx_start * vm->page_size) < vaddr_min) - goto no_va_found; + goto no_va_found; /* Loop over section with enough valid virtual page indexes. */ if (!sparsebit_is_set_num(vm->vpages_valid, @@ -909,7 +890,8 @@ va_found: return pgidx_start * vm->page_size; } -/* VM Virtual Address Allocate +/* + * VM Virtual Address Allocate * * Input Args: * vm - Virtual Machine @@ -930,13 +912,14 @@ va_found: * a page. */ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, - uint32_t data_memslot, uint32_t pgd_memslot) + uint32_t data_memslot, uint32_t pgd_memslot) { uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); virt_pgd_alloc(vm, pgd_memslot); - /* Find an unused range of virtual page addresses of at least + /* + * Find an unused range of virtual page addresses of at least * pages in length. */ vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min); @@ -946,7 +929,8 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, pages--, vaddr += vm->page_size) { vm_paddr_t paddr; - paddr = vm_phy_page_alloc(vm, KVM_UTIL_MIN_PADDR, data_memslot); + paddr = vm_phy_page_alloc(vm, + KVM_UTIL_MIN_PFN * vm->page_size, data_memslot); virt_pg_map(vm, vaddr, paddr, pgd_memslot); @@ -990,7 +974,8 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, } } -/* Address VM Physical to Host Virtual +/* + * Address VM Physical to Host Virtual * * Input Args: * vm - Virtual Machine @@ -1022,7 +1007,8 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa) return NULL; } -/* Address Host Virtual to VM Physical +/* + * Address Host Virtual to VM Physical * * Input Args: * vm - Virtual Machine @@ -1056,7 +1042,8 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) return -1; } -/* VM Create IRQ Chip +/* + * VM Create IRQ Chip * * Input Args: * vm - Virtual Machine @@ -1078,7 +1065,8 @@ void vm_create_irqchip(struct kvm_vm *vm) vm->has_irqchip = true; } -/* VM VCPU State +/* + * VM VCPU State * * Input Args: * vm - Virtual Machine @@ -1100,7 +1088,8 @@ struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid) return vcpu->state; } -/* VM VCPU Run +/* + * VM VCPU Run * * Input Args: * vm - Virtual Machine @@ -1126,13 +1115,14 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) int rc; TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - do { + do { rc = ioctl(vcpu->fd, KVM_RUN, NULL); } while (rc == -1 && errno == EINTR); return rc; } -/* VM VCPU Set MP State +/* + * VM VCPU Set MP State * * Input Args: * vm - Virtual Machine @@ -1147,7 +1137,7 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) * by mp_state. */ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_mp_state *mp_state) + struct kvm_mp_state *mp_state) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); int ret; @@ -1159,7 +1149,8 @@ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, "rc: %i errno: %i", ret, errno); } -/* VM VCPU Regs Get +/* + * VM VCPU Regs Get * * Input Args: * vm - Virtual Machine @@ -1173,21 +1164,20 @@ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, * Obtains the current register state for the VCPU specified by vcpuid * and stores it at the location given by regs. */ -void vcpu_regs_get(struct kvm_vm *vm, - uint32_t vcpuid, struct kvm_regs *regs) +void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); int ret; TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - /* Get the regs. */ ret = ioctl(vcpu->fd, KVM_GET_REGS, regs); TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i", ret, errno); } -/* VM VCPU Regs Set +/* + * VM VCPU Regs Set * * Input Args: * vm - Virtual Machine @@ -1201,165 +1191,46 @@ void vcpu_regs_get(struct kvm_vm *vm, * Sets the regs of the VCPU specified by vcpuid to the values * given by regs. */ -void vcpu_regs_set(struct kvm_vm *vm, - uint32_t vcpuid, struct kvm_regs *regs) +void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); int ret; TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - /* Set the regs. */ ret = ioctl(vcpu->fd, KVM_SET_REGS, regs); TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i", ret, errno); } void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_vcpu_events *events) + struct kvm_vcpu_events *events) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); int ret; TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - /* Get the regs. */ ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events); TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i", ret, errno); } void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_vcpu_events *events) + struct kvm_vcpu_events *events) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); int ret; TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - /* Set the regs. */ ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events); TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i", ret, errno); } -/* VCPU Get MSR - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * msr_index - Index of MSR - * - * Output Args: None - * - * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced. - * - * Get value of MSR for VCPU. - */ -uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index) -{ - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - struct { - struct kvm_msrs header; - struct kvm_msr_entry entry; - } buffer = {}; - int r; - - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - buffer.header.nmsrs = 1; - buffer.entry.index = msr_index; - r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header); - TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n" - " rc: %i errno: %i", r, errno); - - return buffer.entry.data; -} - -/* VCPU Set MSR - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * msr_index - Index of MSR - * msr_value - New value of MSR - * - * Output Args: None - * - * Return: On success, nothing. On failure a TEST_ASSERT is produced. - * - * Set value of MSR for VCPU. - */ -void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, - uint64_t msr_value) -{ - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - struct { - struct kvm_msrs header; - struct kvm_msr_entry entry; - } buffer = {}; - int r; - - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - memset(&buffer, 0, sizeof(buffer)); - buffer.header.nmsrs = 1; - buffer.entry.index = msr_index; - buffer.entry.data = msr_value; - r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header); - TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n" - " rc: %i errno: %i", r, errno); -} - -/* VM VCPU Args Set - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * num - number of arguments - * ... - arguments, each of type uint64_t - * - * Output Args: None - * - * Return: None - * - * Sets the first num function input arguments to the values - * given as variable args. Each of the variable args is expected to - * be of type uint64_t. - */ -void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) -{ - va_list ap; - struct kvm_regs regs; - - TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n" - " num: %u\n", - num); - - va_start(ap, num); - vcpu_regs_get(vm, vcpuid, ®s); - - if (num >= 1) - regs.rdi = va_arg(ap, uint64_t); - - if (num >= 2) - regs.rsi = va_arg(ap, uint64_t); - - if (num >= 3) - regs.rdx = va_arg(ap, uint64_t); - - if (num >= 4) - regs.rcx = va_arg(ap, uint64_t); - - if (num >= 5) - regs.r8 = va_arg(ap, uint64_t); - - if (num >= 6) - regs.r9 = va_arg(ap, uint64_t); - - vcpu_regs_set(vm, vcpuid, ®s); - va_end(ap); -} - -/* VM VCPU System Regs Get +/* + * VM VCPU System Regs Get * * Input Args: * vm - Virtual Machine @@ -1373,22 +1244,20 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) * Obtains the current system register state for the VCPU specified by * vcpuid and stores it at the location given by sregs. */ -void vcpu_sregs_get(struct kvm_vm *vm, - uint32_t vcpuid, struct kvm_sregs *sregs) +void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); int ret; TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - /* Get the regs. */ - /* Get the regs. */ ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs); TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i", ret, errno); } -/* VM VCPU System Regs Set +/* + * VM VCPU System Regs Set * * Input Args: * vm - Virtual Machine @@ -1402,27 +1271,25 @@ void vcpu_sregs_get(struct kvm_vm *vm, * Sets the system regs of the VCPU specified by vcpuid to the values * given by sregs. */ -void vcpu_sregs_set(struct kvm_vm *vm, - uint32_t vcpuid, struct kvm_sregs *sregs) +void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs) { int ret = _vcpu_sregs_set(vm, vcpuid, sregs); TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, " "rc: %i errno: %i", ret, errno); } -int _vcpu_sregs_set(struct kvm_vm *vm, - uint32_t vcpuid, struct kvm_sregs *sregs) +int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); int ret; TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - /* Get the regs. */ return ioctl(vcpu->fd, KVM_SET_SREGS, sregs); } -/* VCPU Ioctl +/* + * VCPU Ioctl * * Input Args: * vm - Virtual Machine @@ -1434,8 +1301,18 @@ int _vcpu_sregs_set(struct kvm_vm *vm, * * Issues an arbitrary ioctl on a VCPU fd. */ -void vcpu_ioctl(struct kvm_vm *vm, - uint32_t vcpuid, unsigned long cmd, void *arg) +void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, + unsigned long cmd, void *arg) +{ + int ret; + + ret = _vcpu_ioctl(vm, vcpuid, cmd, arg); + TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)", + cmd, ret, errno, strerror(errno)); +} + +int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, + unsigned long cmd, void *arg) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); int ret; @@ -1443,11 +1320,12 @@ void vcpu_ioctl(struct kvm_vm *vm, TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); ret = ioctl(vcpu->fd, cmd, arg); - TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)", - cmd, ret, errno, strerror(errno)); + + return ret; } -/* VM Ioctl +/* + * VM Ioctl * * Input Args: * vm - Virtual Machine @@ -1467,7 +1345,8 @@ void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) cmd, ret, errno, strerror(errno)); } -/* VM Dump +/* + * VM Dump * * Input Args: * vm - Virtual Machine @@ -1514,38 +1393,6 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) vcpu_dump(stream, vm, vcpu->id, indent + 2); } -/* VM VCPU Dump - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * indent - Left margin indent amount - * - * Output Args: - * stream - Output FILE stream - * - * Return: None - * - * Dumps the current state of the VCPU specified by vcpuid, within the VM - * given by vm, to the FILE stream given by stream. - */ -void vcpu_dump(FILE *stream, struct kvm_vm *vm, - uint32_t vcpuid, uint8_t indent) -{ - struct kvm_regs regs; - struct kvm_sregs sregs; - - fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid); - - fprintf(stream, "%*sregs:\n", indent + 2, ""); - vcpu_regs_get(vm, vcpuid, ®s); - regs_dump(stream, ®s, indent + 4); - - fprintf(stream, "%*ssregs:\n", indent + 2, ""); - vcpu_sregs_get(vm, vcpuid, &sregs); - sregs_dump(stream, &sregs, indent + 4); -} - /* Known KVM exit reasons */ static struct exit_reason { unsigned int reason; @@ -1576,7 +1423,8 @@ static struct exit_reason { #endif }; -/* Exit Reason String +/* + * Exit Reason String * * Input Args: * exit_reason - Exit reason @@ -1602,10 +1450,12 @@ const char *exit_reason_str(unsigned int exit_reason) return "Unknown"; } -/* Physical Page Allocate +/* + * Physical Contiguous Page Allocator * * Input Args: * vm - Virtual Machine + * num - number of pages * paddr_min - Physical address minimum * memslot - Memory region to allocate page from * @@ -1614,47 +1464,59 @@ const char *exit_reason_str(unsigned int exit_reason) * Return: * Starting physical address * - * Within the VM specified by vm, locates an available physical page - * at or above paddr_min. If found, the page is marked as in use - * and its address is returned. A TEST_ASSERT failure occurs if no - * page is available at or above paddr_min. + * Within the VM specified by vm, locates a range of available physical + * pages at or above paddr_min. If found, the pages are marked as in use + * and their base address is returned. A TEST_ASSERT failure occurs if + * not enough pages are available at or above paddr_min. */ -vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, - vm_paddr_t paddr_min, uint32_t memslot) +vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, + vm_paddr_t paddr_min, uint32_t memslot) { struct userspace_mem_region *region; - sparsebit_idx_t pg; + sparsebit_idx_t pg, base; + + TEST_ASSERT(num > 0, "Must allocate at least one page"); TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address " "not divisible by page size.\n" " paddr_min: 0x%lx page_size: 0x%x", paddr_min, vm->page_size); - /* Locate memory region. */ region = memslot2region(vm, memslot); + base = pg = paddr_min >> vm->page_shift; - /* Locate next available physical page at or above paddr_min. */ - pg = paddr_min >> vm->page_shift; - - if (!sparsebit_is_set(region->unused_phy_pages, pg)) { - pg = sparsebit_next_set(region->unused_phy_pages, pg); - if (pg == 0) { - fprintf(stderr, "No guest physical page available, " - "paddr_min: 0x%lx page_size: 0x%x memslot: %u", - paddr_min, vm->page_size, memslot); - fputs("---- vm dump ----\n", stderr); - vm_dump(stderr, vm, 2); - abort(); + do { + for (; pg < base + num; ++pg) { + if (!sparsebit_is_set(region->unused_phy_pages, pg)) { + base = pg = sparsebit_next_set(region->unused_phy_pages, pg); + break; + } } + } while (pg && pg != base + num); + + if (pg == 0) { + fprintf(stderr, "No guest physical page available, " + "paddr_min: 0x%lx page_size: 0x%x memslot: %u\n", + paddr_min, vm->page_size, memslot); + fputs("---- vm dump ----\n", stderr); + vm_dump(stderr, vm, 2); + abort(); } - /* Specify page as in use and return its address. */ - sparsebit_clear(region->unused_phy_pages, pg); + for (pg = base; pg < base + num; ++pg) + sparsebit_clear(region->unused_phy_pages, pg); - return pg * vm->page_size; + return base * vm->page_size; +} + +vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, + uint32_t memslot) +{ + return vm_phy_pages_alloc(vm, 1, paddr_min, memslot); } -/* Address Guest Virtual to Host Virtual +/* + * Address Guest Virtual to Host Virtual * * Input Args: * vm - Virtual Machine @@ -1669,17 +1531,3 @@ void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva) { return addr_gpa2hva(vm, addr_gva2gpa(vm, gva)); } - -void guest_args_read(struct kvm_vm *vm, uint32_t vcpu_id, - struct guest_args *args) -{ - struct kvm_run *run = vcpu_state(vm, vcpu_id); - struct kvm_regs regs; - - memset(®s, 0, sizeof(regs)); - vcpu_regs_get(vm, vcpu_id, ®s); - - args->port = run->io.port; - args->arg0 = regs.rdi; - args->arg1 = regs.rsi; -} diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index 542ed606b338..4595e42c6e29 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -1,28 +1,29 @@ /* - * tools/testing/selftests/kvm/lib/kvm_util.c + * tools/testing/selftests/kvm/lib/kvm_util_internal.h * * Copyright (C) 2018, Google LLC. * * This work is licensed under the terms of the GNU GPL, version 2. */ -#ifndef KVM_UTIL_INTERNAL_H -#define KVM_UTIL_INTERNAL_H 1 +#ifndef SELFTEST_KVM_UTIL_INTERNAL_H +#define SELFTEST_KVM_UTIL_INTERNAL_H #include "sparsebit.h" +#define KVM_DEV_PATH "/dev/kvm" + #ifndef BITS_PER_BYTE -#define BITS_PER_BYTE 8 +#define BITS_PER_BYTE 8 #endif #ifndef BITS_PER_LONG -#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long)) +#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long)) #endif #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) -#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG) +#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG) -/* Concrete definition of struct kvm_vm. */ struct userspace_mem_region { struct userspace_mem_region *next, *prev; struct kvm_userspace_memory_region region; @@ -43,16 +44,19 @@ struct vcpu { struct kvm_vm { int mode; + unsigned long type; int kvm_fd; int fd; + unsigned int pgtable_levels; unsigned int page_size; unsigned int page_shift; + unsigned int pa_bits; + unsigned int va_bits; uint64_t max_gfn; struct vcpu *vcpu_head; struct userspace_mem_region *userspace_mem_region_head; struct sparsebit *vpages_valid; struct sparsebit *vpages_mapped; - bool has_irqchip; bool pgd_created; vm_paddr_t pgd; @@ -60,13 +64,11 @@ struct kvm_vm { vm_vaddr_t tss; }; -struct vcpu *vcpu_find(struct kvm_vm *vm, - uint32_t vcpuid); -void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot); +struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid); +void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, + int gdt_memslot); void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); -void regs_dump(FILE *stream, struct kvm_regs *regs, - uint8_t indent); -void sregs_dump(FILE *stream, struct kvm_sregs *sregs, - uint8_t indent); +void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent); +void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent); -#endif +#endif /* SELFTEST_KVM_UTIL_INTERNAL_H */ diff --git a/tools/testing/selftests/kvm/lib/ucall.c b/tools/testing/selftests/kvm/lib/ucall.c new file mode 100644 index 000000000000..a2ab38be2f47 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/ucall.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ucall support. A ucall is a "hypercall to userspace". + * + * Copyright (C) 2018, Red Hat, Inc. + */ +#include "kvm_util.h" +#include "kvm_util_internal.h" + +#define UCALL_PIO_PORT ((uint16_t)0x1000) + +static ucall_type_t ucall_type; +static vm_vaddr_t *ucall_exit_mmio_addr; + +static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa) +{ + if (kvm_userspace_memory_region_find(vm, gpa, gpa + 1)) + return false; + + virt_pg_map(vm, gpa, gpa, 0); + + ucall_exit_mmio_addr = (vm_vaddr_t *)gpa; + sync_global_to_guest(vm, ucall_exit_mmio_addr); + + return true; +} + +void ucall_init(struct kvm_vm *vm, ucall_type_t type, void *arg) +{ + ucall_type = type; + sync_global_to_guest(vm, ucall_type); + + if (type == UCALL_PIO) + return; + + if (type == UCALL_MMIO) { + vm_paddr_t gpa, start, end, step, offset; + unsigned bits; + bool ret; + + if (arg) { + gpa = (vm_paddr_t)arg; + ret = ucall_mmio_init(vm, gpa); + TEST_ASSERT(ret, "Can't set ucall mmio address to %lx", gpa); + return; + } + + /* + * Find an address within the allowed physical and virtual address + * spaces, that does _not_ have a KVM memory region associated with + * it. Identity mapping an address like this allows the guest to + * access it, but as KVM doesn't know what to do with it, it + * will assume it's something userspace handles and exit with + * KVM_EXIT_MMIO. Well, at least that's how it works for AArch64. + * Here we start with a guess that the addresses around 5/8th + * of the allowed space are unmapped and then work both down and + * up from there in 1/16th allowed space sized steps. + * + * Note, we need to use VA-bits - 1 when calculating the allowed + * virtual address space for an identity mapping because the upper + * half of the virtual address space is the two's complement of the + * lower and won't match physical addresses. + */ + bits = vm->va_bits - 1; + bits = vm->pa_bits < bits ? vm->pa_bits : bits; + end = 1ul << bits; + start = end * 5 / 8; + step = end / 16; + for (offset = 0; offset < end - start; offset += step) { + if (ucall_mmio_init(vm, start - offset)) + return; + if (ucall_mmio_init(vm, start + offset)) + return; + } + TEST_ASSERT(false, "Can't find a ucall mmio address"); + } +} + +void ucall_uninit(struct kvm_vm *vm) +{ + ucall_type = 0; + sync_global_to_guest(vm, ucall_type); + ucall_exit_mmio_addr = 0; + sync_global_to_guest(vm, ucall_exit_mmio_addr); +} + +static void ucall_pio_exit(struct ucall *uc) +{ +#ifdef __x86_64__ + asm volatile("in %[port], %%al" + : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax"); +#endif +} + +static void ucall_mmio_exit(struct ucall *uc) +{ + *ucall_exit_mmio_addr = (vm_vaddr_t)uc; +} + +void ucall(uint64_t cmd, int nargs, ...) +{ + struct ucall uc = { + .cmd = cmd, + }; + va_list va; + int i; + + nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS; + + va_start(va, nargs); + for (i = 0; i < nargs; ++i) + uc.args[i] = va_arg(va, uint64_t); + va_end(va); + + switch (ucall_type) { + case UCALL_PIO: + ucall_pio_exit(&uc); + break; + case UCALL_MMIO: + ucall_mmio_exit(&uc); + break; + }; +} + +uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) +{ + struct kvm_run *run = vcpu_state(vm, vcpu_id); + + memset(uc, 0, sizeof(*uc)); + +#ifdef __x86_64__ + if (ucall_type == UCALL_PIO && run->exit_reason == KVM_EXIT_IO && + run->io.port == UCALL_PIO_PORT) { + struct kvm_regs regs; + vcpu_regs_get(vm, vcpu_id, ®s); + memcpy(uc, addr_gva2hva(vm, (vm_vaddr_t)regs.rdi), sizeof(*uc)); + return uc->cmd; + } +#endif + if (ucall_type == UCALL_MMIO && run->exit_reason == KVM_EXIT_MMIO && + run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) { + vm_vaddr_t gva; + TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8, + "Unexpected ucall exit mmio address access"); + gva = *(vm_vaddr_t *)run->mmio.data; + memcpy(uc, addr_gva2hva(vm, gva), sizeof(*uc)); + } + + return uc->cmd; +} diff --git a/tools/testing/selftests/kvm/lib/x86.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index a3122f1949a8..f28127f4a3af 100644 --- a/tools/testing/selftests/kvm/lib/x86.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1,5 +1,5 @@ /* - * tools/testing/selftests/kvm/lib/x86.c + * tools/testing/selftests/kvm/lib/x86_64/processor.c * * Copyright (C) 2018, Google LLC. * @@ -10,8 +10,8 @@ #include "test_util.h" #include "kvm_util.h" -#include "kvm_util_internal.h" -#include "x86.h" +#include "../kvm_util_internal.h" +#include "processor.h" /* Minimum physical address used for virtual translation tables. */ #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 @@ -231,7 +231,7 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot) { int rc; - TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use " + TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); /* If needed, create page map l4 table. */ @@ -264,7 +264,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, uint16_t index[4]; struct pageMapL4Entry *pml4e; - TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use " + TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); TEST_ASSERT((vaddr % vm->page_size) == 0, @@ -551,7 +551,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) struct pageTableEntry *pte; void *hva; - TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use " + TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); index[0] = (gva >> 12) & 0x1ffu; @@ -624,9 +624,9 @@ void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot) kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot); switch (vm->mode) { - case VM_MODE_FLAT48PG: + case VM_MODE_P52V48_4K: sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; - sregs.cr4 |= X86_CR4_PAE; + sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); kvm_seg_set_unusable(&sregs.ldt); @@ -672,6 +672,102 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) vcpu_set_mp_state(vm, vcpuid, &mp_state); } +/* Allocate an instance of struct kvm_cpuid2 + * + * Input Args: None + * + * Output Args: None + * + * Return: A pointer to the allocated struct. The caller is responsible + * for freeing this struct. + * + * Since kvm_cpuid2 uses a 0-length array to allow a the size of the + * array to be decided at allocation time, allocation is slightly + * complicated. This function uses a reasonable default length for + * the array and performs the appropriate allocation. + */ +static struct kvm_cpuid2 *allocate_kvm_cpuid2(void) +{ + struct kvm_cpuid2 *cpuid; + int nent = 100; + size_t size; + + size = sizeof(*cpuid); + size += nent * sizeof(struct kvm_cpuid_entry2); + cpuid = malloc(size); + if (!cpuid) { + perror("malloc"); + abort(); + } + + cpuid->nent = nent; + + return cpuid; +} + +/* KVM Supported CPUID Get + * + * Input Args: None + * + * Output Args: + * + * Return: The supported KVM CPUID + * + * Get the guest CPUID supported by KVM. + */ +struct kvm_cpuid2 *kvm_get_supported_cpuid(void) +{ + static struct kvm_cpuid2 *cpuid; + int ret; + int kvm_fd; + + if (cpuid) + return cpuid; + + cpuid = allocate_kvm_cpuid2(); + kvm_fd = open(KVM_DEV_PATH, O_RDONLY); + if (kvm_fd < 0) + exit(KSFT_SKIP); + + ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid); + TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n", + ret, errno); + + close(kvm_fd); + return cpuid; +} + +/* Locate a cpuid entry. + * + * Input Args: + * cpuid: The cpuid. + * function: The function of the cpuid entry to find. + * + * Output Args: None + * + * Return: A pointer to the cpuid entry. Never returns NULL. + */ +struct kvm_cpuid_entry2 * +kvm_get_supported_cpuid_index(uint32_t function, uint32_t index) +{ + struct kvm_cpuid2 *cpuid; + struct kvm_cpuid_entry2 *entry = NULL; + int i; + + cpuid = kvm_get_supported_cpuid(); + for (i = 0; i < cpuid->nent; i++) { + if (cpuid->entries[i].function == function && + cpuid->entries[i].index == index) { + entry = &cpuid->entries[i]; + break; + } + } + + TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).", + function, index); + return entry; +} + /* VM VCPU CPUID Set * * Input Args: @@ -698,6 +794,7 @@ void vcpu_set_cpuid(struct kvm_vm *vm, rc, errno); } + /* Create a VM with reasonable defaults * * Input Args: @@ -726,7 +823,7 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, uint64_t extra_pg_pages = extra_mem_pages / 512 * 2; /* Create VM */ - vm = vm_create(VM_MODE_FLAT48PG, + vm = vm_create(VM_MODE_P52V48_4K, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); @@ -742,6 +839,154 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, return vm; } +/* VCPU Get MSR + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * msr_index - Index of MSR + * + * Output Args: None + * + * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced. + * + * Get value of MSR for VCPU. + */ +uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + struct { + struct kvm_msrs header; + struct kvm_msr_entry entry; + } buffer = {}; + int r; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + buffer.header.nmsrs = 1; + buffer.entry.index = msr_index; + r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header); + TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n" + " rc: %i errno: %i", r, errno); + + return buffer.entry.data; +} + +/* VCPU Set MSR + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * msr_index - Index of MSR + * msr_value - New value of MSR + * + * Output Args: None + * + * Return: On success, nothing. On failure a TEST_ASSERT is produced. + * + * Set value of MSR for VCPU. + */ +void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, + uint64_t msr_value) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + struct { + struct kvm_msrs header; + struct kvm_msr_entry entry; + } buffer = {}; + int r; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + memset(&buffer, 0, sizeof(buffer)); + buffer.header.nmsrs = 1; + buffer.entry.index = msr_index; + buffer.entry.data = msr_value; + r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header); + TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n" + " rc: %i errno: %i", r, errno); +} + +/* VM VCPU Args Set + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * num - number of arguments + * ... - arguments, each of type uint64_t + * + * Output Args: None + * + * Return: None + * + * Sets the first num function input arguments to the values + * given as variable args. Each of the variable args is expected to + * be of type uint64_t. + */ +void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) +{ + va_list ap; + struct kvm_regs regs; + + TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n" + " num: %u\n", + num); + + va_start(ap, num); + vcpu_regs_get(vm, vcpuid, ®s); + + if (num >= 1) + regs.rdi = va_arg(ap, uint64_t); + + if (num >= 2) + regs.rsi = va_arg(ap, uint64_t); + + if (num >= 3) + regs.rdx = va_arg(ap, uint64_t); + + if (num >= 4) + regs.rcx = va_arg(ap, uint64_t); + + if (num >= 5) + regs.r8 = va_arg(ap, uint64_t); + + if (num >= 6) + regs.r9 = va_arg(ap, uint64_t); + + vcpu_regs_set(vm, vcpuid, ®s); + va_end(ap); +} + +/* + * VM VCPU Dump + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * indent - Left margin indent amount + * + * Output Args: + * stream - Output FILE stream + * + * Return: None + * + * Dumps the current state of the VCPU specified by vcpuid, within the VM + * given by vm, to the FILE stream given by stream. + */ +void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) +{ + struct kvm_regs regs; + struct kvm_sregs sregs; + + fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid); + + fprintf(stream, "%*sregs:\n", indent + 2, ""); + vcpu_regs_get(vm, vcpuid, ®s); + regs_dump(stream, ®s, indent + 4); + + fprintf(stream, "%*ssregs:\n", indent + 2, ""); + vcpu_sregs_get(vm, vcpuid, &sregs); + sregs_dump(stream, &sregs, indent + 4); +} + struct kvm_x86_state { struct kvm_vcpu_events events; struct kvm_mp_state mp_state; diff --git a/tools/testing/selftests/kvm/lib/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index b987c3c970eb..771ba6bf751c 100644 --- a/tools/testing/selftests/kvm/lib/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -1,5 +1,5 @@ /* - * tools/testing/selftests/kvm/lib/x86.c + * tools/testing/selftests/kvm/lib/x86_64/vmx.c * * Copyright (C) 2018, Google LLC. * @@ -10,9 +10,11 @@ #include "test_util.h" #include "kvm_util.h" -#include "x86.h" +#include "processor.h" #include "vmx.h" +bool enable_evmcs; + /* Allocate memory regions for nested VMX tests. * * Input Args: @@ -62,6 +64,20 @@ vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva) vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite); memset(vmx->vmwrite_hva, 0, getpagesize()); + /* Setup of a region of guest memory for the VP Assist page. */ + vmx->vp_assist = (void *)vm_vaddr_alloc(vm, getpagesize(), + 0x10000, 0, 0); + vmx->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)vmx->vp_assist); + vmx->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vp_assist); + + /* Setup of a region of guest memory for the enlightened VMCS. */ + vmx->enlightened_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), + 0x10000, 0, 0); + vmx->enlightened_vmcs_hva = + addr_gva2hva(vm, (uintptr_t)vmx->enlightened_vmcs); + vmx->enlightened_vmcs_gpa = + addr_gva2gpa(vm, (uintptr_t)vmx->enlightened_vmcs); + *p_vmx_gva = vmx_gva; return vmx; } @@ -107,18 +123,31 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx) if (vmxon(vmx->vmxon_gpa)) return false; - /* Load a VMCS. */ - *(uint32_t *)(vmx->vmcs) = vmcs_revision(); - if (vmclear(vmx->vmcs_gpa)) - return false; - - if (vmptrld(vmx->vmcs_gpa)) - return false; + return true; +} - /* Setup shadow VMCS, do not load it yet. */ - *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul; - if (vmclear(vmx->shadow_vmcs_gpa)) - return false; +bool load_vmcs(struct vmx_pages *vmx) +{ + if (!enable_evmcs) { + /* Load a VMCS. */ + *(uint32_t *)(vmx->vmcs) = vmcs_revision(); + if (vmclear(vmx->vmcs_gpa)) + return false; + + if (vmptrld(vmx->vmcs_gpa)) + return false; + + /* Setup shadow VMCS, do not load it yet. */ + *(uint32_t *)(vmx->shadow_vmcs) = + vmcs_revision() | 0x80000000ul; + if (vmclear(vmx->shadow_vmcs_gpa)) + return false; + } else { + if (evmcs_vmptrld(vmx->enlightened_vmcs_gpa, + vmx->enlightened_vmcs)) + return false; + current_evmcs->revision_id = vmcs_revision(); + } return true; } diff --git a/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c index 11ec358bf969..d503a51fad30 100644 --- a/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c +++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c @@ -17,7 +17,7 @@ #include "test_util.h" #include "kvm_util.h" -#include "x86.h" +#include "processor.h" #define X86_FEATURE_XSAVE (1<<26) #define X86_FEATURE_OSXSAVE (1<<27) @@ -67,6 +67,7 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; struct kvm_sregs sregs; struct kvm_cpuid_entry2 *entry; + struct ucall uc; int rc; entry = kvm_get_supported_cpuid_entry(1); @@ -87,21 +88,20 @@ int main(int argc, char *argv[]) rc = _vcpu_run(vm, VCPU_ID); if (run->exit_reason == KVM_EXIT_IO) { - switch (run->io.port) { - case GUEST_PORT_SYNC: + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_SYNC: /* emulate hypervisor clearing CR4.OSXSAVE */ vcpu_sregs_get(vm, VCPU_ID, &sregs); sregs.cr4 &= ~X86_CR4_OSXSAVE; vcpu_sregs_set(vm, VCPU_ID, &sregs); break; - case GUEST_PORT_ABORT: + case UCALL_ABORT: TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit."); break; - case GUEST_PORT_DONE: + case UCALL_DONE: goto done; default: - TEST_ASSERT(false, "Unknown port 0x%x.", - run->io.port); + TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); } } } diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c new file mode 100644 index 000000000000..c49c2a28b0eb --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018, Red Hat, Inc. + * + * Tests for Enlightened VMCS, including nested guest state. + */ +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" + +#include "kvm_util.h" + +#include "vmx.h" + +#define VCPU_ID 5 + +static bool have_nested_state; + +void l2_guest_code(void) +{ + GUEST_SYNC(6); + + GUEST_SYNC(7); + + /* Done, exit to L1 and never come back. */ + vmcall(); +} + +void l1_guest_code(struct vmx_pages *vmx_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist); + + GUEST_ASSERT(vmx_pages->vmcs_gpa); + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_SYNC(3); + GUEST_ASSERT(load_vmcs(vmx_pages)); + GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); + + GUEST_SYNC(4); + GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); + + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_SYNC(5); + GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); + GUEST_SYNC(8); + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + GUEST_SYNC(9); +} + +void guest_code(struct vmx_pages *vmx_pages) +{ + GUEST_SYNC(1); + GUEST_SYNC(2); + + if (vmx_pages) + l1_guest_code(vmx_pages); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + struct vmx_pages *vmx_pages = NULL; + vm_vaddr_t vmx_pages_gva = 0; + + struct kvm_regs regs1, regs2; + struct kvm_vm *vm; + struct kvm_run *run; + struct kvm_x86_state *state; + struct ucall uc; + int stage; + uint16_t evmcs_ver; + struct kvm_enable_cap enable_evmcs_cap = { + .cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS, + .args[0] = (unsigned long)&evmcs_ver + }; + + struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code); + + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + + if (!kvm_check_cap(KVM_CAP_NESTED_STATE) || + !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) { + printf("capabilities not available, skipping test\n"); + exit(KSFT_SKIP); + } + + vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap); + + /* KVM should return supported EVMCS version range */ + TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) && + (evmcs_ver & 0xff) > 0, + "Incorrect EVMCS version range: %x:%x\n", + evmcs_ver & 0xff, evmcs_ver >> 8); + + run = vcpu_state(vm, VCPU_ID); + + vcpu_regs_get(vm, VCPU_ID, ®s1); + + vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + + for (stage = 1;; stage++) { + _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Stage %d: unexpected exit reason: %u (%s),\n", + stage, run->exit_reason, + exit_reason_str(run->exit_reason)); + + memset(®s1, 0, sizeof(regs1)); + vcpu_regs_get(vm, VCPU_ID, ®s1); + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0], + __FILE__, uc.args[1]); + /* NOT REACHED */ + case UCALL_SYNC: + break; + case UCALL_DONE: + goto done; + default: + TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); + } + + /* UCALL_SYNC is handled here. */ + TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && + uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx", + stage, (ulong)uc.args[1]); + + state = vcpu_save_state(vm, VCPU_ID); + kvm_vm_release(vm); + + /* Restore state in a new VM. */ + kvm_vm_restart(vm, O_RDWR); + vm_vcpu_add(vm, VCPU_ID, 0, 0); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + vcpu_load_state(vm, VCPU_ID, state); + run = vcpu_state(vm, VCPU_ID); + free(state); + + memset(®s2, 0, sizeof(regs2)); + vcpu_regs_get(vm, VCPU_ID, ®s2); + TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), + "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", + (ulong) regs2.rdi, (ulong) regs2.rsi); + } + +done: + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c new file mode 100644 index 000000000000..264425f75806 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test for x86 KVM_CAP_HYPERV_CPUID + * + * Copyright (C) 2018, Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +#define VCPU_ID 0 + +static void guest_code(void) +{ +} + +static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, + int evmcs_enabled) +{ + int i; + + if (!evmcs_enabled) + TEST_ASSERT(hv_cpuid_entries->nent == 6, + "KVM_GET_SUPPORTED_HV_CPUID should return 6 entries" + " when Enlightened VMCS is disabled (returned %d)", + hv_cpuid_entries->nent); + else + TEST_ASSERT(hv_cpuid_entries->nent == 7, + "KVM_GET_SUPPORTED_HV_CPUID should return 7 entries" + " when Enlightened VMCS is enabled (returned %d)", + hv_cpuid_entries->nent); + + for (i = 0; i < hv_cpuid_entries->nent; i++) { + struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i]; + + TEST_ASSERT((entry->function >= 0x40000000) && + (entry->function <= 0x4000000A), + "function %lx is our of supported range", + entry->function); + + TEST_ASSERT(entry->index == 0, + ".index field should be zero"); + + TEST_ASSERT(entry->index == 0, + ".index field should be zero"); + + TEST_ASSERT(entry->flags == 0, + ".flags field should be zero"); + + TEST_ASSERT(entry->padding[0] == entry->padding[1] + == entry->padding[2] == 0, + ".index field should be zero"); + + /* + * If needed for debug: + * fprintf(stdout, + * "CPUID%lx EAX=0x%lx EBX=0x%lx ECX=0x%lx EDX=0x%lx\n", + * entry->function, entry->eax, entry->ebx, entry->ecx, + * entry->edx); + */ + } + +} + +void test_hv_cpuid_e2big(struct kvm_vm *vm) +{ + static struct kvm_cpuid2 cpuid = {.nent = 0}; + int ret; + + ret = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, &cpuid); + + TEST_ASSERT(ret == -1 && errno == E2BIG, + "KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when" + " it should have: %d %d", ret, errno); +} + + +struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm) +{ + int nent = 20; /* should be enough */ + static struct kvm_cpuid2 *cpuid; + int ret; + + cpuid = malloc(sizeof(*cpuid) + nent * sizeof(struct kvm_cpuid_entry2)); + + if (!cpuid) { + perror("malloc"); + abort(); + } + + cpuid->nent = nent; + + vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, cpuid); + + return cpuid; +} + + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + int rv; + uint16_t evmcs_ver; + struct kvm_cpuid2 *hv_cpuid_entries; + struct kvm_enable_cap enable_evmcs_cap = { + .cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS, + .args[0] = (unsigned long)&evmcs_ver + }; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + rv = kvm_check_cap(KVM_CAP_HYPERV_CPUID); + if (!rv) { + fprintf(stderr, + "KVM_CAP_HYPERV_CPUID not supported, skip test\n"); + exit(KSFT_SKIP); + } + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code); + + test_hv_cpuid_e2big(vm); + + hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm); + if (!hv_cpuid_entries) + return 1; + + test_hv_cpuid(hv_cpuid_entries, 0); + + free(hv_cpuid_entries); + + vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap); + + hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm); + if (!hv_cpuid_entries) + return 1; + + test_hv_cpuid(hv_cpuid_entries, 1); + + free(hv_cpuid_entries); + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c index 3764e7121265..eb3e7a838cb4 100644 --- a/tools/testing/selftests/kvm/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c @@ -19,7 +19,7 @@ #include "test_util.h" #include "kvm_util.h" -#include "x86.h" +#include "processor.h" #define VCPU_ID 0 #define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00 @@ -48,7 +48,7 @@ static void set_msr_platform_info_enabled(struct kvm_vm *vm, bool enable) static void test_msr_platform_info_enabled(struct kvm_vm *vm) { struct kvm_run *run = vcpu_state(vm, VCPU_ID); - struct guest_args args; + struct ucall uc; set_msr_platform_info_enabled(vm, true); vcpu_run(vm, VCPU_ID); @@ -56,11 +56,11 @@ static void test_msr_platform_info_enabled(struct kvm_vm *vm) "Exit_reason other than KVM_EXIT_IO: %u (%s),\n", run->exit_reason, exit_reason_str(run->exit_reason)); - guest_args_read(vm, VCPU_ID, &args); - TEST_ASSERT(args.port == GUEST_PORT_SYNC, - "Received IO from port other than PORT_HOST_SYNC: %u\n", - run->io.port); - TEST_ASSERT((args.arg1 & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) == + get_ucall(vm, VCPU_ID, &uc); + TEST_ASSERT(uc.cmd == UCALL_SYNC, + "Received ucall other than UCALL_SYNC: %u\n", + ucall); + TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) == MSR_PLATFORM_INFO_MAX_TURBO_RATIO, "Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.", MSR_PLATFORM_INFO_MAX_TURBO_RATIO); diff --git a/tools/testing/selftests/kvm/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c index 881419d5746e..35640e8e95bc 100644 --- a/tools/testing/selftests/kvm/set_sregs_test.c +++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c @@ -22,7 +22,7 @@ #include "test_util.h" #include "kvm_util.h" -#include "x86.h" +#include "processor.h" #define VCPU_ID 5 diff --git a/tools/testing/selftests/kvm/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 900e3e9dfb9f..4b3f556265f1 100644 --- a/tools/testing/selftests/kvm/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -17,7 +17,7 @@ #include "test_util.h" #include "kvm_util.h" -#include "x86.h" +#include "processor.h" #include "vmx.h" #define VCPU_ID 5 @@ -26,20 +26,20 @@ static bool have_nested_state; void l2_guest_code(void) { - GUEST_SYNC(5); + GUEST_SYNC(6); /* Exit to L1 */ vmcall(); /* L1 has now set up a shadow VMCS for us. */ GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); - GUEST_SYNC(9); + GUEST_SYNC(10); GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee)); - GUEST_SYNC(10); + GUEST_SYNC(11); GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee); GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee)); - GUEST_SYNC(11); + GUEST_SYNC(12); /* Done, exit to L1 and never come back. */ vmcall(); @@ -52,15 +52,17 @@ void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_ASSERT(vmx_pages->vmcs_gpa); GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_SYNC(3); + GUEST_ASSERT(load_vmcs(vmx_pages)); GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); - GUEST_SYNC(3); + GUEST_SYNC(4); GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); - GUEST_SYNC(4); + GUEST_SYNC(5); GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); GUEST_ASSERT(!vmlaunch()); GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); @@ -72,7 +74,7 @@ void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_ASSERT(!vmresume()); GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - GUEST_SYNC(6); + GUEST_SYNC(7); GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); GUEST_ASSERT(!vmresume()); @@ -85,12 +87,12 @@ void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa)); GUEST_ASSERT(vmlaunch()); - GUEST_SYNC(7); + GUEST_SYNC(8); GUEST_ASSERT(vmlaunch()); GUEST_ASSERT(vmresume()); vmwrite(GUEST_RIP, 0xc0ffee); - GUEST_SYNC(8); + GUEST_SYNC(9); GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa)); @@ -101,7 +103,7 @@ void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee); GUEST_ASSERT(vmlaunch()); GUEST_ASSERT(vmresume()); - GUEST_SYNC(12); + GUEST_SYNC(13); GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee); GUEST_ASSERT(vmlaunch()); GUEST_ASSERT(vmresume()); @@ -127,6 +129,7 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; struct kvm_run *run; struct kvm_x86_state *state; + struct ucall uc; int stage; struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); @@ -149,29 +152,29 @@ int main(int argc, char *argv[]) for (stage = 1;; stage++) { _vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, - "Unexpected exit reason: %u (%s),\n", - run->exit_reason, + "Stage %d: unexpected exit reason: %u (%s),\n", + stage, run->exit_reason, exit_reason_str(run->exit_reason)); memset(®s1, 0, sizeof(regs1)); vcpu_regs_get(vm, VCPU_ID, ®s1); - switch (run->io.port) { - case GUEST_PORT_ABORT: - TEST_ASSERT(false, "%s at %s:%d", (const char *) regs1.rdi, - __FILE__, regs1.rsi); + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0], + __FILE__, uc.args[1]); /* NOT REACHED */ - case GUEST_PORT_SYNC: + case UCALL_SYNC: break; - case GUEST_PORT_DONE: + case UCALL_DONE: goto done; default: - TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port); + TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); } - /* PORT_SYNC is handled here. */ - TEST_ASSERT(!strcmp((const char *)regs1.rdi, "hello") && - regs1.rsi == stage, "Unexpected register values vmexit #%lx, got %lx", - stage, (ulong) regs1.rsi); + /* UCALL_SYNC is handled here. */ + TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && + uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx", + stage, (ulong)uc.args[1]); state = vcpu_save_state(vm, VCPU_ID); kvm_vm_release(vm); diff --git a/tools/testing/selftests/kvm/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c index 213343e5dff9..c8478ce9ea77 100644 --- a/tools/testing/selftests/kvm/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c @@ -19,7 +19,7 @@ #include "test_util.h" #include "kvm_util.h" -#include "x86.h" +#include "processor.h" #define VCPU_ID 5 diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c index 49bcc68b0235..18fa64db0d7a 100644 --- a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c @@ -1,5 +1,5 @@ /* - * gtests/tests/vmx_tsc_adjust_test.c + * vmx_tsc_adjust_test * * Copyright (C) 2018, Google LLC. * @@ -22,13 +22,13 @@ #include "test_util.h" #include "kvm_util.h" -#include "x86.h" +#include "processor.h" #include "vmx.h" #include <string.h> #include <sys/ioctl.h> -#include "../kselftest.h" +#include "kselftest.h" #ifndef MSR_IA32_TSC_ADJUST #define MSR_IA32_TSC_ADJUST 0x3b @@ -94,6 +94,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); /* Prepare the VMCS for L2 execution. */ prepare_vmcs(vmx_pages, l2_guest_code, @@ -146,26 +147,25 @@ int main(int argc, char *argv[]) for (;;) { volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); - struct guest_args args; + struct ucall uc; vcpu_run(vm, VCPU_ID); - guest_args_read(vm, VCPU_ID, &args); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", run->exit_reason, exit_reason_str(run->exit_reason)); - switch (args.port) { - case GUEST_PORT_ABORT: - TEST_ASSERT(false, "%s", (const char *) args.arg0); + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_ASSERT(false, "%s", (const char *)uc.args[0]); /* NOT REACHED */ - case GUEST_PORT_SYNC: - report(args.arg1); + case UCALL_SYNC: + report(uc.args[1]); break; - case GUEST_PORT_DONE: + case UCALL_DONE: goto done; default: - TEST_ASSERT(false, "Unknown port 0x%x.", args.port); + TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); } } diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 0a8e75886224..8b0f16409ed7 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -16,18 +16,18 @@ TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS)) TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED)) TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES)) +ifdef KSFT_KHDR_INSTALL top_srcdir ?= ../../../.. include $(top_srcdir)/scripts/subarch.include ARCH ?= $(SUBARCH) -all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) - .PHONY: khdr khdr: make ARCH=$(ARCH) -C $(top_srcdir) headers_install -ifdef KSFT_KHDR_INSTALL -$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES):| khdr +all: khdr $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) +else +all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) endif .ONESHELL: diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 78b24cf76f40..6f81130605d7 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -3,6 +3,7 @@ socket psock_fanout psock_snd psock_tpacket +reuseport_addr_any reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa @@ -14,3 +15,5 @@ udpgso_bench_rx udpgso_bench_tx tcp_inq tls +txring_overwrite +ip_defrag diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 919aa2ac00af..1e6d14d2825c 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -4,20 +4,23 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ -TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh -TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh +TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \ + rtnetlink.sh xfrm_policy.sh +TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh +TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh +TEST_PROGS += test_vxlan_fdb_changelink.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket -TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy -TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd -TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx +TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any +TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite +TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls KSFT_KHDR_INSTALL := 1 include ../lib.mk -$(OUTPUT)/reuseport_bpf_numa: LDFLAGS += -lnuma +$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma $(OUTPUT)/tcp_mmap: LDFLAGS += -lpthread $(OUTPUT)/tcp_inq: LDFLAGS += -lpthread diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index cd3a2f1545b5..5821bdd98d20 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -14,3 +14,17 @@ CONFIG_IPV6_VTI=y CONFIG_DUMMY=y CONFIG_BRIDGE=y CONFIG_VLAN_8021Q=y +CONFIG_NETFILTER=y +CONFIG_NETFILTER_ADVANCED=y +CONFIG_NF_CONNTRACK=m +CONFIG_NF_NAT_IPV6=m +CONFIG_NF_NAT_IPV4=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP6_NF_NAT=m +CONFIG_IP_NF_NAT=m +CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_IPV6=y +CONFIG_NF_TABLES_IPV4=y +CONFIG_NFT_CHAIN_NAT_IPV6=m +CONFIG_NFT_CHAIN_NAT_IPV4=m diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 0f45633bd634..1080ff55a788 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -9,11 +9,11 @@ ret=0 ksft_skip=4 # all tests in this script. Can be overridden with -t option -TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric" +TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics" VERBOSE=0 PAUSE_ON_FAIL=no PAUSE=no -IP="ip -netns testns" +IP="ip -netns ns1" log_test() { @@ -47,8 +47,10 @@ log_test() setup() { set -e - ip netns add testns + ip netns add ns1 $IP link set dev lo up + ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1 + ip netns exec ns1 sysctl -qw net.ipv6.conf.all.forwarding=1 $IP link add dummy0 type dummy $IP link set dev dummy0 up @@ -61,7 +63,8 @@ setup() cleanup() { $IP link del dev dummy0 &> /dev/null - ip netns del testns + ip netns del ns1 + ip netns del ns2 &> /dev/null } get_linklocal() @@ -385,6 +388,7 @@ fib_carrier_unicast_test() set -e $IP link set dev dummy0 carrier off + sleep 1 set +e echo " Carrier down" @@ -639,11 +643,14 @@ add_initial_route6() check_route6() { - local pfx="2001:db8:104::/64" + local pfx local expected="$1" local out local rc=0 + set -- $expected + pfx=$1 + out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//') [ "${out}" = "${expected}" ] && return 0 @@ -690,28 +697,33 @@ route_setup() [ "${VERBOSE}" = "1" ] && set -x set -e - $IP li add red up type vrf table 101 + ip netns add ns2 + ip -netns ns2 link set dev lo up + ip netns exec ns2 sysctl -qw net.ipv4.ip_forward=1 + ip netns exec ns2 sysctl -qw net.ipv6.conf.all.forwarding=1 + $IP li add veth1 type veth peer name veth2 $IP li add veth3 type veth peer name veth4 $IP li set veth1 up $IP li set veth3 up - $IP li set veth2 vrf red up - $IP li set veth4 vrf red up - $IP li add dummy1 type dummy - $IP li set dummy1 vrf red up - - $IP -6 addr add 2001:db8:101::1/64 dev veth1 - $IP -6 addr add 2001:db8:101::2/64 dev veth2 - $IP -6 addr add 2001:db8:103::1/64 dev veth3 - $IP -6 addr add 2001:db8:103::2/64 dev veth4 - $IP -6 addr add 2001:db8:104::1/64 dev dummy1 + $IP li set veth2 netns ns2 up + $IP li set veth4 netns ns2 up + ip -netns ns2 li add dummy1 type dummy + ip -netns ns2 li set dummy1 up + $IP -6 addr add 2001:db8:101::1/64 dev veth1 nodad + $IP -6 addr add 2001:db8:103::1/64 dev veth3 nodad $IP addr add 172.16.101.1/24 dev veth1 - $IP addr add 172.16.101.2/24 dev veth2 $IP addr add 172.16.103.1/24 dev veth3 - $IP addr add 172.16.103.2/24 dev veth4 - $IP addr add 172.16.104.1/24 dev dummy1 + + ip -netns ns2 -6 addr add 2001:db8:101::2/64 dev veth2 nodad + ip -netns ns2 -6 addr add 2001:db8:103::2/64 dev veth4 nodad + ip -netns ns2 -6 addr add 2001:db8:104::1/64 dev dummy1 nodad + + ip -netns ns2 addr add 172.16.101.2/24 dev veth2 + ip -netns ns2 addr add 172.16.103.2/24 dev veth4 + ip -netns ns2 addr add 172.16.104.1/24 dev dummy1 set +ex } @@ -944,7 +956,7 @@ ipv6_addr_metric_test() log_test $rc 0 "Modify metric of address" # verify prefix route removed on down - run_cmd "ip netns exec testns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1" + run_cmd "ip netns exec ns1 sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1" run_cmd "$IP li set dev dummy2 down" rc=$? if [ $rc -eq 0 ]; then @@ -967,6 +979,77 @@ ipv6_addr_metric_test() cleanup } +ipv6_route_metrics_test() +{ + local rc + + echo + echo "IPv6 routes with metrics" + + route_setup + + # + # single path with metrics + # + run_cmd "$IP -6 ro add 2001:db8:111::/64 via 2001:db8:101::2 mtu 1400" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "2001:db8:111::/64 via 2001:db8:101::2 dev veth1 metric 1024 mtu 1400" + rc=$? + fi + log_test $rc 0 "Single path route with mtu metric" + + + # + # multipath via separate routes with metrics + # + run_cmd "$IP -6 ro add 2001:db8:112::/64 via 2001:db8:101::2 mtu 1400" + run_cmd "$IP -6 ro append 2001:db8:112::/64 via 2001:db8:103::2" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "2001:db8:112::/64 metric 1024 mtu 1400 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1" + rc=$? + fi + log_test $rc 0 "Multipath route via 2 single routes with mtu metric on first" + + # second route is coalesced to first to make a multipath route. + # MTU of the second path is hidden from display! + run_cmd "$IP -6 ro add 2001:db8:113::/64 via 2001:db8:101::2" + run_cmd "$IP -6 ro append 2001:db8:113::/64 via 2001:db8:103::2 mtu 1400" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "2001:db8:113::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1" + rc=$? + fi + log_test $rc 0 "Multipath route via 2 single routes with mtu metric on 2nd" + + run_cmd "$IP -6 ro del 2001:db8:113::/64 via 2001:db8:101::2" + if [ $? -eq 0 ]; then + check_route6 "2001:db8:113::/64 via 2001:db8:103::2 dev veth3 metric 1024 mtu 1400" + log_test $? 0 " MTU of second leg" + fi + + # + # multipath with metrics + # + run_cmd "$IP -6 ro add 2001:db8:115::/64 mtu 1400 nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2" + rc=$? + if [ $rc -eq 0 ]; then + check_route6 "2001:db8:115::/64 metric 1024 mtu 1400 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1" + rc=$? + fi + log_test $rc 0 "Multipath route with mtu metric" + + $IP -6 ro add 2001:db8:104::/64 via 2001:db8:101::2 mtu 1300 + run_cmd "ip netns exec ns1 ping6 -w1 -c1 -s 1500 2001:db8:104::1" + log_test $? 0 "Using route with mtu metric" + + run_cmd "$IP -6 ro add 2001:db8:114::/64 via 2001:db8:101::2 congctl lock foo" + log_test $? 2 "Invalid metric (fails metric_convert)" + + route_cleanup +} + # add route for a prefix, flushing any existing routes first # expected to be the first step of a test add_route() @@ -1005,11 +1088,15 @@ add_initial_route() check_route() { - local pfx="172.16.104.0/24" + local pfx local expected="$1" local out local rc=0 + set -- $expected + pfx=$1 + [ "${pfx}" = "unreachable" ] && pfx=$2 + out=$($IP ro ls match ${pfx}) [ "${out}" = "${expected}" ] && return 0 @@ -1319,6 +1406,43 @@ ipv4_addr_metric_test() cleanup } +ipv4_route_metrics_test() +{ + local rc + + echo + echo "IPv4 route add / append tests" + + route_setup + + run_cmd "$IP ro add 172.16.111.0/24 via 172.16.101.2 mtu 1400" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.111.0/24 via 172.16.101.2 dev veth1 mtu 1400" + rc=$? + fi + log_test $rc 0 "Single path route with mtu metric" + + + run_cmd "$IP ro add 172.16.112.0/24 mtu 1400 nexthop via 172.16.101.2 nexthop via 172.16.103.2" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.112.0/24 mtu 1400 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1" + rc=$? + fi + log_test $rc 0 "Multipath route with mtu metric" + + $IP ro add 172.16.104.0/24 via 172.16.101.2 mtu 1300 + run_cmd "ip netns exec ns1 ping -w1 -c1 -s 1500 172.16.104.1" + log_test $? 0 "Using route with mtu metric" + + run_cmd "$IP ro add 172.16.111.0/24 via 172.16.101.2 congctl lock foo" + log_test $? 2 "Invalid metric (fails metric_convert)" + + route_cleanup +} + + ################################################################################ # usage @@ -1385,6 +1509,8 @@ do ipv4_route_test|ipv4_rt) ipv4_route_test;; ipv6_addr_metric) ipv6_addr_metric_test;; ipv4_addr_metric) ipv4_addr_metric_test;; + ipv6_route_metrics) ipv6_route_metrics_test;; + ipv4_route_metrics) ipv4_route_metrics_test;; help) echo "Test names: $TESTS"; exit 0;; esac diff --git a/tools/testing/selftests/net/forwarding/bridge_sticky_fdb.sh b/tools/testing/selftests/net/forwarding/bridge_sticky_fdb.sh new file mode 100755 index 000000000000..1f8ef0eff862 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_sticky_fdb.sh @@ -0,0 +1,69 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS="sticky" +NUM_NETIFS=4 +TEST_MAC=de:ad:be:ef:13:37 +source lib.sh + +switch_create() +{ + ip link add dev br0 type bridge + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $h1 up + ip link set dev $swp1 up + ip link set dev $h2 up + ip link set dev $swp2 up +} + +switch_destroy() +{ + ip link set dev $swp2 down + ip link set dev $h2 down + ip link set dev $swp1 down + ip link set dev $h1 down + + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + h2=${NETIFS[p3]} + swp2=${NETIFS[p4]} + + switch_create +} + +cleanup() +{ + pre_cleanup + switch_destroy +} + +sticky() +{ + bridge fdb add $TEST_MAC dev $swp1 master static sticky + check_err $? "Could not add fdb entry" + bridge fdb del $TEST_MAC dev $swp1 vlan 1 master static sticky + $MZ $h2 -c 1 -a $TEST_MAC -t arp "request" -q + bridge -j fdb show br br0 brport $swp1\ + | jq -e ".[] | select(.mac == \"$TEST_MAC\")" &> /dev/null + check_err $? "Did not find FDB record when should" + + log_test "Sticky fdb entry" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh index d8313d0438b7..b90dff8d3a94 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding" +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn" NUM_NETIFS=4 CHECK_TC="yes" source lib.sh @@ -96,6 +96,51 @@ flooding() flood_test $swp2 $h1 $h2 } +vlan_deletion() +{ + # Test that the deletion of a VLAN on a bridge port does not affect + # the PVID VLAN + log_info "Add and delete a VLAN on bridge port $swp1" + + bridge vlan add vid 10 dev $swp1 + bridge vlan del vid 10 dev $swp1 + + ping_ipv4 + ping_ipv6 +} + +extern_learn() +{ + local mac=de:ad:be:ef:13:37 + local ageing_time + + # Test that externally learned FDB entries can roam, but not age out + RET=0 + + bridge fdb add de:ad:be:ef:13:37 dev $swp1 master extern_learn vlan 1 + + bridge fdb show brport $swp1 | grep -q de:ad:be:ef:13:37 + check_err $? "Did not find FDB entry when should" + + # Wait for 10 seconds after the ageing time to make sure the FDB entry + # was not aged out + ageing_time=$(bridge_ageing_time_get br0) + sleep $((ageing_time + 10)) + + bridge fdb show brport $swp1 | grep -q de:ad:be:ef:13:37 + check_err $? "FDB entry was aged out when should not" + + $MZ $h2 -c 1 -p 64 -a $mac -t ip -q + + bridge fdb show brport $swp2 | grep -q de:ad:be:ef:13:37 + check_err $? "FDB entry did not roam when should" + + log_test "Externally learned FDB entry - ageing & roaming" + + bridge fdb del de:ad:be:ef:13:37 dev $swp2 master vlan 1 &> /dev/null + bridge fdb del de:ad:be:ef:13:37 dev $swp1 master vlan 1 &> /dev/null +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index ca53b539aa2d..3f248d1f5b91 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -15,6 +15,8 @@ PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no} NETIF_TYPE=${NETIF_TYPE:=veth} NETIF_CREATE=${NETIF_CREATE:=yes} +MCD=${MCD:=smcrouted} +MC_CLI=${MC_CLI:=smcroutectl} relative_path="${BASH_SOURCE%/*}" if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then @@ -104,7 +106,7 @@ create_netif_veth() { local i - for i in $(eval echo {1..$NUM_NETIFS}); do + for ((i = 1; i <= NUM_NETIFS; ++i)); do local j=$((i+1)) ip link show dev ${NETIFS[p$i]} &> /dev/null @@ -135,7 +137,7 @@ if [[ "$NETIF_CREATE" = "yes" ]]; then create_netif fi -for i in $(eval echo {1..$NUM_NETIFS}); do +for ((i = 1; i <= NUM_NETIFS; ++i)); do ip link show dev ${NETIFS[p$i]} &> /dev/null if [[ $? -ne 0 ]]; then echo "SKIP: could not find all required interfaces" @@ -251,7 +253,7 @@ lldpad_app_wait_set() { local dev=$1; shift - while lldptool -t -i $dev -V APP -c app | grep -q pending; do + while lldptool -t -i $dev -V APP -c app | grep -Eq "pending|unknown"; do echo "$dev: waiting for lldpad to push pending APP updates" sleep 5 done @@ -477,11 +479,24 @@ master_name_get() ip -j link show dev $if_name | jq -r '.[]["master"]' } +link_stats_get() +{ + local if_name=$1; shift + local dir=$1; shift + local stat=$1; shift + + ip -j -s link show dev $if_name \ + | jq '.[]["stats64"]["'$dir'"]["'$stat'"]' +} + link_stats_tx_packets_get() { - local if_name=$1 + link_stats_get $1 tx packets +} - ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]' +link_stats_rx_errors_get() +{ + link_stats_get $1 rx errors } tc_rule_stats_get() @@ -494,6 +509,14 @@ tc_rule_stats_get() | jq '.[1].options.actions[].stats.packets' } +ethtool_stats_get() +{ + local dev=$1; shift + local stat=$1; shift + + ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2 +} + mac_get() { local if_name=$1 @@ -541,6 +564,23 @@ forwarding_restore() sysctl_restore net.ipv4.conf.all.forwarding } +declare -A MTU_ORIG +mtu_set() +{ + local dev=$1; shift + local mtu=$1; shift + + MTU_ORIG["$dev"]=$(ip -j link show dev $dev | jq -e '.[].mtu') + ip link set dev $dev mtu $mtu +} + +mtu_restore() +{ + local dev=$1; shift + + ip link set dev $dev mtu ${MTU_ORIG["$dev"]} +} + tc_offload_check() { local num_netifs=${1:-$NUM_NETIFS} @@ -758,6 +798,17 @@ multipath_eval() log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio" } +in_ns() +{ + local name=$1; shift + + ip netns exec $name bash <<-EOF + NUM_NETIFS=0 + source lib.sh + $(for a in "$@"; do printf "%q${IFS:0:1}" "$a"; done) + EOF +} + ############################################################################## # Tests @@ -765,10 +816,11 @@ ping_do() { local if_name=$1 local dip=$2 + local args=$3 local vrf_name vrf_name=$(master_name_get $if_name) - ip vrf exec $vrf_name $PING $dip -c 10 -i 0.1 -w 2 &> /dev/null + ip vrf exec $vrf_name $PING $args $dip -c 10 -i 0.1 -w 2 &> /dev/null } ping_test() @@ -777,17 +829,18 @@ ping_test() ping_do $1 $2 check_err $? - log_test "ping" + log_test "ping$3" } ping6_do() { local if_name=$1 local dip=$2 + local args=$3 local vrf_name vrf_name=$(master_name_get $if_name) - ip vrf exec $vrf_name $PING6 $dip -c 10 -i 0.1 -w 2 &> /dev/null + ip vrf exec $vrf_name $PING6 $args $dip -c 10 -i 0.1 -w 2 &> /dev/null } ping6_test() @@ -796,7 +849,7 @@ ping6_test() ping6_do $1 $2 check_err $? - log_test "ping6" + log_test "ping6$3" } learning_test() diff --git a/tools/testing/selftests/net/forwarding/router_multicast.sh b/tools/testing/selftests/net/forwarding/router_multicast.sh new file mode 100755 index 000000000000..109e6d785169 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_multicast.sh @@ -0,0 +1,311 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +------------------+ +# | H1 (v$h1) | +# | 2001:db8:1::2/64 | +# | 198.51.100.2/28 | +# | $h1 + | +# +-------------|----+ +# | +# +-------------|-------------------------------+ +# | SW1 | | +# | $rp1 + | +# | 198.51.100.1/28 | +# | 2001:db8:1::1/64 | +# | | +# | 2001:db8:2::1/64 2001:db8:3::1/64 | +# | 198.51.100.17/28 198.51.100.33/28 | +# | $rp2 + $rp3 + | +# +--------------|--------------------------|---+ +# | | +# | | +# +--------------|---+ +--------------|---+ +# | H2 (v$h2) | | | H3 (v$h3) | | +# | $h2 + | | $h3 + | +# | 198.51.100.18/28 | | 198.51.100.34/28 | +# | 2001:db8:2::2/64 | | 2001:db8:3::2/64 | +# +------------------+ +------------------+ +# + +ALL_TESTS="mcast_v4 mcast_v6" +NUM_NETIFS=6 +source lib.sh +source tc_common.sh + +require_command $MCD +require_command $MC_CLI +table_name=selftests + +h1_create() +{ + simple_if_init $h1 198.51.100.2/28 2001:db8:1::2/64 + + ip route add 198.51.100.16/28 vrf v$h1 nexthop via 198.51.100.1 + ip route add 198.51.100.32/28 vrf v$h1 nexthop via 198.51.100.1 + + ip route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::1 + ip route add 2001:db8:3::/64 vrf v$h1 nexthop via 2001:db8:1::1 +} + +h1_destroy() +{ + ip route del 2001:db8:3::/64 vrf v$h1 + ip route del 2001:db8:2::/64 vrf v$h1 + + ip route del 198.51.100.32/28 vrf v$h1 + ip route del 198.51.100.16/28 vrf v$h1 + + simple_if_fini $h1 198.51.100.2/28 2001:db8:1::2/64 +} + +h2_create() +{ + simple_if_init $h2 198.51.100.18/28 2001:db8:2::2/64 + + ip route add 198.51.100.0/28 vrf v$h2 nexthop via 198.51.100.17 + ip route add 198.51.100.32/28 vrf v$h2 nexthop via 198.51.100.17 + + ip route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1 + ip route add 2001:db8:3::/64 vrf v$h2 nexthop via 2001:db8:2::1 + + tc qdisc add dev $h2 ingress +} + +h2_destroy() +{ + tc qdisc del dev $h2 ingress + + ip route del 2001:db8:3::/64 vrf v$h2 + ip route del 2001:db8:1::/64 vrf v$h2 + + ip route del 198.51.100.32/28 vrf v$h2 + ip route del 198.51.100.0/28 vrf v$h2 + + simple_if_fini $h2 198.51.100.18/28 2001:db8:2::2/64 +} + +h3_create() +{ + simple_if_init $h3 198.51.100.34/28 2001:db8:3::2/64 + + ip route add 198.51.100.0/28 vrf v$h3 nexthop via 198.51.100.33 + ip route add 198.51.100.16/28 vrf v$h3 nexthop via 198.51.100.33 + + ip route add 2001:db8:1::/64 vrf v$h3 nexthop via 2001:db8:3::1 + ip route add 2001:db8:2::/64 vrf v$h3 nexthop via 2001:db8:3::1 + + tc qdisc add dev $h3 ingress +} + +h3_destroy() +{ + tc qdisc del dev $h3 ingress + + ip route del 2001:db8:2::/64 vrf v$h3 + ip route del 2001:db8:1::/64 vrf v$h3 + + ip route del 198.51.100.16/28 vrf v$h3 + ip route del 198.51.100.0/28 vrf v$h3 + + simple_if_fini $h3 198.51.100.34/28 2001:db8:3::2/64 +} + +router_create() +{ + ip link set dev $rp1 up + ip link set dev $rp2 up + ip link set dev $rp3 up + + ip address add 198.51.100.1/28 dev $rp1 + ip address add 198.51.100.17/28 dev $rp2 + ip address add 198.51.100.33/28 dev $rp3 + + ip address add 2001:db8:1::1/64 dev $rp1 + ip address add 2001:db8:2::1/64 dev $rp2 + ip address add 2001:db8:3::1/64 dev $rp3 +} + +router_destroy() +{ + ip address del 2001:db8:3::1/64 dev $rp3 + ip address del 2001:db8:2::1/64 dev $rp2 + ip address del 2001:db8:1::1/64 dev $rp1 + + ip address del 198.51.100.33/28 dev $rp3 + ip address del 198.51.100.17/28 dev $rp2 + ip address del 198.51.100.1/28 dev $rp1 + + ip link set dev $rp3 down + ip link set dev $rp2 down + ip link set dev $rp1 down +} + +start_mcd() +{ + SMCROUTEDIR="$(mktemp -d)" + + for ((i = 1; i <= $NUM_NETIFS; ++i)); do + echo "phyint ${NETIFS[p$i]} enable" >> \ + $SMCROUTEDIR/$table_name.conf + done + + $MCD -N -I $table_name -f $SMCROUTEDIR/$table_name.conf \ + -P $SMCROUTEDIR/$table_name.pid +} + +kill_mcd() +{ + pkill $MCD + rm -rf $SMCROUTEDIR +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + start_mcd + + vrf_prepare + + h1_create + h2_create + h3_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h3_destroy + h2_destroy + h1_destroy + + vrf_cleanup + + kill_mcd +} + +create_mcast_sg() +{ + local if_name=$1; shift + local s_addr=$1; shift + local mcast=$1; shift + local dest_ifs=${@} + + $MC_CLI -I $table_name add $if_name $s_addr $mcast $dest_ifs +} + +delete_mcast_sg() +{ + local if_name=$1; shift + local s_addr=$1; shift + local mcast=$1; shift + local dest_ifs=${@} + + $MC_CLI -I $table_name remove $if_name $s_addr $mcast $dest_ifs +} + +mcast_v4() +{ + # Add two interfaces to an MC group, send a packet to the MC group and + # verify packets are received on both. Then delete the route and verify + # packets are no longer received. + + RET=0 + + tc filter add dev $h2 ingress protocol ip pref 1 handle 122 flower \ + dst_ip 225.1.2.3 action drop + tc filter add dev $h3 ingress protocol ip pref 1 handle 133 flower \ + dst_ip 225.1.2.3 action drop + + create_mcast_sg $rp1 198.51.100.2 225.1.2.3 $rp2 $rp3 + + # Send frames with the corresponding L2 destination address. + $MZ $h1 -c 5 -p 128 -t udp -a 00:11:22:33:44:55 -b 01:00:5e:01:02:03 \ + -A 198.51.100.2 -B 225.1.2.3 -q + + tc_check_packets "dev $h2 ingress" 122 5 + check_err $? "Multicast not received on first host" + tc_check_packets "dev $h3 ingress" 133 5 + check_err $? "Multicast not received on second host" + + delete_mcast_sg $rp1 198.51.100.2 225.1.2.3 $rp2 $rp3 + + $MZ $h1 -c 5 -p 128 -t udp -a 00:11:22:33:44:55 -b 01:00:5e:01:02:03 \ + -A 198.51.100.2 -B 225.1.2.3 -q + + tc_check_packets "dev $h2 ingress" 122 5 + check_err $? "Multicast received on host although deleted" + tc_check_packets "dev $h3 ingress" 133 5 + check_err $? "Multicast received on second host although deleted" + + tc filter del dev $h3 ingress protocol ip pref 1 handle 133 flower + tc filter del dev $h2 ingress protocol ip pref 1 handle 122 flower + + log_test "mcast IPv4" +} + +mcast_v6() +{ + # Add two interfaces to an MC group, send a packet to the MC group and + # verify packets are received on both. Then delete the route and verify + # packets are no longer received. + + RET=0 + + tc filter add dev $h2 ingress protocol ipv6 pref 1 handle 122 flower \ + dst_ip ff0e::3 action drop + tc filter add dev $h3 ingress protocol ipv6 pref 1 handle 133 flower \ + dst_ip ff0e::3 action drop + + create_mcast_sg $rp1 2001:db8:1::2 ff0e::3 $rp2 $rp3 + + # Send frames with the corresponding L2 destination address. + $MZ $h1 -6 -c 5 -p 128 -t udp -a 00:11:22:33:44:55 \ + -b 33:33:00:00:00:03 -A 2001:db8:1::2 -B ff0e::3 -q + + tc_check_packets "dev $h2 ingress" 122 5 + check_err $? "Multicast not received on first host" + tc_check_packets "dev $h3 ingress" 133 5 + check_err $? "Multicast not received on second host" + + delete_mcast_sg $rp1 2001:db8:1::2 ff0e::3 $rp2 $rp3 + + $MZ $h1 -6 -c 5 -p 128 -t udp -a 00:11:22:33:44:55 \ + -b 33:33:00:00:00:03 -A 2001:db8:1::2 -B ff0e::3 -q + + tc_check_packets "dev $h2 ingress" 122 5 + check_err $? "Multicast received on first host although deleted" + tc_check_packets "dev $h3 ingress" 133 5 + check_err $? "Multicast received on second host although deleted" + + tc filter del dev $h3 ingress protocol ipv6 pref 1 handle 133 flower + tc filter del dev $h2 ingress protocol ipv6 pref 1 handle 122 flower + + log_test "mcast IPv6" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router_vid_1.sh b/tools/testing/selftests/net/forwarding/router_vid_1.sh new file mode 100755 index 000000000000..a7306c7ac06d --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_vid_1.sh @@ -0,0 +1,135 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS="ping_ipv4 ping_ipv6" +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + vrf_create "vrf-h1" + ip link set dev vrf-h1 up + + ip link set dev $h1 up + vlan_create $h1 1 vrf-h1 192.0.2.2/24 2001:db8:1::2/64 + + ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1 + ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1 +} + +h1_destroy() +{ + ip route del 2001:db8:2::/64 vrf vrf-h1 + ip route del 198.51.100.0/24 vrf vrf-h1 + + vlan_destroy $h1 1 + ip link set dev $h1 down + + ip link set dev vrf-h1 down + vrf_destroy "vrf-h1" +} + +h2_create() +{ + vrf_create "vrf-h2" + ip link set dev vrf-h2 up + + ip link set dev $h2 up + vlan_create $h2 1 vrf-h2 198.51.100.2/24 2001:db8:2::2/64 + + ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1 + ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1 +} + +h2_destroy() +{ + ip route del 2001:db8:1::/64 vrf vrf-h2 + ip route del 192.0.2.0/24 vrf vrf-h2 + + vlan_destroy $h2 1 + ip link set dev $h2 down + + ip link set dev vrf-h2 down + vrf_destroy "vrf-h2" +} + +router_create() +{ + ip link set dev $rp1 up + ip link add link $rp1 name $rp1.1 up type vlan id 1 + + ip address add 192.0.2.1/24 dev $rp1.1 + ip address add 2001:db8:1::1/64 dev $rp1.1 + + ip link set dev $rp2 up + ip link add link $rp2 name $rp2.1 up type vlan id 1 + + ip address add 198.51.100.1/24 dev $rp2.1 + ip address add 2001:db8:2::1/64 dev $rp2.1 +} + +router_destroy() +{ + ip address del 2001:db8:2::1/64 dev $rp2.1 + ip address del 198.51.100.1/24 dev $rp2.1 + + ip link del dev $rp2.1 + ip link set dev $rp2 down + + ip address del 2001:db8:1::1/64 dev $rp1.1 + ip address del 192.0.2.1/24 dev $rp1.1 + + ip link del dev $rp1.1 + ip link set dev $rp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1.1 198.51.100.2 +} + +ping_ipv6() +{ + ping6_test $h1.1 2001:db8:2::2 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh new file mode 100755 index 000000000000..bb10e33690b2 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -0,0 +1,786 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------+ +----------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1 | | + $h2 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# +----|---------------+ +--|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1d) + $swp2 | | +# | | | | +# | | + vx1 (vxlan) | | +# | | local 192.0.2.17 | | +# | | remote 192.0.2.34 192.0.2.50 | | +# | | id 1000 dstport $VXPORT | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 192.0.2.32/28 via 192.0.2.18 | +# | 192.0.2.48/28 via 192.0.2.18 | +# | | +# | + $rp1 | +# | | 192.0.2.17/28 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | VRP2 (vrf) | +# | + $rp2 | +# | 192.0.2.18/28 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | + v1 (veth) + v3 (veth) | +# | | 192.0.2.33/28 | 192.0.2.49/28 | +# +----|---------------------------------------|----------------+ +# | | +# +----|------------------------------+ +----|------------------------------+ +# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) | +# | 192.0.2.34/28 | | 192.0.2.50/28 | +# | | | | +# | 192.0.2.16/28 via 192.0.2.33 | | 192.0.2.16/28 via 192.0.2.49 | +# | 192.0.2.50/32 via 192.0.2.33 | | 192.0.2.34/32 via 192.0.2.49 | +# | | | | +# | +-------------------------------+ | | +-------------------------------+ | +# | | BR2 (802.1d) | | | | BR2 (802.1d) | | +# | | + vx2 (vxlan) | | | | + vx2 (vxlan) | | +# | | local 192.0.2.34 | | | | local 192.0.2.50 | | +# | | remote 192.0.2.17 | | | | remote 192.0.2.17 | | +# | | remote 192.0.2.50 | | | | remote 192.0.2.34 | | +# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | | +# | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | | | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | VW2 (vrf) | | | | | VW2 (vrf) | | +# | | + w2 (veth) | | | | + w2 (veth) | | +# | | 192.0.2.3/28 | | | | 192.0.2.4/28 | | +# | +-------------------------------+ | | +-------------------------------+ | +# +-----------------------------------+ +-----------------------------------+ + +: ${VXPORT:=4789} +export VXPORT + +: ${ALL_TESTS:=" + ping_ipv4 + test_flood + test_unicast + test_ttl + test_tos + test_ecn_encap + test_ecn_decap + reapply_config + ping_ipv4 + test_flood + test_unicast + test_learning + "} + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 + tc qdisc add dev $h1 clsact +} + +h1_destroy() +{ + tc qdisc del dev $h1 clsact + simple_if_fini $h1 192.0.2.1/28 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/28 +} + +rp1_set_addr() +{ + ip address add dev $rp1 192.0.2.17/28 + + ip route add 192.0.2.32/28 nexthop via 192.0.2.18 + ip route add 192.0.2.48/28 nexthop via 192.0.2.18 +} + +rp1_unset_addr() +{ + ip route del 192.0.2.48/28 nexthop via 192.0.2.18 + ip route del 192.0.2.32/28 nexthop via 192.0.2.18 + + ip address del dev $rp1 192.0.2.17/28 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + rp1_set_addr + + ip link add name vx1 type vxlan id 1000 \ + local 192.0.2.17 dstport "$VXPORT" \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx1 up + + ip link set dev vx1 master br1 + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + bridge fdb append dev vx1 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx1 00:00:00:00:00:00 dst 192.0.2.50 self +} + +switch_destroy() +{ + rp1_unset_addr + ip link set dev $rp1 down + + bridge fdb del dev vx1 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx1 00:00:00:00:00:00 dst 192.0.2.34 self + + ip link set dev vx1 nomaster + ip link set dev vx1 down + ip link del dev vx1 + + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev br1 down + ip link del dev br1 +} + +vrp2_create() +{ + simple_if_init $rp2 192.0.2.18/28 + __simple_if_init v1 v$rp2 192.0.2.33/28 + __simple_if_init v3 v$rp2 192.0.2.49/28 + tc qdisc add dev v1 clsact +} + +vrp2_destroy() +{ + tc qdisc del dev v1 clsact + __simple_if_fini v3 192.0.2.49/28 + __simple_if_fini v1 192.0.2.33/28 + simple_if_fini $rp2 192.0.2.18/28 +} + +ns_init_common() +{ + local in_if=$1; shift + local in_addr=$1; shift + local other_in_addr=$1; shift + local nh_addr=$1; shift + local host_addr=$1; shift + + ip link set dev $in_if up + ip address add dev $in_if $in_addr/28 + tc qdisc add dev $in_if clsact + + ip link add name br2 type bridge vlan_filtering 0 + ip link set dev br2 up + + ip link add name w1 type veth peer name w2 + + ip link set dev w1 master br2 + ip link set dev w1 up + + ip link add name vx2 type vxlan id 1000 local $in_addr dstport "$VXPORT" + ip link set dev vx2 up + bridge fdb append dev vx2 00:00:00:00:00:00 dst 192.0.2.17 self + bridge fdb append dev vx2 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx2 master br2 + tc qdisc add dev vx2 clsact + + simple_if_init w2 $host_addr/28 + + ip route add 192.0.2.16/28 nexthop via $nh_addr + ip route add $other_in_addr/32 nexthop via $nh_addr +} +export -f ns_init_common + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 \ + ns_init_common v2 192.0.2.34 192.0.2.50 192.0.2.33 192.0.2.3 +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +ns2_create() +{ + ip netns add ns2 + ip link set dev v4 netns ns2 + in_ns ns2 \ + ns_init_common v4 192.0.2.50 192.0.2.34 192.0.2.49 192.0.2.4 +} + +ns2_destroy() +{ + ip netns exec ns2 ip link set dev v4 netns 1 + ip netns del ns2 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + ip link add name v3 type veth peer name v4 + vrp2_create + ns1_create + ns2_create + + r1_mac=$(in_ns ns1 mac_get w2) + r2_mac=$(in_ns ns2 mac_get w2) + h2_mac=$(mac_get $h2) +} + +cleanup() +{ + pre_cleanup + + ns2_destroy + ns1_destroy + vrp2_destroy + ip link del dev v3 + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +# For the first round of tests, vx1 is the first device to get attached to the +# bridge, and that at the point that the local IP is already configured. Try the +# other scenario of attaching the device to an already-offloaded bridge, and +# only then attach the local IP. +reapply_config() +{ + echo "Reapplying configuration" + + bridge fdb del dev vx1 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx1 00:00:00:00:00:00 dst 192.0.2.34 self + rp1_unset_addr + ip link set dev vx1 nomaster + sleep 5 + + ip link set dev vx1 master br1 + bridge fdb append dev vx1 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx1 00:00:00:00:00:00 dst 192.0.2.50 self + sleep 1 + rp1_set_addr + sleep 5 +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.2 ": local->local" + ping_test $h1 192.0.2.3 ": local->remote 1" + ping_test $h1 192.0.2.4 ": local->remote 2" +} + +maybe_in_ns() +{ + echo ${1:+in_ns} $1 +} + +__flood_counter_add_del() +{ + local add_del=$1; shift + local dev=$1; shift + local ns=$1; shift + + # Putting the ICMP capture both to HW and to SW will end up + # double-counting the packets that are trapped to slow path, such as for + # the unicast test. Adding either skip_hw or skip_sw fixes this problem, + # but with skip_hw, the flooded packets are not counted at all, because + # those are dropped due to MAC address mismatch; and skip_sw is a no-go + # for veth-based topologies. + # + # So try to install with skip_sw and fall back to skip_sw if that fails. + + $(maybe_in_ns $ns) __icmp_capture_add_del \ + $add_del 100 "" $dev skip_sw 2>/dev/null || \ + $(maybe_in_ns $ns) __icmp_capture_add_del \ + $add_del 100 "" $dev skip_hw +} + +flood_counter_install() +{ + __flood_counter_add_del add "$@" +} + +flood_counter_uninstall() +{ + __flood_counter_add_del del "$@" +} + +flood_fetch_stat() +{ + local dev=$1; shift + local ns=$1; shift + + $(maybe_in_ns $ns) tc_rule_stats_get $dev 100 ingress +} + +flood_fetch_stats() +{ + local counters=("${@}") + local counter + + for counter in "${counters[@]}"; do + flood_fetch_stat $counter + done +} + +vxlan_flood_test() +{ + local mac=$1; shift + local dst=$1; shift + local -a expects=("${@}") + + local -a counters=($h2 "vx2 ns1" "vx2 ns2") + local counter + local key + + for counter in "${counters[@]}"; do + flood_counter_install $counter + done + + local -a t0s=($(flood_fetch_stats "${counters[@]}")) + $MZ $h1 -c 10 -d 100msec -p 64 -b $mac -B $dst -t icmp -q + sleep 1 + local -a t1s=($(flood_fetch_stats "${counters[@]}")) + + for key in ${!t0s[@]}; do + local delta=$((t1s[$key] - t0s[$key])) + local expect=${expects[$key]} + + ((expect == delta)) + check_err $? "${counters[$key]}: Expected to capture $expect packets, got $delta." + done + + for counter in "${counters[@]}"; do + flood_counter_uninstall $counter + done +} + +__test_flood() +{ + local mac=$1; shift + local dst=$1; shift + local what=$1; shift + + RET=0 + + vxlan_flood_test $mac $dst 10 10 10 + + log_test "VXLAN: $what" +} + +test_flood() +{ + __test_flood de:ad:be:ef:13:37 192.0.2.100 "flood" +} + +vxlan_fdb_add_del() +{ + local add_del=$1; shift + local mac=$1; shift + local dev=$1; shift + local dst=$1; shift + + bridge fdb $add_del dev $dev $mac self static permanent \ + ${dst:+dst} $dst 2>/dev/null + bridge fdb $add_del dev $dev $mac master static 2>/dev/null +} + +__test_unicast() +{ + local mac=$1; shift + local dst=$1; shift + local hit_idx=$1; shift + local what=$1; shift + + RET=0 + + local -a expects=(0 0 0) + expects[$hit_idx]=10 + + vxlan_flood_test $mac $dst "${expects[@]}" + + log_test "VXLAN: $what" +} + +test_unicast() +{ + local -a targets=("$h2_mac $h2" + "$r1_mac vx1 192.0.2.34" + "$r2_mac vx1 192.0.2.50") + local target + + for target in "${targets[@]}"; do + vxlan_fdb_add_del add $target + done + + __test_unicast $h2_mac 192.0.2.2 0 "local MAC unicast" + __test_unicast $r1_mac 192.0.2.3 1 "remote MAC 1 unicast" + __test_unicast $r2_mac 192.0.2.4 2 "remote MAC 2 unicast" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del del $target + done +} + +vxlan_ping_test() +{ + local ping_dev=$1; shift + local ping_dip=$1; shift + local ping_args=$1; shift + local capture_dev=$1; shift + local capture_dir=$1; shift + local capture_pref=$1; shift + local expect=$1; shift + + local t0=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) + ping_do $ping_dev $ping_dip "$ping_args" + local t1=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) + local delta=$((t1 - t0)) + + # Tolerate a couple stray extra packets. + ((expect <= delta && delta <= expect + 2)) + check_err $? "$capture_dev: Expected to capture $expect packets, got $delta." +} + +test_ttl() +{ + RET=0 + + tc filter add dev v1 egress pref 77 prot ip \ + flower ip_ttl 99 action pass + vxlan_ping_test $h1 192.0.2.3 "" v1 egress 77 10 + tc filter del dev v1 egress pref 77 prot ip + + log_test "VXLAN: envelope TTL" +} + +test_tos() +{ + RET=0 + + tc filter add dev v1 egress pref 77 prot ip \ + flower ip_tos 0x40 action pass + vxlan_ping_test $h1 192.0.2.3 "-Q 0x40" v1 egress 77 10 + vxlan_ping_test $h1 192.0.2.3 "-Q 0x30" v1 egress 77 0 + tc filter del dev v1 egress pref 77 prot ip + + log_test "VXLAN: envelope TOS inheritance" +} + +__test_ecn_encap() +{ + local q=$1; shift + local tos=$1; shift + + RET=0 + + tc filter add dev v1 egress pref 77 prot ip \ + flower ip_tos $tos action pass + sleep 1 + vxlan_ping_test $h1 192.0.2.3 "-Q $q" v1 egress 77 10 + tc filter del dev v1 egress pref 77 prot ip + + log_test "VXLAN: ECN encap: $q->$tos" +} + +test_ecn_encap() +{ + # In accordance with INET_ECN_encapsulate() + __test_ecn_encap 0x00 0x00 + __test_ecn_encap 0x01 0x01 + __test_ecn_encap 0x02 0x02 + __test_ecn_encap 0x03 0x02 +} + +vxlan_encapped_ping_do() +{ + local count=$1; shift + local dev=$1; shift + local next_hop_mac=$1; shift + local dest_ip=$1; shift + local dest_mac=$1; shift + local inner_tos=$1; shift + local outer_tos=$1; shift + + $MZ $dev -c $count -d 100msec -q \ + -b $next_hop_mac -B $dest_ip \ + -t udp tos=$outer_tos,sp=23456,dp=$VXPORT,p=$(: + )"08:"$( : VXLAN flags + )"00:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"$(mac_get w2):"$( : ETH saddr + )"08:00:"$( : ETH type + )"45:"$( : IP version + IHL + )"$inner_tos:"$( : IP TOS + )"00:54:"$( : IP total length + )"99:83:"$( : IP identification + )"40:00:"$( : IP flags + frag off + )"40:"$( : IP TTL + )"01:"$( : IP proto + )"00:00:"$( : IP header csum + )"c0:00:02:03:"$( : IP saddr: 192.0.2.3 + )"c0:00:02:01:"$( : IP daddr: 192.0.2.1 + )"08:"$( : ICMP type + )"00:"$( : ICMP code + )"8b:f2:"$( : ICMP csum + )"1f:6a:"$( : ICMP request identifier + )"00:01:"$( : ICMP request sequence number + )"4f:ff:c5:5b:00:00:00:00:"$( : ICMP payload + )"6d:74:0b:00:00:00:00:00:"$( : + )"10:11:12:13:14:15:16:17:"$( : + )"18:19:1a:1b:1c:1d:1e:1f:"$( : + )"20:21:22:23:24:25:26:27:"$( : + )"28:29:2a:2b:2c:2d:2e:2f:"$( : + )"30:31:32:33:34:35:36:37" +} +export -f vxlan_encapped_ping_do + +vxlan_encapped_ping_test() +{ + local ping_dev=$1; shift + local nh_dev=$1; shift + local ping_dip=$1; shift + local inner_tos=$1; shift + local outer_tos=$1; shift + local stat_get=$1; shift + local expect=$1; shift + + local t0=$($stat_get) + + in_ns ns1 \ + vxlan_encapped_ping_do 10 $ping_dev $(mac_get $nh_dev) \ + $ping_dip $(mac_get $h1) \ + $inner_tos $outer_tos + + local t1=$($stat_get) + local delta=$((t1 - t0)) + + # Tolerate a couple stray extra packets. + ((expect <= delta && delta <= expect + 2)) + check_err $? "Expected to capture $expect packets, got $delta." +} +export -f vxlan_encapped_ping_test + +__test_ecn_decap() +{ + local orig_inner_tos=$1; shift + local orig_outer_tos=$1; shift + local decapped_tos=$1; shift + + RET=0 + + tc filter add dev $h1 ingress pref 77 prot ip \ + flower ip_tos $decapped_tos action drop + sleep 1 + vxlan_encapped_ping_test v2 v1 192.0.2.17 \ + $orig_inner_tos $orig_outer_tos \ + "tc_rule_stats_get $h1 77 ingress" 10 + tc filter del dev $h1 ingress pref 77 + + log_test "VXLAN: ECN decap: $orig_outer_tos/$orig_inner_tos->$decapped_tos" +} + +test_ecn_decap_error() +{ + local orig_inner_tos=00 + local orig_outer_tos=03 + + RET=0 + + vxlan_encapped_ping_test v2 v1 192.0.2.17 \ + $orig_inner_tos $orig_outer_tos \ + "link_stats_rx_errors_get vx1" 10 + + log_test "VXLAN: ECN decap: $orig_outer_tos/$orig_inner_tos->error" +} + +test_ecn_decap() +{ + # In accordance with INET_ECN_decapsulate() + __test_ecn_decap 00 00 0x00 + __test_ecn_decap 01 01 0x01 + __test_ecn_decap 02 01 0x02 + __test_ecn_decap 01 03 0x03 + __test_ecn_decap 02 03 0x03 + test_ecn_decap_error +} + +test_learning() +{ + local mac=de:ad:be:ef:13:37 + local dst=192.0.2.100 + + # Enable learning on the VxLAN device and set ageing time to 10 seconds + ip link set dev br1 type bridge ageing_time 1000 + ip link set dev vx1 type vxlan ageing 10 + ip link set dev vx1 type vxlan learning + reapply_config + + # Check that flooding works + RET=0 + + vxlan_flood_test $mac $dst 10 10 10 + + log_test "VXLAN: flood before learning" + + # Send a packet with source mac set to $mac from host w2 and check that + # a corresponding entry is created in VxLAN device vx1 + RET=0 + + in_ns ns1 $MZ w2 -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff -B $dst \ + -t icmp -q + sleep 1 + + bridge fdb show brport vx1 | grep $mac | grep -q self + check_err $? + bridge fdb show brport vx1 | grep $mac | grep -q -v self + check_err $? + + log_test "VXLAN: show learned FDB entry" + + # Repeat first test and check that packets only reach host w2 in ns1 + RET=0 + + vxlan_flood_test $mac $dst 0 10 0 + + log_test "VXLAN: learned FDB entry" + + # Delete the learned FDB entry from the VxLAN and bridge devices and + # check that packets are flooded + RET=0 + + bridge fdb del dev vx1 $mac master self + sleep 1 + + vxlan_flood_test $mac $dst 10 10 10 + + log_test "VXLAN: deletion of learned FDB entry" + + # Re-learn the first FDB entry and check that it is correctly aged-out + RET=0 + + in_ns ns1 $MZ w2 -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff -B $dst \ + -t icmp -q + sleep 1 + + bridge fdb show brport vx1 | grep $mac | grep -q self + check_err $? + bridge fdb show brport vx1 | grep $mac | grep -q -v self + check_err $? + + vxlan_flood_test $mac $dst 0 10 0 + + sleep 20 + + bridge fdb show brport vx1 | grep $mac | grep -q self + check_fail $? + bridge fdb show brport vx1 | grep $mac | grep -q -v self + check_fail $? + + vxlan_flood_test $mac $dst 10 10 10 + + log_test "VXLAN: Ageing of learned FDB entry" + + # Toggle learning on the bridge port and check that the bridge's FDB + # is populated only when it should + RET=0 + + ip link set dev vx1 type bridge_slave learning off + + in_ns ns1 $MZ w2 -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff -B $dst \ + -t icmp -q + sleep 1 + + bridge fdb show brport vx1 | grep $mac | grep -q -v self + check_fail $? + + ip link set dev vx1 type bridge_slave learning on + + in_ns ns1 $MZ w2 -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff -B $dst \ + -t icmp -q + sleep 1 + + bridge fdb show brport vx1 | grep $mac | grep -q -v self + check_err $? + + log_test "VXLAN: learning toggling on bridge port" + + # Restore previous settings + ip link set dev vx1 type vxlan nolearning + ip link set dev vx1 type vxlan ageing 300 + ip link set dev br1 type bridge ageing_time 30000 + reapply_config +} + +test_all() +{ + echo "Running tests with UDP port $VXPORT" + tests_run +} + +trap cleanup EXIT + +setup_prepare +setup_wait +test_all + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472.sh new file mode 100755 index 000000000000..3bf3da69195f --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# A wrapper to run VXLAN tests with an unusual port number. + +VXPORT=8472 +ALL_TESTS=" + ping_ipv4 +" +source vxlan_bridge_1d.sh diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh new file mode 100755 index 000000000000..a5789721ba92 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh @@ -0,0 +1,860 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1.10 | | + $h2.10 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | | | | | +# | | + $h1.20 | | | + $h2.20 | +# | \ | 198.51.100.1/24 | | \ | 198.51.100.2/24 | +# | \| | | \| | +# | + $h1 | | + $h2 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1q) + $swp2 | | +# | | vid 10 vid 10 | | +# | | vid 20 vid 20 | | +# | | | | +# | | + vx10 (vxlan) + vx20 (vxlan) | | +# | | local 192.0.2.17 local 192.0.2.17 | | +# | | remote 192.0.2.34 192.0.2.50 remote 192.0.2.34 192.0.2.50 | | +# | | id 1000 dstport $VXPORT id 2000 dstport $VXPORT | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | +-----------------------------------------------------------------------+ | +# | | +# | 192.0.2.32/28 via 192.0.2.18 | +# | 192.0.2.48/28 via 192.0.2.18 | +# | | +# | + $rp1 | +# | | 192.0.2.17/28 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | VRP2 (vrf) | +# | + $rp2 | +# | 192.0.2.18/28 | +# | | (maybe) HW +# ============================================================================= +# | | (likely) SW +# | + v1 (veth) + v3 (veth) | +# | | 192.0.2.33/28 | 192.0.2.49/28 | +# +----|---------------------------------------|----------------+ +# | | +# +----|------------------------------+ +----|------------------------------+ +# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) | +# | 192.0.2.34/28 | | 192.0.2.50/28 | +# | | | | +# | 192.0.2.16/28 via 192.0.2.33 | | 192.0.2.16/28 via 192.0.2.49 | +# | 192.0.2.50/32 via 192.0.2.33 | | 192.0.2.34/32 via 192.0.2.49 | +# | | | | +# | +-------------------------------+ | | +-------------------------------+ | +# | | BR2 (802.1q) | | | | BR2 (802.1q) | | +# | | + vx10 (vxlan) | | | | + vx10 (vxlan) | | +# | | local 192.0.2.34 | | | | local 192.0.2.50 | | +# | | remote 192.0.2.17 | | | | remote 192.0.2.17 | | +# | | remote 192.0.2.50 | | | | remote 192.0.2.34 | | +# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | | +# | | vid 10 pvid untagged | | | | vid 10 pvid untagged | | +# | | | | | | | | +# | | + vx20 (vxlan) | | | | + vx20 (vxlan) | | +# | | local 192.0.2.34 | | | | local 192.0.2.50 | | +# | | remote 192.0.2.17 | | | | remote 192.0.2.17 | | +# | | remote 192.0.2.50 | | | | remote 192.0.2.34 | | +# | | id 2000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | | +# | | vid 20 pvid untagged | | | | vid 20 pvid untagged | | +# | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | | | vid 10 | | | | | vid 10 | | +# | | | vid 20 | | | | | vid 20 | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | | | | | +# | +--|----------------------------+ | | +--|----------------------------+ | +# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | | +# | | |\ | | | | |\ | | +# | | | + w2.10 | | | | | + w2.10 | | +# | | | 192.0.2.3/28 | | | | | 192.0.2.4/28 | | +# | | | | | | | | | | +# | | + w2.20 | | | | + w2.20 | | +# | | 198.51.100.3/24 | | | | 198.51.100.4/24 | | +# | +-------------------------------+ | | +-------------------------------+ | +# +-----------------------------------+ +-----------------------------------+ + +: ${VXPORT:=4789} +export VXPORT + +: ${ALL_TESTS:=" + ping_ipv4 + test_flood + test_unicast + reapply_config + ping_ipv4 + test_flood + test_unicast + test_learning + test_pvid + "} + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init $h1 + tc qdisc add dev $h1 clsact + vlan_create $h1 10 v$h1 192.0.2.1/28 + vlan_create $h1 20 v$h1 198.51.100.1/24 +} + +h1_destroy() +{ + vlan_destroy $h1 20 + vlan_destroy $h1 10 + tc qdisc del dev $h1 clsact + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + tc qdisc add dev $h2 clsact + vlan_create $h2 10 v$h2 192.0.2.2/28 + vlan_create $h2 20 v$h2 198.51.100.2/24 +} + +h2_destroy() +{ + vlan_destroy $h2 20 + vlan_destroy $h2 10 + tc qdisc del dev $h2 clsact + simple_if_fini $h2 +} + +rp1_set_addr() +{ + ip address add dev $rp1 192.0.2.17/28 + + ip route add 192.0.2.32/28 nexthop via 192.0.2.18 + ip route add 192.0.2.48/28 nexthop via 192.0.2.18 +} + +rp1_unset_addr() +{ + ip route del 192.0.2.48/28 nexthop via 192.0.2.18 + ip route del 192.0.2.32/28 nexthop via 192.0.2.18 + + ip address del dev $rp1 192.0.2.17/28 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + rp1_set_addr + + ip link add name vx10 type vxlan id 1000 \ + local 192.0.2.17 dstport "$VXPORT" \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx10 up + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 2000 \ + local 192.0.2.17 dstport "$VXPORT" \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx20 up + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + bridge vlan add vid 10 dev $swp1 + bridge vlan add vid 20 dev $swp1 + + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + bridge vlan add vid 10 dev $swp2 + bridge vlan add vid 20 dev $swp2 + + bridge fdb append dev vx10 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx10 00:00:00:00:00:00 dst 192.0.2.50 self + + bridge fdb append dev vx20 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst 192.0.2.50 self +} + +switch_destroy() +{ + bridge fdb del dev vx20 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx20 00:00:00:00:00:00 dst 192.0.2.34 self + + bridge fdb del dev vx10 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx10 00:00:00:00:00:00 dst 192.0.2.34 self + + bridge vlan del vid 20 dev $swp2 + bridge vlan del vid 10 dev $swp2 + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + + bridge vlan del vid 20 dev $swp1 + bridge vlan del vid 10 dev $swp1 + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + bridge vlan del vid 20 dev vx20 + ip link set dev vx20 nomaster + + ip link set dev vx20 down + ip link del dev vx20 + + bridge vlan del vid 10 dev vx10 + ip link set dev vx10 nomaster + + ip link set dev vx10 down + ip link del dev vx10 + + rp1_unset_addr + ip link set dev $rp1 down + + ip link set dev br1 down + ip link del dev br1 +} + +vrp2_create() +{ + simple_if_init $rp2 192.0.2.18/28 + __simple_if_init v1 v$rp2 192.0.2.33/28 + __simple_if_init v3 v$rp2 192.0.2.49/28 + tc qdisc add dev v1 clsact +} + +vrp2_destroy() +{ + tc qdisc del dev v1 clsact + __simple_if_fini v3 192.0.2.49/28 + __simple_if_fini v1 192.0.2.33/28 + simple_if_fini $rp2 192.0.2.18/28 +} + +ns_init_common() +{ + local in_if=$1; shift + local in_addr=$1; shift + local other_in_addr=$1; shift + local nh_addr=$1; shift + local host_addr1=$1; shift + local host_addr2=$1; shift + + ip link set dev $in_if up + ip address add dev $in_if $in_addr/28 + tc qdisc add dev $in_if clsact + + ip link add name br2 type bridge vlan_filtering 1 vlan_default_pvid 0 + ip link set dev br2 up + + ip link add name w1 type veth peer name w2 + + ip link set dev w1 master br2 + ip link set dev w1 up + + bridge vlan add vid 10 dev w1 + bridge vlan add vid 20 dev w1 + + ip link add name vx10 type vxlan id 1000 local $in_addr \ + dstport "$VXPORT" + ip link set dev vx10 up + bridge fdb append dev vx10 00:00:00:00:00:00 dst 192.0.2.17 self + bridge fdb append dev vx10 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx10 master br2 + tc qdisc add dev vx10 clsact + + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link add name vx20 type vxlan id 2000 local $in_addr \ + dstport "$VXPORT" + ip link set dev vx20 up + bridge fdb append dev vx20 00:00:00:00:00:00 dst 192.0.2.17 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst $other_in_addr self + + ip link set dev vx20 master br2 + tc qdisc add dev vx20 clsact + + bridge vlan add vid 20 dev vx20 pvid untagged + + simple_if_init w2 + vlan_create w2 10 vw2 $host_addr1/28 + vlan_create w2 20 vw2 $host_addr2/24 + + ip route add 192.0.2.16/28 nexthop via $nh_addr + ip route add $other_in_addr/32 nexthop via $nh_addr +} +export -f ns_init_common + +ns1_create() +{ + ip netns add ns1 + ip link set dev v2 netns ns1 + in_ns ns1 \ + ns_init_common v2 192.0.2.34 192.0.2.50 192.0.2.33 192.0.2.3 \ + 198.51.100.3 +} + +ns1_destroy() +{ + ip netns exec ns1 ip link set dev v2 netns 1 + ip netns del ns1 +} + +ns2_create() +{ + ip netns add ns2 + ip link set dev v4 netns ns2 + in_ns ns2 \ + ns_init_common v4 192.0.2.50 192.0.2.34 192.0.2.49 192.0.2.4 \ + 198.51.100.4 +} + +ns2_destroy() +{ + ip netns exec ns2 ip link set dev v4 netns 1 + ip netns del ns2 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1=${NETIFS[p5]} + rp2=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + switch_create + + ip link add name v1 type veth peer name v2 + ip link add name v3 type veth peer name v4 + vrp2_create + ns1_create + ns2_create + + r1_mac=$(in_ns ns1 mac_get w2) + r2_mac=$(in_ns ns2 mac_get w2) + h2_mac=$(mac_get $h2) +} + +cleanup() +{ + pre_cleanup + + ns2_destroy + ns1_destroy + vrp2_destroy + ip link del dev v3 + ip link del dev v1 + + switch_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +# For the first round of tests, vx10 and vx20 were the first devices to get +# attached to the bridge, and that at the point that the local IP is already +# configured. Try the other scenario of attaching these devices to a bridge +# that already has local ports members, and only then assign the local IP. +reapply_config() +{ + log_info "Reapplying configuration" + + bridge fdb del dev vx20 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx20 00:00:00:00:00:00 dst 192.0.2.34 self + + bridge fdb del dev vx10 00:00:00:00:00:00 dst 192.0.2.50 self + bridge fdb del dev vx10 00:00:00:00:00:00 dst 192.0.2.34 self + + ip link set dev vx20 nomaster + ip link set dev vx10 nomaster + + rp1_unset_addr + sleep 5 + + ip link set dev vx10 master br1 + bridge vlan add vid 10 dev vx10 pvid untagged + + ip link set dev vx20 master br1 + bridge vlan add vid 20 dev vx20 pvid untagged + + bridge fdb append dev vx10 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx10 00:00:00:00:00:00 dst 192.0.2.50 self + + bridge fdb append dev vx20 00:00:00:00:00:00 dst 192.0.2.34 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst 192.0.2.50 self + + rp1_set_addr + sleep 5 +} + +ping_ipv4() +{ + ping_test $h1.10 192.0.2.2 ": local->local vid 10" + ping_test $h1.20 198.51.100.2 ": local->local vid 20" + ping_test $h1.10 192.0.2.3 ": local->remote 1 vid 10" + ping_test $h1.10 192.0.2.4 ": local->remote 2 vid 10" + ping_test $h1.20 198.51.100.3 ": local->remote 1 vid 20" + ping_test $h1.20 198.51.100.4 ": local->remote 2 vid 20" +} + +maybe_in_ns() +{ + echo ${1:+in_ns} $1 +} + +__flood_counter_add_del() +{ + local add_del=$1; shift + local dev=$1; shift + local ns=$1; shift + + # Putting the ICMP capture both to HW and to SW will end up + # double-counting the packets that are trapped to slow path, such as for + # the unicast test. Adding either skip_hw or skip_sw fixes this problem, + # but with skip_hw, the flooded packets are not counted at all, because + # those are dropped due to MAC address mismatch; and skip_sw is a no-go + # for veth-based topologies. + # + # So try to install with skip_sw and fall back to skip_sw if that fails. + + $(maybe_in_ns $ns) __icmp_capture_add_del \ + $add_del 100 "" $dev skip_sw 2>/dev/null || \ + $(maybe_in_ns $ns) __icmp_capture_add_del \ + $add_del 100 "" $dev skip_hw +} + +flood_counter_install() +{ + __flood_counter_add_del add "$@" +} + +flood_counter_uninstall() +{ + __flood_counter_add_del del "$@" +} + +flood_fetch_stat() +{ + local dev=$1; shift + local ns=$1; shift + + $(maybe_in_ns $ns) tc_rule_stats_get $dev 100 ingress +} + +flood_fetch_stats() +{ + local counters=("${@}") + local counter + + for counter in "${counters[@]}"; do + flood_fetch_stat $counter + done +} + +vxlan_flood_test() +{ + local mac=$1; shift + local dst=$1; shift + local vid=$1; shift + local -a expects=("${@}") + + local -a counters=($h2 "vx10 ns1" "vx20 ns1" "vx10 ns2" "vx20 ns2") + local counter + local key + + # Packets reach the local host tagged whereas they reach the VxLAN + # devices untagged. In order to be able to use the same filter for + # all counters, make sure the packets also reach the local host + # untagged + bridge vlan add vid $vid dev $swp2 untagged + for counter in "${counters[@]}"; do + flood_counter_install $counter + done + + local -a t0s=($(flood_fetch_stats "${counters[@]}")) + $MZ $h1 -Q $vid -c 10 -d 100msec -p 64 -b $mac -B $dst -t icmp -q + sleep 1 + local -a t1s=($(flood_fetch_stats "${counters[@]}")) + + for key in ${!t0s[@]}; do + local delta=$((t1s[$key] - t0s[$key])) + local expect=${expects[$key]} + + ((expect == delta)) + check_err $? "${counters[$key]}: Expected to capture $expect packets, got $delta." + done + + for counter in "${counters[@]}"; do + flood_counter_uninstall $counter + done + bridge vlan add vid $vid dev $swp2 +} + +__test_flood() +{ + local mac=$1; shift + local dst=$1; shift + local vid=$1; shift + local what=$1; shift + local -a expects=("${@}") + + RET=0 + + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: $what" +} + +test_flood() +{ + __test_flood de:ad:be:ef:13:37 192.0.2.100 10 "flood vlan 10" \ + 10 10 0 10 0 + __test_flood ca:fe:be:ef:13:37 198.51.100.100 20 "flood vlan 20" \ + 10 0 10 0 10 +} + +vxlan_fdb_add_del() +{ + local add_del=$1; shift + local vid=$1; shift + local mac=$1; shift + local dev=$1; shift + local dst=$1; shift + + bridge fdb $add_del dev $dev $mac self static permanent \ + ${dst:+dst} $dst 2>/dev/null + bridge fdb $add_del dev $dev $mac master static vlan $vid 2>/dev/null +} + +__test_unicast() +{ + local mac=$1; shift + local dst=$1; shift + local hit_idx=$1; shift + local vid=$1; shift + local what=$1; shift + + RET=0 + + local -a expects=(0 0 0 0 0) + expects[$hit_idx]=10 + + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: $what" +} + +test_unicast() +{ + local -a targets=("$h2_mac $h2" + "$r1_mac vx10 192.0.2.34" + "$r2_mac vx10 192.0.2.50") + local target + + log_info "unicast vlan 10" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del add 10 $target + done + + __test_unicast $h2_mac 192.0.2.2 0 10 "local MAC unicast" + __test_unicast $r1_mac 192.0.2.3 1 10 "remote MAC 1 unicast" + __test_unicast $r2_mac 192.0.2.4 3 10 "remote MAC 2 unicast" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del del 10 $target + done + + log_info "unicast vlan 20" + + targets=("$h2_mac $h2" "$r1_mac vx20 192.0.2.34" \ + "$r2_mac vx20 192.0.2.50") + + for target in "${targets[@]}"; do + vxlan_fdb_add_del add 20 $target + done + + __test_unicast $h2_mac 198.51.100.2 0 20 "local MAC unicast" + __test_unicast $r1_mac 198.51.100.3 2 20 "remote MAC 1 unicast" + __test_unicast $r2_mac 198.51.100.4 4 20 "remote MAC 2 unicast" + + for target in "${targets[@]}"; do + vxlan_fdb_add_del del 20 $target + done +} + +test_pvid() +{ + local -a expects=(0 0 0 0 0) + local mac=de:ad:be:ef:13:37 + local dst=192.0.2.100 + local vid=10 + + # Check that flooding works + RET=0 + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood before pvid off" + + # Toggle PVID off and test that flood to remote hosts does not work + RET=0 + + bridge vlan add vid 10 dev vx10 + + expects[0]=10; expects[1]=0; expects[3]=0 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after pvid off" + + # Toggle PVID on and test that flood to remote hosts does work + RET=0 + + bridge vlan add vid 10 dev vx10 pvid untagged + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after pvid on" + + # Add a new VLAN and test that it does not affect flooding + RET=0 + + bridge vlan add vid 30 dev vx10 + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + bridge vlan del vid 30 dev vx10 + + log_test "VXLAN: flood after vlan add" + + # Remove currently mapped VLAN and test that flood to remote hosts does + # not work + RET=0 + + bridge vlan del vid 10 dev vx10 + + expects[0]=10; expects[1]=0; expects[3]=0 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after vlan delete" + + # Re-add the VLAN and test that flood to remote hosts does work + RET=0 + + bridge vlan add vid 10 dev vx10 pvid untagged + + expects[0]=10; expects[1]=10; expects[3]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood after vlan re-add" +} + +vxlan_ping_test() +{ + local ping_dev=$1; shift + local ping_dip=$1; shift + local ping_args=$1; shift + local capture_dev=$1; shift + local capture_dir=$1; shift + local capture_pref=$1; shift + local expect=$1; shift + + local t0=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) + ping_do $ping_dev $ping_dip "$ping_args" + local t1=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir) + local delta=$((t1 - t0)) + + # Tolerate a couple stray extra packets. + ((expect <= delta && delta <= expect + 2)) + check_err $? "$capture_dev: Expected to capture $expect packets, got $delta." +} + +__test_learning() +{ + local -a expects=(0 0 0 0 0) + local mac=$1; shift + local dst=$1; shift + local vid=$1; shift + local idx1=$1; shift + local idx2=$1; shift + local vx=vx$vid + + # Check that flooding works + RET=0 + + expects[0]=10; expects[$idx1]=10; expects[$idx2]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: flood before learning" + + # Send a packet with source mac set to $mac from host w2 and check that + # a corresponding entry is created in the VxLAN device + RET=0 + + in_ns ns1 $MZ w2 -Q $vid -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff \ + -B $dst -t icmp -q + sleep 1 + + bridge fdb show brport $vx | grep $mac | grep -q self + check_err $? + bridge fdb show brport $vx | grep $mac | grep "vlan $vid" \ + | grep -q -v self + check_err $? + + log_test "VXLAN: show learned FDB entry" + + # Repeat first test and check that packets only reach host w2 in ns1 + RET=0 + + expects[0]=0; expects[$idx1]=10; expects[$idx2]=0 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: learned FDB entry" + + # Delete the learned FDB entry from the VxLAN and bridge devices and + # check that packets are flooded + RET=0 + + bridge fdb del dev $vx $mac master self vlan $vid + sleep 1 + + expects[0]=10; expects[$idx1]=10; expects[$idx2]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: deletion of learned FDB entry" + + # Re-learn the first FDB entry and check that it is correctly aged-out + RET=0 + + in_ns ns1 $MZ w2 -Q $vid -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff \ + -B $dst -t icmp -q + sleep 1 + + bridge fdb show brport $vx | grep $mac | grep -q self + check_err $? + bridge fdb show brport $vx | grep $mac | grep "vlan $vid" \ + | grep -q -v self + check_err $? + + expects[0]=0; expects[$idx1]=10; expects[$idx2]=0 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + sleep 20 + + bridge fdb show brport $vx | grep $mac | grep -q self + check_fail $? + bridge fdb show brport $vx | grep $mac | grep "vlan $vid" \ + | grep -q -v self + check_fail $? + + expects[0]=10; expects[$idx1]=10; expects[$idx2]=10 + vxlan_flood_test $mac $dst $vid "${expects[@]}" + + log_test "VXLAN: Ageing of learned FDB entry" + + # Toggle learning on the bridge port and check that the bridge's FDB + # is populated only when it should + RET=0 + + ip link set dev $vx type bridge_slave learning off + + in_ns ns1 $MZ w2 -Q $vid -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff \ + -B $dst -t icmp -q + sleep 1 + + bridge fdb show brport $vx | grep $mac | grep "vlan $vid" \ + | grep -q -v self + check_fail $? + + ip link set dev $vx type bridge_slave learning on + + in_ns ns1 $MZ w2 -Q $vid -c 1 -p 64 -a $mac -b ff:ff:ff:ff:ff:ff \ + -B $dst -t icmp -q + sleep 1 + + bridge fdb show brport $vx | grep $mac | grep "vlan $vid" \ + | grep -q -v self + check_err $? + + log_test "VXLAN: learning toggling on bridge port" +} + +test_learning() +{ + local mac=de:ad:be:ef:13:37 + local dst=192.0.2.100 + local vid=10 + + # Enable learning on the VxLAN devices and set ageing time to 10 seconds + ip link set dev br1 type bridge ageing_time 1000 + ip link set dev vx10 type vxlan ageing 10 + ip link set dev vx10 type vxlan learning + ip link set dev vx20 type vxlan ageing 10 + ip link set dev vx20 type vxlan learning + reapply_config + + log_info "learning vlan 10" + + __test_learning $mac $dst $vid 1 3 + + log_info "learning vlan 20" + + mac=ca:fe:be:ef:13:37 + dst=198.51.100.100 + vid=20 + + __test_learning $mac $dst $vid 2 4 + + # Restore previous settings + ip link set dev vx20 type vxlan nolearning + ip link set dev vx20 type vxlan ageing 300 + ip link set dev vx10 type vxlan nolearning + ip link set dev vx10 type vxlan ageing 300 + ip link set dev br1 type bridge ageing_time 30000 + reapply_config +} + +test_all() +{ + log_info "Running tests with UDP port $VXPORT" + tests_run +} + +trap cleanup EXIT + +setup_prepare +setup_wait +test_all + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472.sh new file mode 100755 index 000000000000..b1b2d1a3164f --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# A wrapper to run VXLAN tests with an unusual port number. + +VXPORT=8472 +ALL_TESTS=" + ping_ipv4 +" +source vxlan_bridge_1q.sh diff --git a/tools/testing/selftests/net/ip_defrag.c b/tools/testing/selftests/net/ip_defrag.c new file mode 100644 index 000000000000..5d56cc0838f6 --- /dev/null +++ b/tools/testing/selftests/net/ip_defrag.c @@ -0,0 +1,471 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <linux/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/udp.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> + +static bool cfg_do_ipv4; +static bool cfg_do_ipv6; +static bool cfg_verbose; +static bool cfg_overlap; +static unsigned short cfg_port = 9000; + +const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) }; +const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT; + +#define IP4_HLEN (sizeof(struct iphdr)) +#define IP6_HLEN (sizeof(struct ip6_hdr)) +#define UDP_HLEN (sizeof(struct udphdr)) + +/* IPv6 fragment header lenth. */ +#define FRAG_HLEN 8 + +static int payload_len; +static int max_frag_len; + +#define MSG_LEN_MAX 60000 /* Max UDP payload length. */ + +#define IP4_MF (1u << 13) /* IPv4 MF flag. */ +#define IP6_MF (1) /* IPv6 MF flag. */ + +#define CSUM_MANGLED_0 (0xffff) + +static uint8_t udp_payload[MSG_LEN_MAX]; +static uint8_t ip_frame[IP_MAXPACKET]; +static uint32_t ip_id = 0xabcd; +static int msg_counter; +static int frag_counter; +static unsigned int seed; + +/* Receive a UDP packet. Validate it matches udp_payload. */ +static void recv_validate_udp(int fd_udp) +{ + ssize_t ret; + static uint8_t recv_buff[MSG_LEN_MAX]; + + ret = recv(fd_udp, recv_buff, payload_len, 0); + msg_counter++; + + if (cfg_overlap) { + if (ret != -1) + error(1, 0, "recv: expected timeout; got %d", + (int)ret); + if (errno != ETIMEDOUT && errno != EAGAIN) + error(1, errno, "recv: expected timeout: %d", + errno); + return; /* OK */ + } + + if (ret == -1) + error(1, errno, "recv: payload_len = %d max_frag_len = %d", + payload_len, max_frag_len); + if (ret != payload_len) + error(1, 0, "recv: wrong size: %d vs %d", (int)ret, payload_len); + if (memcmp(udp_payload, recv_buff, payload_len)) + error(1, 0, "recv: wrong data"); +} + +static uint32_t raw_checksum(uint8_t *buf, int len, uint32_t sum) +{ + int i; + + for (i = 0; i < (len & ~1U); i += 2) { + sum += (u_int16_t)ntohs(*((u_int16_t *)(buf + i))); + if (sum > 0xffff) + sum -= 0xffff; + } + + if (i < len) { + sum += buf[i] << 8; + if (sum > 0xffff) + sum -= 0xffff; + } + + return sum; +} + +static uint16_t udp_checksum(struct ip *iphdr, struct udphdr *udphdr) +{ + uint32_t sum = 0; + uint16_t res; + + sum = raw_checksum((uint8_t *)&iphdr->ip_src, 2 * sizeof(iphdr->ip_src), + IPPROTO_UDP + (uint32_t)(UDP_HLEN + payload_len)); + sum = raw_checksum((uint8_t *)udphdr, UDP_HLEN, sum); + sum = raw_checksum((uint8_t *)udp_payload, payload_len, sum); + res = 0xffff & ~sum; + if (res) + return htons(res); + else + return CSUM_MANGLED_0; +} + +static uint16_t udp6_checksum(struct ip6_hdr *iphdr, struct udphdr *udphdr) +{ + uint32_t sum = 0; + uint16_t res; + + sum = raw_checksum((uint8_t *)&iphdr->ip6_src, 2 * sizeof(iphdr->ip6_src), + IPPROTO_UDP); + sum = raw_checksum((uint8_t *)&udphdr->len, sizeof(udphdr->len), sum); + sum = raw_checksum((uint8_t *)udphdr, UDP_HLEN, sum); + sum = raw_checksum((uint8_t *)udp_payload, payload_len, sum); + res = 0xffff & ~sum; + if (res) + return htons(res); + else + return CSUM_MANGLED_0; +} + +static void send_fragment(int fd_raw, struct sockaddr *addr, socklen_t alen, + int offset, bool ipv6) +{ + int frag_len; + int res; + int payload_offset = offset > 0 ? offset - UDP_HLEN : 0; + uint8_t *frag_start = ipv6 ? ip_frame + IP6_HLEN + FRAG_HLEN : + ip_frame + IP4_HLEN; + + if (offset == 0) { + struct udphdr udphdr; + udphdr.source = htons(cfg_port + 1); + udphdr.dest = htons(cfg_port); + udphdr.len = htons(UDP_HLEN + payload_len); + udphdr.check = 0; + if (ipv6) + udphdr.check = udp6_checksum((struct ip6_hdr *)ip_frame, &udphdr); + else + udphdr.check = udp_checksum((struct ip *)ip_frame, &udphdr); + memcpy(frag_start, &udphdr, UDP_HLEN); + } + + if (ipv6) { + struct ip6_hdr *ip6hdr = (struct ip6_hdr *)ip_frame; + struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN); + if (payload_len - payload_offset <= max_frag_len && offset > 0) { + /* This is the last fragment. */ + frag_len = FRAG_HLEN + payload_len - payload_offset; + fraghdr->ip6f_offlg = htons(offset); + } else { + frag_len = FRAG_HLEN + max_frag_len; + fraghdr->ip6f_offlg = htons(offset | IP6_MF); + } + ip6hdr->ip6_plen = htons(frag_len); + if (offset == 0) + memcpy(frag_start + UDP_HLEN, udp_payload, + frag_len - FRAG_HLEN - UDP_HLEN); + else + memcpy(frag_start, udp_payload + payload_offset, + frag_len - FRAG_HLEN); + frag_len += IP6_HLEN; + } else { + struct ip *iphdr = (struct ip *)ip_frame; + if (payload_len - payload_offset <= max_frag_len && offset > 0) { + /* This is the last fragment. */ + frag_len = IP4_HLEN + payload_len - payload_offset; + iphdr->ip_off = htons(offset / 8); + } else { + frag_len = IP4_HLEN + max_frag_len; + iphdr->ip_off = htons(offset / 8 | IP4_MF); + } + iphdr->ip_len = htons(frag_len); + if (offset == 0) + memcpy(frag_start + UDP_HLEN, udp_payload, + frag_len - IP4_HLEN - UDP_HLEN); + else + memcpy(frag_start, udp_payload + payload_offset, + frag_len - IP4_HLEN); + } + + res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen); + if (res < 0) + error(1, errno, "send_fragment"); + if (res != frag_len) + error(1, 0, "send_fragment: %d vs %d", res, frag_len); + + frag_counter++; +} + +static void send_udp_frags(int fd_raw, struct sockaddr *addr, + socklen_t alen, bool ipv6) +{ + struct ip *iphdr = (struct ip *)ip_frame; + struct ip6_hdr *ip6hdr = (struct ip6_hdr *)ip_frame; + const bool ipv4 = !ipv6; + int res; + int offset; + int frag_len; + + /* Send the UDP datagram using raw IP fragments: the 0th fragment + * has the UDP header; other fragments are pieces of udp_payload + * split in chunks of frag_len size. + * + * Odd fragments (1st, 3rd, 5th, etc.) are sent out first, then + * even fragments (0th, 2nd, etc.) are sent out. + */ + if (ipv6) { + struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN); + ((struct sockaddr_in6 *)addr)->sin6_port = 0; + memset(ip6hdr, 0, sizeof(*ip6hdr)); + ip6hdr->ip6_flow = htonl(6<<28); /* Version. */ + ip6hdr->ip6_nxt = IPPROTO_FRAGMENT; + ip6hdr->ip6_hops = 255; + ip6hdr->ip6_src = addr6; + ip6hdr->ip6_dst = addr6; + fraghdr->ip6f_nxt = IPPROTO_UDP; + fraghdr->ip6f_reserved = 0; + fraghdr->ip6f_ident = htonl(ip_id++); + } else { + memset(iphdr, 0, sizeof(*iphdr)); + iphdr->ip_hl = 5; + iphdr->ip_v = 4; + iphdr->ip_tos = 0; + iphdr->ip_id = htons(ip_id++); + iphdr->ip_ttl = 0x40; + iphdr->ip_p = IPPROTO_UDP; + iphdr->ip_src.s_addr = htonl(INADDR_LOOPBACK); + iphdr->ip_dst = addr4; + iphdr->ip_sum = 0; + } + + /* Occasionally test in-order fragments. */ + if (!cfg_overlap && (rand() % 100 < 15)) { + offset = 0; + while (offset < (UDP_HLEN + payload_len)) { + send_fragment(fd_raw, addr, alen, offset, ipv6); + offset += max_frag_len; + } + return; + } + + /* Occasionally test IPv4 "runs" (see net/ipv4/ip_fragment.c) */ + if (ipv4 && !cfg_overlap && (rand() % 100 < 20) && + (payload_len > 9 * max_frag_len)) { + offset = 6 * max_frag_len; + while (offset < (UDP_HLEN + payload_len)) { + send_fragment(fd_raw, addr, alen, offset, ipv6); + offset += max_frag_len; + } + offset = 3 * max_frag_len; + while (offset < 6 * max_frag_len) { + send_fragment(fd_raw, addr, alen, offset, ipv6); + offset += max_frag_len; + } + offset = 0; + while (offset < 3 * max_frag_len) { + send_fragment(fd_raw, addr, alen, offset, ipv6); + offset += max_frag_len; + } + return; + } + + /* Odd fragments. */ + offset = max_frag_len; + while (offset < (UDP_HLEN + payload_len)) { + send_fragment(fd_raw, addr, alen, offset, ipv6); + /* IPv4 ignores duplicates, so randomly send a duplicate. */ + if (ipv4 && (1 == rand() % 100)) + send_fragment(fd_raw, addr, alen, offset, ipv6); + offset += 2 * max_frag_len; + } + + if (cfg_overlap) { + /* Send an extra random fragment. */ + if (ipv6) { + struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN); + /* sendto() returns EINVAL if offset + frag_len is too small. */ + offset = rand() % (UDP_HLEN + payload_len - 1); + frag_len = max_frag_len + rand() % 256; + /* In IPv6 if !!(frag_len % 8), the fragment is dropped. */ + frag_len &= ~0x7; + fraghdr->ip6f_offlg = htons(offset / 8 | IP6_MF); + ip6hdr->ip6_plen = htons(frag_len); + frag_len += IP6_HLEN; + } else { + /* In IPv4, duplicates and some fragments completely inside + * previously sent fragments are dropped/ignored. So + * random offset and frag_len can result in a dropped + * fragment instead of a dropped queue/packet. So we + * hard-code offset and frag_len. + * + * See ade446403bfb ("net: ipv4: do not handle duplicate + * fragments as overlapping"). + */ + if (max_frag_len * 4 < payload_len || max_frag_len < 16) { + /* not enough payload to play with random offset and frag_len. */ + offset = 8; + frag_len = IP4_HLEN + UDP_HLEN + max_frag_len; + } else { + offset = rand() % (payload_len / 2); + frag_len = 2 * max_frag_len + 1 + rand() % 256; + } + iphdr->ip_off = htons(offset / 8 | IP4_MF); + iphdr->ip_len = htons(frag_len); + } + res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen); + if (res < 0) + error(1, errno, "sendto overlap: %d", frag_len); + if (res != frag_len) + error(1, 0, "sendto overlap: %d vs %d", (int)res, frag_len); + frag_counter++; + } + + /* Event fragments. */ + offset = 0; + while (offset < (UDP_HLEN + payload_len)) { + send_fragment(fd_raw, addr, alen, offset, ipv6); + /* IPv4 ignores duplicates, so randomly send a duplicate. */ + if (ipv4 && (1 == rand() % 100)) + send_fragment(fd_raw, addr, alen, offset, ipv6); + offset += 2 * max_frag_len; + } +} + +static void run_test(struct sockaddr *addr, socklen_t alen, bool ipv6) +{ + int fd_tx_raw, fd_rx_udp; + /* Frag queue timeout is set to one second in the calling script; + * socket timeout should be just a bit longer to avoid tests interfering + * with each other. + */ + struct timeval tv = { .tv_sec = 1, .tv_usec = 10 }; + int idx; + int min_frag_len = ipv6 ? 1280 : 8; + + /* Initialize the payload. */ + for (idx = 0; idx < MSG_LEN_MAX; ++idx) + udp_payload[idx] = idx % 256; + + /* Open sockets. */ + fd_tx_raw = socket(addr->sa_family, SOCK_RAW, IPPROTO_RAW); + if (fd_tx_raw == -1) + error(1, errno, "socket tx_raw"); + + fd_rx_udp = socket(addr->sa_family, SOCK_DGRAM, 0); + if (fd_rx_udp == -1) + error(1, errno, "socket rx_udp"); + if (bind(fd_rx_udp, addr, alen)) + error(1, errno, "bind"); + /* Fail fast. */ + if (setsockopt(fd_rx_udp, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) + error(1, errno, "setsockopt rcv timeout"); + + for (payload_len = min_frag_len; payload_len < MSG_LEN_MAX; + payload_len += (rand() % 4096)) { + if (cfg_verbose) + printf("payload_len: %d\n", payload_len); + + if (cfg_overlap) { + /* With overlaps, one send/receive pair below takes + * at least one second (== timeout) to run, so there + * is not enough test time to run a nested loop: + * the full overlap test takes 20-30 seconds. + */ + max_frag_len = min_frag_len + + rand() % (1500 - FRAG_HLEN - min_frag_len); + send_udp_frags(fd_tx_raw, addr, alen, ipv6); + recv_validate_udp(fd_rx_udp); + } else { + /* Without overlaps, each packet reassembly (== one + * send/receive pair below) takes very little time to + * run, so we can easily afford more thourough testing + * with a nested loop: the full non-overlap test takes + * less than one second). + */ + max_frag_len = min_frag_len; + do { + send_udp_frags(fd_tx_raw, addr, alen, ipv6); + recv_validate_udp(fd_rx_udp); + max_frag_len += 8 * (rand() % 8); + } while (max_frag_len < (1500 - FRAG_HLEN) && + max_frag_len <= payload_len); + } + } + + /* Cleanup. */ + if (close(fd_tx_raw)) + error(1, errno, "close tx_raw"); + if (close(fd_rx_udp)) + error(1, errno, "close rx_udp"); + + if (cfg_verbose) + printf("processed %d messages, %d fragments\n", + msg_counter, frag_counter); + + fprintf(stderr, "PASS\n"); +} + + +static void run_test_v4(void) +{ + struct sockaddr_in addr = {0}; + + addr.sin_family = AF_INET; + addr.sin_port = htons(cfg_port); + addr.sin_addr = addr4; + + run_test((void *)&addr, sizeof(addr), false /* !ipv6 */); +} + +static void run_test_v6(void) +{ + struct sockaddr_in6 addr = {0}; + + addr.sin6_family = AF_INET6; + addr.sin6_port = htons(cfg_port); + addr.sin6_addr = addr6; + + run_test((void *)&addr, sizeof(addr), true /* ipv6 */); +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "46ov")) != -1) { + switch (c) { + case '4': + cfg_do_ipv4 = true; + break; + case '6': + cfg_do_ipv6 = true; + break; + case 'o': + cfg_overlap = true; + break; + case 'v': + cfg_verbose = true; + break; + default: + error(1, 0, "%s: parse error", argv[0]); + } + } +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + seed = time(NULL); + srand(seed); + /* Print the seed to track/reproduce potential failures. */ + printf("seed = %d\n", seed); + + if (cfg_do_ipv4) + run_test_v4(); + if (cfg_do_ipv6) + run_test_v6(); + + return 0; +} diff --git a/tools/testing/selftests/net/ip_defrag.sh b/tools/testing/selftests/net/ip_defrag.sh new file mode 100755 index 000000000000..7dd79a9efb17 --- /dev/null +++ b/tools/testing/selftests/net/ip_defrag.sh @@ -0,0 +1,46 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Run a couple of IP defragmentation tests. + +set +x +set -e + +readonly NETNS="ns-$(mktemp -u XXXXXX)" + +setup() { + ip netns add "${NETNS}" + ip -netns "${NETNS}" link set lo up + + ip netns exec "${NETNS}" sysctl -w net.ipv4.ipfrag_high_thresh=9000000 >/dev/null 2>&1 + ip netns exec "${NETNS}" sysctl -w net.ipv4.ipfrag_low_thresh=7000000 >/dev/null 2>&1 + ip netns exec "${NETNS}" sysctl -w net.ipv4.ipfrag_time=1 >/dev/null 2>&1 + + ip netns exec "${NETNS}" sysctl -w net.ipv6.ip6frag_high_thresh=9000000 >/dev/null 2>&1 + ip netns exec "${NETNS}" sysctl -w net.ipv6.ip6frag_low_thresh=7000000 >/dev/null 2>&1 + ip netns exec "${NETNS}" sysctl -w net.ipv6.ip6frag_time=1 >/dev/null 2>&1 + + # DST cache can get full with a lot of frags, with GC not keeping up with the test. + ip netns exec "${NETNS}" sysctl -w net.ipv6.route.max_size=65536 >/dev/null 2>&1 +} + +cleanup() { + ip netns del "${NETNS}" +} + +trap cleanup EXIT +setup + +echo "ipv4 defrag" +ip netns exec "${NETNS}" ./ip_defrag -4 + +echo "ipv4 defrag with overlaps" +ip netns exec "${NETNS}" ./ip_defrag -4o + +echo "ipv6 defrag" +ip netns exec "${NETNS}" ./ip_defrag -6 + +echo "ipv6 defrag with overlaps" +ip netns exec "${NETNS}" ./ip_defrag -6o + +echo "all tests done" diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c index 406cc70c571d..4b02933cab8a 100644 --- a/tools/testing/selftests/net/msg_zerocopy.c +++ b/tools/testing/selftests/net/msg_zerocopy.c @@ -651,12 +651,13 @@ static void do_flush_datagram(int fd, int type) static void do_rx(int domain, int type, int protocol) { + const int cfg_receiver_wait_ms = 400; uint64_t tstop; int fd; fd = do_setup_rx(domain, type, protocol); - tstop = gettimeofday_ms() + cfg_runtime_ms; + tstop = gettimeofday_ms() + cfg_runtime_ms + cfg_receiver_wait_ms; do { if (type == SOCK_STREAM) do_flush_tcp(fd); diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh index c43c6debda06..825ffec85cea 100755 --- a/tools/testing/selftests/net/msg_zerocopy.sh +++ b/tools/testing/selftests/net/msg_zerocopy.sh @@ -25,6 +25,8 @@ readonly path_sysctl_mem="net.core.optmem_max" if [[ "$#" -eq "0" ]]; then $0 4 tcp -t 1 $0 6 tcp -t 1 + $0 4 udp -t 1 + $0 6 udp -t 1 echo "OK. All tests passed" exit 0 fi diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 0ab9423d009f..912b2dc50be3 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -6,6 +6,67 @@ # # Tests currently implemented: # +# - pmtu_ipv4 +# Set up two namespaces, A and B, with two paths between them over routers +# R1 and R2 (also implemented with namespaces), with different MTUs: +# +# segment a_r1 segment b_r1 a_r1: 2000 +# .--------------R1--------------. a_r2: 1500 +# A B a_r3: 2000 +# '--------------R2--------------' a_r4: 1400 +# segment a_r2 segment b_r2 +# +# Check that PMTU exceptions with the correct PMTU are created. Then +# decrease and increase the MTU of the local link for one of the paths, +# A to R1, checking that route exception PMTU changes accordingly over +# this path. Also check that locked exceptions are created when an ICMP +# message advertising a PMTU smaller than net.ipv4.route.min_pmtu is +# received +# +# - pmtu_ipv6 +# Same as pmtu_ipv4, except for locked PMTU tests, using IPv6 +# +# - pmtu_ipv4_vxlan4_exception +# Set up the same network topology as pmtu_ipv4, create a VXLAN tunnel +# over IPv4 between A and B, routed via R1. On the link between R1 and B, +# set a MTU lower than the VXLAN MTU and the MTU on the link between A and +# R1. Send IPv4 packets, exceeding the MTU between R1 and B, over VXLAN +# from A to B and check that the PMTU exception is created with the right +# value on A +# +# - pmtu_ipv6_vxlan4_exception +# Same as pmtu_ipv4_vxlan4_exception, but send IPv6 packets from A to B +# +# - pmtu_ipv4_vxlan6_exception +# Same as pmtu_ipv4_vxlan4_exception, but use IPv6 transport from A to B +# +# - pmtu_ipv6_vxlan6_exception +# Same as pmtu_ipv4_vxlan6_exception, but send IPv6 packets from A to B +# +# - pmtu_ipv4_geneve4_exception +# Same as pmtu_ipv4_vxlan4_exception, but using a GENEVE tunnel instead of +# VXLAN +# +# - pmtu_ipv6_geneve4_exception +# Same as pmtu_ipv6_vxlan4_exception, but using a GENEVE tunnel instead of +# VXLAN +# +# - pmtu_ipv4_geneve6_exception +# Same as pmtu_ipv4_vxlan6_exception, but using a GENEVE tunnel instead of +# VXLAN +# +# - pmtu_ipv6_geneve6_exception +# Same as pmtu_ipv6_vxlan6_exception, but using a GENEVE tunnel instead of +# VXLAN +# +# - pmtu_ipv{4,6}_fou{4,6}_exception +# Same as pmtu_ipv4_vxlan4, but using a direct IPv4/IPv6 encapsulation +# (FoU) over IPv4/IPv6, instead of VXLAN +# +# - pmtu_ipv{4,6}_fou{4,6}_exception +# Same as pmtu_ipv4_vxlan4, but using a generic UDP IPv4/IPv6 +# encapsulation (GUE) over IPv4/IPv6, instead of VXLAN +# # - pmtu_vti4_exception # Set up vti tunnel on top of veth, with xfrm states and policies, in two # namespaces with matching endpoints. Check that route exception is not @@ -42,6 +103,15 @@ # and check that configured MTU is used on link creation and changes, and # that MTU is properly calculated instead when MTU is not configured from # userspace +# +# - cleanup_ipv4_exception +# Similar to pmtu_ipv4_vxlan4_exception, but explicitly generate PMTU +# exceptions on multiple CPUs and check that the veth device tear-down +# happens in a timely manner +# +# - cleanup_ipv6_exception +# Same as above, but use IPv6 transport from A to B + # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 @@ -50,18 +120,75 @@ ksft_skip=4 which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) tests=" + pmtu_ipv4_exception ipv4: PMTU exceptions + pmtu_ipv6_exception ipv6: PMTU exceptions + pmtu_ipv4_vxlan4_exception IPv4 over vxlan4: PMTU exceptions + pmtu_ipv6_vxlan4_exception IPv6 over vxlan4: PMTU exceptions + pmtu_ipv4_vxlan6_exception IPv4 over vxlan6: PMTU exceptions + pmtu_ipv6_vxlan6_exception IPv6 over vxlan6: PMTU exceptions + pmtu_ipv4_geneve4_exception IPv4 over geneve4: PMTU exceptions + pmtu_ipv6_geneve4_exception IPv6 over geneve4: PMTU exceptions + pmtu_ipv4_geneve6_exception IPv4 over geneve6: PMTU exceptions + pmtu_ipv6_geneve6_exception IPv6 over geneve6: PMTU exceptions + pmtu_ipv4_fou4_exception IPv4 over fou4: PMTU exceptions + pmtu_ipv6_fou4_exception IPv6 over fou4: PMTU exceptions + pmtu_ipv4_fou6_exception IPv4 over fou6: PMTU exceptions + pmtu_ipv6_fou6_exception IPv6 over fou6: PMTU exceptions + pmtu_ipv4_gue4_exception IPv4 over gue4: PMTU exceptions + pmtu_ipv6_gue4_exception IPv6 over gue4: PMTU exceptions + pmtu_ipv4_gue6_exception IPv4 over gue6: PMTU exceptions + pmtu_ipv6_gue6_exception IPv6 over gue6: PMTU exceptions pmtu_vti6_exception vti6: PMTU exceptions pmtu_vti4_exception vti4: PMTU exceptions pmtu_vti4_default_mtu vti4: default MTU assignment pmtu_vti6_default_mtu vti6: default MTU assignment pmtu_vti4_link_add_mtu vti4: MTU setting on link creation pmtu_vti6_link_add_mtu vti6: MTU setting on link creation - pmtu_vti6_link_change_mtu vti6: MTU changes on link changes" + pmtu_vti6_link_change_mtu vti6: MTU changes on link changes + cleanup_ipv4_exception ipv4: cleanup of cached exceptions + cleanup_ipv6_exception ipv6: cleanup of cached exceptions" NS_A="ns-$(mktemp -u XXXXXX)" NS_B="ns-$(mktemp -u XXXXXX)" +NS_R1="ns-$(mktemp -u XXXXXX)" +NS_R2="ns-$(mktemp -u XXXXXX)" ns_a="ip netns exec ${NS_A}" ns_b="ip netns exec ${NS_B}" +ns_r1="ip netns exec ${NS_R1}" +ns_r2="ip netns exec ${NS_R2}" + +# Addressing and routing for tests with routers: four network segments, with +# index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an +# identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2). +# Addresses are: +# - IPv4: PREFIX4.SEGMENT.ID (/24) +# - IPv6: PREFIX6:SEGMENT::ID (/64) +prefix4="10.0" +prefix6="fc00" +a_r1=1 +a_r2=2 +b_r1=3 +b_r2=4 +# ns peer segment +routing_addrs=" + A R1 ${a_r1} + A R2 ${a_r2} + B R1 ${b_r1} + B R2 ${b_r2} +" +# Traffic from A to B goes through R1 by default, and through R2, if destined to +# B's address on the b_r2 segment. +# Traffic from B to A goes through R1. +# ns destination gateway +routes=" + A default ${prefix4}.${a_r1}.2 + A ${prefix4}.${b_r2}.1 ${prefix4}.${a_r2}.2 + B default ${prefix4}.${b_r1}.2 + + A default ${prefix6}:${a_r1}::2 + A ${prefix6}:${b_r2}::1 ${prefix6}:${a_r2}::2 + B default ${prefix6}:${b_r1}::2 +" veth4_a_addr="192.168.1.1" veth4_b_addr="192.168.1.2" @@ -70,12 +197,12 @@ veth6_a_addr="fd00:1::a" veth6_b_addr="fd00:1::b" veth6_mask="64" -vti4_a_addr="192.168.2.1" -vti4_b_addr="192.168.2.2" -vti4_mask="24" -vti6_a_addr="fd00:2::a" -vti6_b_addr="fd00:2::b" -vti6_mask="64" +tunnel4_a_addr="192.168.2.1" +tunnel4_b_addr="192.168.2.2" +tunnel4_mask="24" +tunnel6_a_addr="fd00:2::a" +tunnel6_b_addr="fd00:2::b" +tunnel6_mask="64" dummy6_0_addr="fc00:1000::0" dummy6_1_addr="fc00:1001::0" @@ -83,6 +210,7 @@ dummy6_mask="64" cleanup_done=1 err_buf= +tcpdump_pids= err() { err_buf="${err_buf}${1} @@ -94,9 +222,100 @@ err_flush() { err_buf= } +# Find the auto-generated name for this namespace +nsname() { + eval echo \$NS_$1 +} + +setup_fou_or_gue() { + outer="${1}" + inner="${2}" + encap="${3}" + + if [ "${outer}" = "4" ]; then + modprobe fou || return 2 + a_addr="${prefix4}.${a_r1}.1" + b_addr="${prefix4}.${b_r1}.1" + if [ "${inner}" = "4" ]; then + type="ipip" + ipproto="4" + else + type="sit" + ipproto="41" + fi + else + modprobe fou6 || return 2 + a_addr="${prefix6}:${a_r1}::1" + b_addr="${prefix6}:${b_r1}::1" + if [ "${inner}" = "4" ]; then + type="ip6tnl" + mode="mode ipip6" + ipproto="4 -6" + else + type="ip6tnl" + mode="mode ip6ip6" + ipproto="41 -6" + fi + fi + + ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2 + ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2 + + ${ns_b} ip fou add port 5556 ipproto ${ipproto} + ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555 + + if [ "${inner}" = "4" ]; then + ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${encap}_a + ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${encap}_b + else + ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${encap}_a + ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${encap}_b + fi + + ${ns_a} ip link set ${encap}_a up + ${ns_b} ip link set ${encap}_b up +} + +setup_fou44() { + setup_fou_or_gue 4 4 fou +} + +setup_fou46() { + setup_fou_or_gue 4 6 fou +} + +setup_fou64() { + setup_fou_or_gue 6 4 fou +} + +setup_fou66() { + setup_fou_or_gue 6 6 fou +} + +setup_gue44() { + setup_fou_or_gue 4 4 gue +} + +setup_gue46() { + setup_fou_or_gue 4 6 gue +} + +setup_gue64() { + setup_fou_or_gue 6 4 gue +} + +setup_gue66() { + setup_fou_or_gue 6 6 gue +} + setup_namespaces() { - ip netns add ${NS_A} || return 1 - ip netns add ${NS_B} + for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do + ip netns add ${n} || return 1 + + # Disable DAD, so that we don't have to wait to use the + # configured IPv6 addresses + ip netns exec ${n} sysctl -q net/ipv6/conf/default/accept_dad=0 + done } setup_veth() { @@ -131,16 +350,58 @@ setup_vti() { ${ns_a} ip link set vti${proto}_a up ${ns_b} ip link set vti${proto}_b up - - sleep 1 } setup_vti4() { - setup_vti 4 ${veth4_a_addr} ${veth4_b_addr} ${vti4_a_addr} ${vti4_b_addr} ${vti4_mask} + setup_vti 4 ${veth4_a_addr} ${veth4_b_addr} ${tunnel4_a_addr} ${tunnel4_b_addr} ${tunnel4_mask} } setup_vti6() { - setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${vti6_a_addr} ${vti6_b_addr} ${vti6_mask} + setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask} +} + +setup_vxlan_or_geneve() { + type="${1}" + a_addr="${2}" + b_addr="${3}" + opts="${4}" + + if [ "${type}" = "vxlan" ]; then + opts="${opts} ttl 64 dstport 4789" + opts_a="local ${a_addr}" + opts_b="local ${b_addr}" + else + opts_a="" + opts_b="" + fi + + ${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts} || return 1 + ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts} + + ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a + ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b + + ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a + ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b + + ${ns_a} ip link set ${type}_a up + ${ns_b} ip link set ${type}_b up +} + +setup_geneve4() { + setup_vxlan_or_geneve geneve ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set" +} + +setup_vxlan4() { + setup_vxlan_or_geneve vxlan ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set" +} + +setup_geneve6() { + setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 +} + +setup_vxlan6() { + setup_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 } setup_xfrm() { @@ -167,6 +428,49 @@ setup_xfrm6() { setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} } +setup_routing() { + for i in ${NS_R1} ${NS_R2}; do + ip netns exec ${i} sysctl -q net/ipv4/ip_forward=1 + ip netns exec ${i} sysctl -q net/ipv6/conf/all/forwarding=1 + done + + for i in ${routing_addrs}; do + [ "${ns}" = "" ] && ns="${i}" && continue + [ "${peer}" = "" ] && peer="${i}" && continue + [ "${segment}" = "" ] && segment="${i}" + + ns_name="$(nsname ${ns})" + peer_name="$(nsname ${peer})" + if="veth_${ns}-${peer}" + ifpeer="veth_${peer}-${ns}" + + # Create veth links + ip link add ${if} up netns ${ns_name} type veth peer name ${ifpeer} netns ${peer_name} || return 1 + ip -n ${peer_name} link set dev ${ifpeer} up + + # Add addresses + ip -n ${ns_name} addr add ${prefix4}.${segment}.1/24 dev ${if} + ip -n ${ns_name} addr add ${prefix6}:${segment}::1/64 dev ${if} + + ip -n ${peer_name} addr add ${prefix4}.${segment}.2/24 dev ${ifpeer} + ip -n ${peer_name} addr add ${prefix6}:${segment}::2/64 dev ${ifpeer} + + ns=""; peer=""; segment="" + done + + for i in ${routes}; do + [ "${ns}" = "" ] && ns="${i}" && continue + [ "${addr}" = "" ] && addr="${i}" && continue + [ "${gw}" = "" ] && gw="${i}" + + ns_name="$(nsname ${ns})" + + ip -n ${ns_name} route add ${addr} via ${gw} + + ns=""; addr=""; gw="" + done +} + setup() { [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip @@ -176,10 +480,28 @@ setup() { done } +trace() { + [ $tracing -eq 0 ] && return + + for arg do + [ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue + ${ns_cmd} tcpdump -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null & + tcpdump_pids="${tcpdump_pids} $!" + ns_cmd= + done + sleep 1 +} + cleanup() { + for pid in ${tcpdump_pids}; do + kill ${pid} + done + tcpdump_pids= + [ ${cleanup_done} -eq 1 ] && return - ip netns del ${NS_A} 2> /dev/null - ip netns del ${NS_B} 2> /dev/null + for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do + ip netns del ${n} 2> /dev/null + done cleanup_done=1 } @@ -196,7 +518,9 @@ mtu_parse() { next=0 for i in ${input}; do + [ ${next} -eq 1 -a "${i}" = "lock" ] && next=2 && continue [ ${next} -eq 1 ] && echo "${i}" && return + [ ${next} -eq 2 ] && echo "lock ${i}" && return [ "${i}" = "mtu" ] && next=1 done } @@ -229,8 +553,272 @@ route_get_dst_pmtu_from_exception() { mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})" } +check_pmtu_value() { + expected="${1}" + value="${2}" + event="${3}" + + [ "${expected}" = "any" ] && [ -n "${value}" ] && return 0 + [ "${value}" = "${expected}" ] && return 0 + [ -z "${value}" ] && err " PMTU exception wasn't created after ${event}" && return 1 + [ -z "${expected}" ] && err " PMTU exception shouldn't exist after ${event}" && return 1 + err " found PMTU exception with incorrect MTU ${value}, expected ${expected}, after ${event}" + return 1 +} + +test_pmtu_ipvX() { + family=${1} + + setup namespaces routing || return 2 + trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ + "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ + "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2 + + if [ ${family} -eq 4 ]; then + ping=ping + dst1="${prefix4}.${b_r1}.1" + dst2="${prefix4}.${b_r2}.1" + else + ping=${ping6} + dst1="${prefix6}:${b_r1}::1" + dst2="${prefix6}:${b_r2}::1" + fi + + # Set up initial MTU values + mtu "${ns_a}" veth_A-R1 2000 + mtu "${ns_r1}" veth_R1-A 2000 + mtu "${ns_r1}" veth_R1-B 1400 + mtu "${ns_b}" veth_B-R1 1400 + + mtu "${ns_a}" veth_A-R2 2000 + mtu "${ns_r2}" veth_R2-A 2000 + mtu "${ns_r2}" veth_R2-B 1500 + mtu "${ns_b}" veth_B-R2 1500 + + # Create route exceptions + ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1} > /dev/null + ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2} > /dev/null + + # Check that exceptions have been created with the correct PMTU + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})" + check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 + + # Decrease local MTU below PMTU, check for PMTU decrease in route exception + mtu "${ns_a}" veth_A-R1 1300 + mtu "${ns_r1}" veth_R1-A 1300 + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})" + check_pmtu_value "1300" "${pmtu_1}" "decreasing local MTU" || return 1 + # Second exception shouldn't be modified + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1 + + # Increase MTU, check for PMTU increase in route exception + mtu "${ns_a}" veth_A-R1 1700 + mtu "${ns_r1}" veth_R1-A 1700 + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})" + check_pmtu_value "1700" "${pmtu_1}" "increasing local MTU" || return 1 + # Second exception shouldn't be modified + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "1500" "${pmtu_2}" "changing local MTU on a link not on this path" || return 1 + + # Skip PMTU locking tests for IPv6 + [ $family -eq 6 ] && return 0 + + # Decrease remote MTU on path via R2, get new exception + mtu "${ns_r2}" veth_R2-B 400 + mtu "${ns_b}" veth_B-R2 400 + ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2} > /dev/null + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1 + + # Decrease local MTU below PMTU + mtu "${ns_a}" veth_A-R2 500 + mtu "${ns_r2}" veth_R2-A 500 + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "500" "${pmtu_2}" "decreasing local MTU" || return 1 + + # Increase local MTU + mtu "${ns_a}" veth_A-R2 1500 + mtu "${ns_r2}" veth_R2-A 1500 + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "1500" "${pmtu_2}" "increasing local MTU" || return 1 + + # Get new exception + ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1400 ${dst2} > /dev/null + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})" + check_pmtu_value "lock 552" "${pmtu_2}" "exceeding MTU, with MTU < min_pmtu" || return 1 +} + +test_pmtu_ipv4_exception() { + test_pmtu_ipvX 4 +} + +test_pmtu_ipv6_exception() { + test_pmtu_ipvX 6 +} + +test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() { + type=${1} + family=${2} + outer_family=${3} + ll_mtu=4000 + + if [ ${outer_family} -eq 4 ]; then + setup namespaces routing ${type}4 || return 2 + # IPv4 header UDP header VXLAN/GENEVE header Ethernet header + exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14)) + else + setup namespaces routing ${type}6 || return 2 + # IPv6 header UDP header VXLAN/GENEVE header Ethernet header + exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) + fi + + trace "${ns_a}" ${type}_a "${ns_b}" ${type}_b \ + "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B + + if [ ${family} -eq 4 ]; then + ping=ping + dst=${tunnel4_b_addr} + else + ping=${ping6} + dst=${tunnel6_b_addr} + fi + + # Create route exception by exceeding link layer MTU + mtu "${ns_a}" veth_A-R1 $((${ll_mtu} + 1000)) + mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000)) + mtu "${ns_b}" veth_B-R1 ${ll_mtu} + mtu "${ns_r1}" veth_R1-B ${ll_mtu} + + mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000)) + mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000)) + ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} > /dev/null + + # Check that exception was created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})" + check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ${type} interface" +} + +test_pmtu_ipv4_vxlan4_exception() { + test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan 4 4 +} + +test_pmtu_ipv6_vxlan4_exception() { + test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan 6 4 +} + +test_pmtu_ipv4_geneve4_exception() { + test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 4 4 +} + +test_pmtu_ipv6_geneve4_exception() { + test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 4 +} + +test_pmtu_ipv4_vxlan6_exception() { + test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan 4 6 +} + +test_pmtu_ipv6_vxlan6_exception() { + test_pmtu_ipvX_over_vxlanY_or_geneveY_exception vxlan 6 6 +} + +test_pmtu_ipv4_geneve6_exception() { + test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 4 6 +} + +test_pmtu_ipv6_geneve6_exception() { + test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 6 +} + +test_pmtu_ipvX_over_fouY_or_gueY() { + inner_family=${1} + outer_family=${2} + encap=${3} + ll_mtu=4000 + + setup namespaces routing ${encap}${outer_family}${inner_family} || return 2 + trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \ + "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B + + if [ ${inner_family} -eq 4 ]; then + ping=ping + dst=${tunnel4_b_addr} + else + ping=${ping6} + dst=${tunnel6_b_addr} + fi + + if [ "${encap}" = "gue" ]; then + encap_overhead=4 + else + encap_overhead=0 + fi + + if [ ${outer_family} -eq 4 ]; then + # IPv4 header UDP header + exp_mtu=$((${ll_mtu} - 20 - 8 - ${encap_overhead})) + else + # IPv6 header Option 4 UDP header + exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - ${encap_overhead})) + fi + + # Create route exception by exceeding link layer MTU + mtu "${ns_a}" veth_A-R1 $((${ll_mtu} + 1000)) + mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000)) + mtu "${ns_b}" veth_B-R1 ${ll_mtu} + mtu "${ns_r1}" veth_R1-B ${ll_mtu} + + mtu "${ns_a}" ${encap}_a $((${ll_mtu} + 1000)) + mtu "${ns_b}" ${encap}_b $((${ll_mtu} + 1000)) + ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} > /dev/null + + # Check that exception was created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})" + check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ${encap} interface" +} + +test_pmtu_ipv4_fou4_exception() { + test_pmtu_ipvX_over_fouY_or_gueY 4 4 fou +} + +test_pmtu_ipv6_fou4_exception() { + test_pmtu_ipvX_over_fouY_or_gueY 6 4 fou +} + +test_pmtu_ipv4_fou6_exception() { + test_pmtu_ipvX_over_fouY_or_gueY 4 6 fou +} + +test_pmtu_ipv6_fou6_exception() { + test_pmtu_ipvX_over_fouY_or_gueY 6 6 fou +} + +test_pmtu_ipv4_gue4_exception() { + test_pmtu_ipvX_over_fouY_or_gueY 4 4 gue +} + +test_pmtu_ipv6_gue4_exception() { + test_pmtu_ipvX_over_fouY_or_gueY 6 4 gue +} + +test_pmtu_ipv4_gue6_exception() { + test_pmtu_ipvX_over_fouY_or_gueY 4 6 gue +} + +test_pmtu_ipv6_gue6_exception() { + test_pmtu_ipvX_over_fouY_or_gueY 6 6 gue +} + test_pmtu_vti4_exception() { setup namespaces veth vti4 xfrm4 || return 2 + trace "${ns_a}" veth_a "${ns_b}" veth_b \ + "${ns_a}" vti4_a "${ns_b}" vti4_b veth_mtu=1500 vti_mtu=$((veth_mtu - 20)) @@ -246,30 +834,21 @@ test_pmtu_vti4_exception() { # Send DF packet without exceeding link layer MTU, check that no # exception is created - ${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${vti4_b_addr} > /dev/null - pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})" - if [ "${pmtu}" != "" ]; then - err " unexpected exception created with PMTU ${pmtu} for IP payload length ${esp_payload_rfc4106}" - return 1 - fi + ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} > /dev/null + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 # Now exceed link layer MTU by one byte, check that exception is created - ${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${vti4_b_addr} > /dev/null - pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})" - if [ "${pmtu}" = "" ]; then - err " exception not created for IP payload length $((esp_payload_rfc4106 + 1))" - return 1 - fi - - # ...with the right PMTU value - if [ ${pmtu} -ne ${esp_payload_rfc4106} ]; then - err " wrong PMTU ${pmtu} in exception, expected: ${esp_payload_rfc4106}" - return 1 - fi + # with the right PMTU value + ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr} > /dev/null + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))" } test_pmtu_vti6_exception() { setup namespaces veth vti6 xfrm6 || return 2 + trace "${ns_a}" veth_a "${ns_b}" veth_b \ + "${ns_a}" vti6_a "${ns_b}" vti6_b fail=0 # Create route exception by exceeding link layer MTU @@ -277,28 +856,21 @@ test_pmtu_vti6_exception() { mtu "${ns_b}" veth_b 4000 mtu "${ns_a}" vti6_a 5000 mtu "${ns_b}" vti6_b 5000 - ${ns_a} ${ping6} -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null + ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr} > /dev/null # Check that exception was created - if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" = "" ]; then - err " tunnel exceeding link layer MTU didn't create route exception" - return 1 - fi + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1 # Decrease tunnel MTU, check for PMTU decrease in route exception mtu "${ns_a}" vti6_a 3000 - - if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 3000 ]; then - err " decreasing tunnel MTU didn't decrease route exception PMTU" - fail=1 - fi + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1 # Increase tunnel MTU, check for PMTU increase in route exception mtu "${ns_a}" vti6_a 9000 - if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 9000 ]; then - err " increasing tunnel MTU didn't increase route exception PMTU" - fail=1 - fi + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1 return ${fail} } @@ -445,15 +1017,111 @@ test_pmtu_vti6_link_change_mtu() { return ${fail} } -trap cleanup EXIT +check_command() { + cmd=${1} + + if ! which ${cmd} > /dev/null 2>&1; then + err " missing required command: '${cmd}'" + return 1 + fi + return 0 +} + +test_cleanup_vxlanX_exception() { + outer="${1}" + encap="vxlan" + ll_mtu=4000 + + check_command taskset || return 2 + cpu_list=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2) + + setup namespaces routing ${encap}${outer} || return 2 + trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \ + "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B + + # Create route exception by exceeding link layer MTU + mtu "${ns_a}" veth_A-R1 $((${ll_mtu} + 1000)) + mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000)) + mtu "${ns_b}" veth_B-R1 ${ll_mtu} + mtu "${ns_r1}" veth_R1-B ${ll_mtu} + + mtu "${ns_a}" ${encap}_a $((${ll_mtu} + 1000)) + mtu "${ns_b}" ${encap}_b $((${ll_mtu} + 1000)) + + # Fill exception cache for multiple CPUs (2) + # we can always use inner IPv4 for that + for cpu in ${cpu_list}; do + taskset --cpu-list ${cpu} ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${tunnel4_b_addr} > /dev/null + done + + ${ns_a} ip link del dev veth_A-R1 & + iplink_pid=$! + sleep 1 + if [ "$(cat /proc/${iplink_pid}/cmdline 2>/dev/null | tr -d '\0')" = "iplinkdeldevveth_A-R1" ]; then + err " can't delete veth device in a timely manner, PMTU dst likely leaked" + return 1 + fi +} + +test_cleanup_ipv6_exception() { + test_cleanup_vxlanX_exception 6 +} + +test_cleanup_ipv4_exception() { + test_cleanup_vxlanX_exception 4 +} + +usage() { + echo + echo "$0 [OPTIONS] [TEST]..." + echo "If no TEST argument is given, all tests will be run." + echo + echo "Options" + echo " --trace: capture traffic to TEST_INTERFACE.pcap" + echo + echo "Available tests${tests}" + exit 1 +} exitcode=0 desc=0 IFS=" " + +tracing=0 +for arg do + if [ "${arg}" != "${arg#--*}" ]; then + opt="${arg#--}" + if [ "${opt}" = "trace" ]; then + if which tcpdump > /dev/null 2>&1; then + tracing=1 + else + echo "=== tcpdump not available, tracing disabled" + fi + else + usage + fi + else + # Check first that all requested tests are available before + # running any + command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; } + fi +done + +trap cleanup EXIT + for t in ${tests}; do [ $desc -eq 0 ] && name="${t}" && desc=1 && continue || desc=0 + run_this=1 + for arg do + [ "${arg}" != "${arg#--*}" ] && continue + [ "${arg}" = "${name}" ] && run_this=1 && break + run_this=0 + done + [ $run_this -eq 0 ] && continue + ( unset IFS eval test_${name} diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c new file mode 100644 index 000000000000..c6233935fed1 --- /dev/null +++ b/tools/testing/selftests/net/reuseport_addr_any.c @@ -0,0 +1,274 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Test that sockets listening on a specific address are preferred + * over sockets listening on addr_any. + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <linux/dccp.h> +#include <linux/in.h> +#include <linux/unistd.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/epoll.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <unistd.h> + +static const char *IP4_ADDR = "127.0.0.1"; +static const char *IP6_ADDR = "::1"; +static const char *IP4_MAPPED6 = "::ffff:127.0.0.1"; + +static const int PORT = 8888; + +static void build_rcv_fd(int family, int proto, int *rcv_fds, int count, + const char *addr_str) +{ + struct sockaddr_in addr4 = {0}; + struct sockaddr_in6 addr6 = {0}; + struct sockaddr *addr; + int opt, i, sz; + + memset(&addr, 0, sizeof(addr)); + + switch (family) { + case AF_INET: + addr4.sin_family = family; + if (!addr_str) + addr4.sin_addr.s_addr = htonl(INADDR_ANY); + else if (!inet_pton(family, addr_str, &addr4.sin_addr.s_addr)) + error(1, errno, "inet_pton failed: %s", addr_str); + addr4.sin_port = htons(PORT); + sz = sizeof(addr4); + addr = (struct sockaddr *)&addr4; + break; + case AF_INET6: + addr6.sin6_family = AF_INET6; + if (!addr_str) + addr6.sin6_addr = in6addr_any; + else if (!inet_pton(family, addr_str, &addr6.sin6_addr)) + error(1, errno, "inet_pton failed: %s", addr_str); + addr6.sin6_port = htons(PORT); + sz = sizeof(addr6); + addr = (struct sockaddr *)&addr6; + break; + default: + error(1, 0, "Unsupported family %d", family); + /* clang does not recognize error() above as terminating + * the program, so it complains that saddr, sz are + * not initialized when this code path is taken. Silence it. + */ + return; + } + + for (i = 0; i < count; ++i) { + rcv_fds[i] = socket(family, proto, 0); + if (rcv_fds[i] < 0) + error(1, errno, "failed to create receive socket"); + + opt = 1; + if (setsockopt(rcv_fds[i], SOL_SOCKET, SO_REUSEPORT, &opt, + sizeof(opt))) + error(1, errno, "failed to set SO_REUSEPORT"); + + if (bind(rcv_fds[i], addr, sz)) + error(1, errno, "failed to bind receive socket"); + + if (proto == SOCK_STREAM && listen(rcv_fds[i], 10)) + error(1, errno, "tcp: failed to listen on receive port"); + else if (proto == SOCK_DCCP) { + if (setsockopt(rcv_fds[i], SOL_DCCP, + DCCP_SOCKOPT_SERVICE, + &(int) {htonl(42)}, sizeof(int))) + error(1, errno, "failed to setsockopt"); + + if (listen(rcv_fds[i], 10)) + error(1, errno, "dccp: failed to listen on receive port"); + } + } +} + +static int connect_and_send(int family, int proto) +{ + struct sockaddr_in saddr4 = {0}; + struct sockaddr_in daddr4 = {0}; + struct sockaddr_in6 saddr6 = {0}; + struct sockaddr_in6 daddr6 = {0}; + struct sockaddr *saddr, *daddr; + int fd, sz; + + switch (family) { + case AF_INET: + saddr4.sin_family = AF_INET; + saddr4.sin_addr.s_addr = htonl(INADDR_ANY); + saddr4.sin_port = 0; + + daddr4.sin_family = AF_INET; + if (!inet_pton(family, IP4_ADDR, &daddr4.sin_addr.s_addr)) + error(1, errno, "inet_pton failed: %s", IP4_ADDR); + daddr4.sin_port = htons(PORT); + + sz = sizeof(saddr4); + saddr = (struct sockaddr *)&saddr4; + daddr = (struct sockaddr *)&daddr4; + break; + case AF_INET6: + saddr6.sin6_family = AF_INET6; + saddr6.sin6_addr = in6addr_any; + + daddr6.sin6_family = AF_INET6; + if (!inet_pton(family, IP6_ADDR, &daddr6.sin6_addr)) + error(1, errno, "inet_pton failed: %s", IP6_ADDR); + daddr6.sin6_port = htons(PORT); + + sz = sizeof(saddr6); + saddr = (struct sockaddr *)&saddr6; + daddr = (struct sockaddr *)&daddr6; + break; + default: + error(1, 0, "Unsupported family %d", family); + /* clang does not recognize error() above as terminating + * the program, so it complains that saddr, daddr, sz are + * not initialized when this code path is taken. Silence it. + */ + return -1; + } + + fd = socket(family, proto, 0); + if (fd < 0) + error(1, errno, "failed to create send socket"); + + if (proto == SOCK_DCCP && + setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, + &(int){htonl(42)}, sizeof(int))) + error(1, errno, "failed to setsockopt"); + + if (bind(fd, saddr, sz)) + error(1, errno, "failed to bind send socket"); + + if (connect(fd, daddr, sz)) + error(1, errno, "failed to connect send socket"); + + if (send(fd, "a", 1, 0) < 0) + error(1, errno, "failed to send message"); + + return fd; +} + +static int receive_once(int epfd, int proto) +{ + struct epoll_event ev; + int i, fd; + char buf[8]; + + i = epoll_wait(epfd, &ev, 1, 3); + if (i < 0) + error(1, errno, "epoll_wait failed"); + + if (proto == SOCK_STREAM || proto == SOCK_DCCP) { + fd = accept(ev.data.fd, NULL, NULL); + if (fd < 0) + error(1, errno, "failed to accept"); + i = recv(fd, buf, sizeof(buf), 0); + close(fd); + } else { + i = recv(ev.data.fd, buf, sizeof(buf), 0); + } + + if (i < 0) + error(1, errno, "failed to recv"); + + return ev.data.fd; +} + +static void test(int *rcv_fds, int count, int family, int proto, int fd) +{ + struct epoll_event ev; + int epfd, i, send_fd, recv_fd; + + epfd = epoll_create(1); + if (epfd < 0) + error(1, errno, "failed to create epoll"); + + ev.events = EPOLLIN; + for (i = 0; i < count; ++i) { + ev.data.fd = rcv_fds[i]; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fds[i], &ev)) + error(1, errno, "failed to register sock epoll"); + } + + send_fd = connect_and_send(family, proto); + + recv_fd = receive_once(epfd, proto); + if (recv_fd != fd) + error(1, 0, "received on an unexpected socket"); + + close(send_fd); + close(epfd); +} + + +static void run_one_test(int fam_send, int fam_rcv, int proto, + const char *addr_str) +{ + /* Below we test that a socket listening on a specific address + * is always selected in preference over a socket listening + * on addr_any. Bugs where this is not the case often result + * in sockets created first or last to get picked. So below + * we make sure that there are always addr_any sockets created + * before and after a specific socket is created. + */ + int rcv_fds[10], i; + + build_rcv_fd(AF_INET, proto, rcv_fds, 2, NULL); + build_rcv_fd(AF_INET6, proto, rcv_fds + 2, 2, NULL); + build_rcv_fd(fam_rcv, proto, rcv_fds + 4, 1, addr_str); + build_rcv_fd(AF_INET, proto, rcv_fds + 5, 2, NULL); + build_rcv_fd(AF_INET6, proto, rcv_fds + 7, 2, NULL); + test(rcv_fds, 9, fam_send, proto, rcv_fds[4]); + for (i = 0; i < 9; ++i) + close(rcv_fds[i]); + fprintf(stderr, "pass\n"); +} + +static void test_proto(int proto, const char *proto_str) +{ + if (proto == SOCK_DCCP) { + int test_fd; + + test_fd = socket(AF_INET, proto, 0); + if (test_fd < 0) { + if (errno == ESOCKTNOSUPPORT) { + fprintf(stderr, "DCCP not supported: skipping DCCP tests\n"); + return; + } else + error(1, errno, "failed to create a DCCP socket"); + } + close(test_fd); + } + + fprintf(stderr, "%s IPv4 ... ", proto_str); + run_one_test(AF_INET, AF_INET, proto, IP4_ADDR); + + fprintf(stderr, "%s IPv6 ... ", proto_str); + run_one_test(AF_INET6, AF_INET6, proto, IP6_ADDR); + + fprintf(stderr, "%s IPv4 mapped to IPv6 ... ", proto_str); + run_one_test(AF_INET, AF_INET6, proto, IP4_MAPPED6); +} + +int main(void) +{ + test_proto(SOCK_DGRAM, "UDP"); + test_proto(SOCK_STREAM, "TCP"); + test_proto(SOCK_DCCP, "DCCP"); + + fprintf(stderr, "SUCCESS\n"); + return 0; +} diff --git a/tools/testing/selftests/net/reuseport_addr_any.sh b/tools/testing/selftests/net/reuseport_addr_any.sh new file mode 100755 index 000000000000..104592f62ad4 --- /dev/null +++ b/tools/testing/selftests/net/reuseport_addr_any.sh @@ -0,0 +1,4 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +./in_netns.sh ./reuseport_addr_any diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index e101af52d1d6..78fc593dfe40 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -205,6 +205,8 @@ kci_test_polrouting() kci_test_route_get() { + local hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy) + ret=0 ip route get 127.0.0.1 > /dev/null @@ -223,6 +225,19 @@ kci_test_route_get() check_err $? ip route get 10.23.7.11 from 10.23.7.12 iif "$devdummy" > /dev/null check_err $? + ip route add 10.23.8.0/24 \ + nexthop via 10.23.7.13 dev "$devdummy" \ + nexthop via 10.23.7.14 dev "$devdummy" + check_err $? + sysctl -wq net.ipv4.fib_multipath_hash_policy=0 + ip route get 10.23.8.11 > /dev/null + check_err $? + sysctl -wq net.ipv4.fib_multipath_hash_policy=1 + ip route get 10.23.8.11 > /dev/null + check_err $? + sysctl -wq net.ipv4.fib_multipath_hash_policy="$hash_policy" + ip route del 10.23.8.0/24 + check_err $? ip addr del dev "$devdummy" 10.23.7.11/24 check_err $? @@ -955,6 +970,111 @@ kci_test_ip6erspan() ip netns del "$testns" } +kci_test_fdb_get() +{ + IP="ip -netns testns" + BRIDGE="bridge -netns testns" + brdev="test-br0" + vxlandev="vxlan10" + test_mac=de:ad:be:ef:13:37 + localip="10.0.2.2" + dstip="10.0.2.3" + ret=0 + + bridge fdb help 2>&1 |grep -q 'bridge fdb get' + if [ $? -ne 0 ];then + echo "SKIP: fdb get tests: iproute2 too old" + return $ksft_skip + fi + + ip netns add testns + if [ $? -ne 0 ]; then + echo "SKIP fdb get tests: cannot add net namespace $testns" + return $ksft_skip + fi + + $IP link add "$vxlandev" type vxlan id 10 local $localip \ + dstport 4789 2>/dev/null + check_err $? + $IP link add name "$brdev" type bridge &>/dev/null + check_err $? + $IP link set dev "$vxlandev" master "$brdev" &>/dev/null + check_err $? + $BRIDGE fdb add $test_mac dev "$vxlandev" master &>/dev/null + check_err $? + $BRIDGE fdb add $test_mac dev "$vxlandev" dst $dstip self &>/dev/null + check_err $? + + $BRIDGE fdb get $test_mac brport "$vxlandev" 2>/dev/null | grep -q "dev $vxlandev master $brdev" + check_err $? + $BRIDGE fdb get $test_mac br "$brdev" 2>/dev/null | grep -q "dev $vxlandev master $brdev" + check_err $? + $BRIDGE fdb get $test_mac dev "$vxlandev" self 2>/dev/null | grep -q "dev $vxlandev dst $dstip" + check_err $? + + ip netns del testns &>/dev/null + + if [ $ret -ne 0 ]; then + echo "FAIL: bridge fdb get" + return 1 + fi + + echo "PASS: bridge fdb get" +} + +kci_test_neigh_get() +{ + dstmac=de:ad:be:ef:13:37 + dstip=10.0.2.4 + dstip6=dead::2 + ret=0 + + ip neigh help 2>&1 |grep -q 'ip neigh get' + if [ $? -ne 0 ];then + echo "SKIP: fdb get tests: iproute2 too old" + return $ksft_skip + fi + + # ipv4 + ip neigh add $dstip lladdr $dstmac dev "$devdummy" > /dev/null + check_err $? + ip neigh get $dstip dev "$devdummy" 2> /dev/null | grep -q "$dstmac" + check_err $? + ip neigh del $dstip lladdr $dstmac dev "$devdummy" > /dev/null + check_err $? + + # ipv4 proxy + ip neigh add proxy $dstip dev "$devdummy" > /dev/null + check_err $? + ip neigh get proxy $dstip dev "$devdummy" 2>/dev/null | grep -q "$dstip" + check_err $? + ip neigh del proxy $dstip dev "$devdummy" > /dev/null + check_err $? + + # ipv6 + ip neigh add $dstip6 lladdr $dstmac dev "$devdummy" > /dev/null + check_err $? + ip neigh get $dstip6 dev "$devdummy" 2> /dev/null | grep -q "$dstmac" + check_err $? + ip neigh del $dstip6 lladdr $dstmac dev "$devdummy" > /dev/null + check_err $? + + # ipv6 proxy + ip neigh add proxy $dstip6 dev "$devdummy" > /dev/null + check_err $? + ip neigh get proxy $dstip6 dev "$devdummy" 2>/dev/null | grep -q "$dstip6" + check_err $? + ip neigh del proxy $dstip6 dev "$devdummy" > /dev/null + check_err $? + + if [ $ret -ne 0 ];then + echo "FAIL: neigh get" + return 1 + fi + + echo "PASS: neigh get" +} + kci_test_rtnl() { kci_add_dummy @@ -979,6 +1099,8 @@ kci_test_rtnl() kci_test_macsec kci_test_ipsec kci_test_ipsec_offload + kci_test_fdb_get + kci_test_neigh_get kci_del_dummy } diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests index bea079edc278..2dc95fda7ef7 100755 --- a/tools/testing/selftests/net/run_afpackettests +++ b/tools/testing/selftests/net/run_afpackettests @@ -25,3 +25,13 @@ if [ $? -ne 0 ]; then else echo "[PASS]" fi + +echo "--------------------" +echo "running txring_overwrite test" +echo "--------------------" +./in_netns.sh ./txring_overwrite +if [ $? -ne 0 ]; then + echo "[FAIL]" +else + echo "[PASS]" +fi diff --git a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh new file mode 100755 index 000000000000..2d442cdab11e --- /dev/null +++ b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Check FDB default-remote handling across "ip link set". + +check_remotes() +{ + local what=$1; shift + local N=$(bridge fdb sh dev vx | grep 00:00:00:00:00:00 | wc -l) + + echo -ne "expected two remotes after $what\t" + if [[ $N != 2 ]]; then + echo "[FAIL]" + EXIT_STATUS=1 + else + echo "[ OK ]" + fi +} + +ip link add name vx up type vxlan id 2000 dstport 4789 +bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent +bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent +check_remotes "fdb append" + +ip link set dev vx type vxlan remote 192.0.2.30 +check_remotes "link set" + +ip link del dev vx +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/test_vxlan_under_vrf.sh b/tools/testing/selftests/net/test_vxlan_under_vrf.sh new file mode 100755 index 000000000000..09f9ed92cbe4 --- /dev/null +++ b/tools/testing/selftests/net/test_vxlan_under_vrf.sh @@ -0,0 +1,129 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is for checking VXLAN underlay in a non-default VRF. +# +# It simulates two hypervisors running a VM each using four network namespaces: +# two for the HVs, two for the VMs. +# A small VXLAN tunnel is made between the two hypervisors to have the two vms +# in the same virtual L2: +# +# +-------------------+ +-------------------+ +# | | | | +# | vm-1 netns | | vm-2 netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth-hv | | | | veth-hv | | +# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +-----------------------------------+ +------------------------------------+ +# | . | | . | +# | +----------+ | | +----------+ | +# | | veth-tap | | | | veth-tap | | +# | +----+-----+ | | +----+-----+ | +# | | | | | | +# | +--+--+ +--------------+ | | +--------------+ +--+--+ | +# | | br0 | | vrf-underlay | | | | vrf-underlay | | br0 | | +# | +--+--+ +-------+------+ | | +------+-------+ +--+--+ | +# | | | | | | | | +# | +---+----+ +-------+-------+ | | +-------+-------+ +---+----+ | +# | | vxlan0 |....| veth0 |.|...|.| veth0 |....| vxlan0 | | +# | +--------+ | 172.16.0.1/24 | | | | 172.16.0.2/24 | +--------+ | +# | +---------------+ | | +---------------+ | +# | | | | +# | hv-1 netns | | hv-2 netns | +# | | | | +# +-----------------------------------+ +------------------------------------+ +# +# This tests both the connectivity between vm-1 and vm-2, and that the underlay +# can be moved in and out of the vrf by unsetting and setting veth0's master. + +set -e + +cleanup() { + ip link del veth-hv-1 2>/dev/null || true + ip link del veth-tap 2>/dev/null || true + + for ns in hv-1 hv-2 vm-1 vm-2; do + ip netns del $ns || true + done +} + +# Clean start +cleanup &> /dev/null + +[[ $1 == "clean" ]] && exit 0 + +trap cleanup EXIT + +# Setup "Hypervisors" simulated with netns +ip link add veth-hv-1 type veth peer name veth-hv-2 +setup-hv-networking() { + hv=$1 + + ip netns add hv-$hv + ip link set veth-hv-$hv netns hv-$hv + ip -netns hv-$hv link set veth-hv-$hv name veth0 + + ip -netns hv-$hv link add vrf-underlay type vrf table 1 + ip -netns hv-$hv link set vrf-underlay up + ip -netns hv-$hv addr add 172.16.0.$hv/24 dev veth0 + ip -netns hv-$hv link set veth0 up + + ip -netns hv-$hv link add br0 type bridge + ip -netns hv-$hv link set br0 up + + ip -netns hv-$hv link add vxlan0 type vxlan id 10 local 172.16.0.$hv dev veth0 dstport 4789 + ip -netns hv-$hv link set vxlan0 master br0 + ip -netns hv-$hv link set vxlan0 up +} +setup-hv-networking 1 +setup-hv-networking 2 + +# Check connectivity between HVs by pinging hv-2 from hv-1 +echo -n "Checking HV connectivity " +ip netns exec hv-1 ping -c 1 -W 1 172.16.0.2 &> /dev/null || (echo "[FAIL]"; false) +echo "[ OK ]" + +# Setups a "VM" simulated by a netns an a veth pair +setup-vm() { + id=$1 + + ip netns add vm-$id + ip link add veth-tap type veth peer name veth-hv + + ip link set veth-tap netns hv-$id + ip -netns hv-$id link set veth-tap master br0 + ip -netns hv-$id link set veth-tap up + + ip link set veth-hv netns vm-$id + ip -netns vm-$id addr add 10.0.0.$id/24 dev veth-hv + ip -netns vm-$id link set veth-hv up +} +setup-vm 1 +setup-vm 2 + +# Setup VTEP routes to make ARP work +bridge -netns hv-1 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.2 self permanent +bridge -netns hv-2 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.1 self permanent + +echo -n "Check VM connectivity through VXLAN (underlay in the default VRF) " +ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) +echo "[ OK ]" + +# Move the underlay to a non-default VRF +ip -netns hv-1 link set veth0 vrf vrf-underlay +ip -netns hv-1 link set veth0 down +ip -netns hv-1 link set veth0 up +ip -netns hv-2 link set veth0 vrf vrf-underlay +ip -netns hv-2 link set veth0 down +ip -netns hv-2 link set veth0 up + +echo -n "Check VM connectivity through VXLAN (underlay in a VRF) " +ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) +echo "[ OK ]" diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 8fdfeafaf8c0..fac68d710f35 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -121,11 +121,11 @@ TEST_F(tls, send_then_sendfile) buf = (char *)malloc(st.st_size); EXPECT_EQ(send(self->fd, test_str, to_send, 0), to_send); - EXPECT_EQ(recv(self->cfd, recv_buf, to_send, 0), to_send); + EXPECT_EQ(recv(self->cfd, recv_buf, to_send, MSG_WAITALL), to_send); EXPECT_EQ(memcmp(test_str, recv_buf, to_send), 0); EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0); - EXPECT_EQ(recv(self->cfd, buf, st.st_size, 0), st.st_size); + EXPECT_EQ(recv(self->cfd, buf, st.st_size, MSG_WAITALL), st.st_size); } TEST_F(tls, recv_max) @@ -160,7 +160,7 @@ TEST_F(tls, msg_more) EXPECT_EQ(send(self->fd, test_str, send_len, MSG_MORE), send_len); EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_DONTWAIT), -1); EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); - EXPECT_EQ(recv(self->cfd, buf, send_len * 2, MSG_DONTWAIT), + EXPECT_EQ(recv(self->cfd, buf, send_len * 2, MSG_WAITALL), send_len * 2); EXPECT_EQ(memcmp(buf, test_str, send_len), 0); } @@ -180,7 +180,7 @@ TEST_F(tls, sendmsg_single) msg.msg_iov = &vec; msg.msg_iovlen = 1; EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len); - EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_WAITALL), send_len); EXPECT_EQ(memcmp(buf, test_str, send_len), 0); } @@ -288,7 +288,7 @@ TEST_F(tls, splice_from_pipe) ASSERT_GE(pipe(p), 0); EXPECT_GE(write(p[1], mem_send, send_len), 0); EXPECT_GE(splice(p[0], NULL, self->fd, NULL, send_len, 0), 0); - EXPECT_GE(recv(self->cfd, mem_recv, send_len, 0), 0); + EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len); EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } @@ -306,7 +306,7 @@ TEST_F(tls, splice_from_pipe2) EXPECT_GE(splice(p[0], NULL, self->fd, NULL, 8000, 0), 0); EXPECT_GE(write(p2[1], mem_send + 8000, 8000), 0); EXPECT_GE(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 0); - EXPECT_GE(recv(self->cfd, mem_recv, send_len, 0), 0); + EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len); EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } @@ -322,13 +322,13 @@ TEST_F(tls, send_and_splice) ASSERT_GE(pipe(p), 0); EXPECT_EQ(send(self->fd, test_str, send_len2, 0), send_len2); - EXPECT_NE(recv(self->cfd, buf, send_len2, 0), -1); + EXPECT_EQ(recv(self->cfd, buf, send_len2, MSG_WAITALL), send_len2); EXPECT_EQ(memcmp(test_str, buf, send_len2), 0); EXPECT_GE(write(p[1], mem_send, send_len), send_len); EXPECT_GE(splice(p[0], NULL, self->fd, NULL, send_len, 0), send_len); - EXPECT_GE(recv(self->cfd, mem_recv, send_len, 0), 0); + EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len); EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } @@ -436,7 +436,7 @@ TEST_F(tls, multiple_send_single_recv) EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0); EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0); memset(recv_mem, 0, total_len); - EXPECT_EQ(recv(self->cfd, recv_mem, total_len, 0), total_len); + EXPECT_EQ(recv(self->cfd, recv_mem, total_len, MSG_WAITALL), total_len); EXPECT_EQ(memcmp(send_mem, recv_mem, send_len), 0); EXPECT_EQ(memcmp(send_mem, recv_mem + send_len, send_len), 0); @@ -516,17 +516,17 @@ TEST_F(tls, recv_peek_multiple_records) len = strlen(test_str_second) + 1; EXPECT_EQ(send(self->fd, test_str_second, len, 0), len); - len = sizeof(buf); + len = strlen(test_str_first); memset(buf, 0, len); - EXPECT_NE(recv(self->cfd, buf, len, MSG_PEEK), -1); + EXPECT_EQ(recv(self->cfd, buf, len, MSG_PEEK | MSG_WAITALL), len); /* MSG_PEEK can only peek into the current record. */ - len = strlen(test_str_first) + 1; + len = strlen(test_str_first); EXPECT_EQ(memcmp(test_str_first, buf, len), 0); - len = sizeof(buf); + len = strlen(test_str) + 1; memset(buf, 0, len); - EXPECT_NE(recv(self->cfd, buf, len, 0), -1); + EXPECT_EQ(recv(self->cfd, buf, len, MSG_WAITALL), len); /* Non-MSG_PEEK will advance strparser (and therefore record) * however. @@ -543,6 +543,28 @@ TEST_F(tls, recv_peek_multiple_records) len = strlen(test_str_second) + 1; EXPECT_EQ(send(self->fd, test_str_second, len, 0), len); + len = strlen(test_str) + 1; + memset(buf, 0, len); + EXPECT_EQ(recv(self->cfd, buf, len, MSG_PEEK | MSG_WAITALL), len); + + len = strlen(test_str) + 1; + EXPECT_EQ(memcmp(test_str, buf, len), 0); +} + +TEST_F(tls, recv_peek_large_buf_mult_recs) +{ + char const *test_str = "test_read_peek_mult_recs"; + char const *test_str_first = "test_read_peek"; + char const *test_str_second = "_mult_recs"; + int len; + char buf[64]; + + len = strlen(test_str_first); + EXPECT_EQ(send(self->fd, test_str_first, len, 0), len); + + len = strlen(test_str_second) + 1; + EXPECT_EQ(send(self->fd, test_str_second, len, 0), len); + len = sizeof(buf); memset(buf, 0, len); EXPECT_NE(recv(self->cfd, buf, len, MSG_PEEK), -1); @@ -551,6 +573,7 @@ TEST_F(tls, recv_peek_multiple_records) EXPECT_EQ(memcmp(test_str, buf, len), 0); } + TEST_F(tls, pollin) { char const *test_str = "test_poll"; @@ -564,7 +587,7 @@ TEST_F(tls, pollin) EXPECT_EQ(poll(&fd, 1, 20), 1); EXPECT_EQ(fd.revents & POLLIN, 1); - EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_WAITALL), send_len); /* Test timing out */ EXPECT_EQ(poll(&fd, 1, 20), 0); } @@ -582,7 +605,7 @@ TEST_F(tls, poll_wait) /* Set timeout to inf. secs */ EXPECT_EQ(poll(&fd, 1, -1), 1); EXPECT_EQ(fd.revents & POLLIN, 1); - EXPECT_EQ(recv(self->cfd, recv_mem, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, recv_mem, send_len, MSG_WAITALL), send_len); } TEST_F(tls, blocking) @@ -728,7 +751,7 @@ TEST_F(tls, control_msg) EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); vec.iov_base = buf; - EXPECT_EQ(recvmsg(self->cfd, &msg, 0), send_len); + EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL), send_len); cmsg = CMSG_FIRSTHDR(&msg); EXPECT_NE(cmsg, NULL); EXPECT_EQ(cmsg->cmsg_level, SOL_TLS); diff --git a/tools/testing/selftests/net/txring_overwrite.c b/tools/testing/selftests/net/txring_overwrite.c new file mode 100644 index 000000000000..fd8b1c663c39 --- /dev/null +++ b/tools/testing/selftests/net/txring_overwrite.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Verify that consecutive sends over packet tx_ring are mirrored + * with their original content intact. + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <assert.h> +#include <error.h> +#include <errno.h> +#include <fcntl.h> +#include <linux/filter.h> +#include <linux/if_packet.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/udp.h> +#include <poll.h> +#include <pthread.h> +#include <sched.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/utsname.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +const int eth_off = TPACKET_HDRLEN - sizeof(struct sockaddr_ll); +const int cfg_frame_size = 1000; + +static void build_packet(void *buffer, size_t blen, char payload_char) +{ + struct udphdr *udph; + struct ethhdr *eth; + struct iphdr *iph; + size_t off = 0; + + memset(buffer, 0, blen); + + eth = buffer; + eth->h_proto = htons(ETH_P_IP); + + off += sizeof(*eth); + iph = buffer + off; + iph->ttl = 8; + iph->ihl = 5; + iph->version = 4; + iph->saddr = htonl(INADDR_LOOPBACK); + iph->daddr = htonl(INADDR_LOOPBACK + 1); + iph->protocol = IPPROTO_UDP; + iph->tot_len = htons(blen - off); + iph->check = 0; + + off += sizeof(*iph); + udph = buffer + off; + udph->dest = htons(8000); + udph->source = htons(8001); + udph->len = htons(blen - off); + udph->check = 0; + + off += sizeof(*udph); + memset(buffer + off, payload_char, blen - off); +} + +static int setup_rx(void) +{ + int fdr; + + fdr = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_IP)); + if (fdr == -1) + error(1, errno, "socket r"); + + return fdr; +} + +static int setup_tx(char **ring) +{ + struct sockaddr_ll laddr = {}; + struct tpacket_req req = {}; + int fdt; + + fdt = socket(PF_PACKET, SOCK_RAW, 0); + if (fdt == -1) + error(1, errno, "socket t"); + + laddr.sll_family = AF_PACKET; + laddr.sll_protocol = htons(0); + laddr.sll_ifindex = if_nametoindex("lo"); + if (!laddr.sll_ifindex) + error(1, errno, "if_nametoindex"); + + if (bind(fdt, (void *)&laddr, sizeof(laddr))) + error(1, errno, "bind fdt"); + + req.tp_block_size = getpagesize(); + req.tp_block_nr = 1; + req.tp_frame_size = getpagesize(); + req.tp_frame_nr = 1; + + if (setsockopt(fdt, SOL_PACKET, PACKET_TX_RING, + (void *)&req, sizeof(req))) + error(1, errno, "setsockopt ring"); + + *ring = mmap(0, req.tp_block_size * req.tp_block_nr, + PROT_READ | PROT_WRITE, MAP_SHARED, fdt, 0); + if (!*ring) + error(1, errno, "mmap"); + + return fdt; +} + +static void send_pkt(int fdt, void *slot, char payload_char) +{ + struct tpacket_hdr *header = slot; + int ret; + + while (header->tp_status != TP_STATUS_AVAILABLE) + usleep(1000); + + build_packet(slot + eth_off, cfg_frame_size, payload_char); + + header->tp_len = cfg_frame_size; + header->tp_status = TP_STATUS_SEND_REQUEST; + + ret = sendto(fdt, NULL, 0, 0, NULL, 0); + if (ret == -1) + error(1, errno, "kick tx"); +} + +static int read_verify_pkt(int fdr, char payload_char) +{ + char buf[100]; + int ret; + + ret = read(fdr, buf, sizeof(buf)); + if (ret != sizeof(buf)) + error(1, errno, "read"); + + if (buf[60] != payload_char) { + printf("wrong pattern: 0x%x != 0x%x\n", buf[60], payload_char); + return 1; + } + + printf("read: %c (0x%x)\n", buf[60], buf[60]); + return 0; +} + +int main(int argc, char **argv) +{ + const char payload_patterns[] = "ab"; + char *ring; + int fdr, fdt, ret = 0; + + fdr = setup_rx(); + fdt = setup_tx(&ring); + + send_pkt(fdt, ring, payload_patterns[0]); + send_pkt(fdt, ring, payload_patterns[1]); + + ret |= read_verify_pkt(fdr, payload_patterns[0]); + ret |= read_verify_pkt(fdr, payload_patterns[1]); + + if (close(fdt)) + error(1, errno, "close t"); + if (close(fdr)) + error(1, errno, "close r"); + + return ret; +} diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh new file mode 100755 index 000000000000..ac2a30be9b32 --- /dev/null +++ b/tools/testing/selftests/net/udpgro.sh @@ -0,0 +1,182 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Run a series of udpgro functional tests. + +readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" + +cleanup() { + local -r jobs="$(jobs -p)" + local -r ns="$(ip netns list|grep $PEER_NS)" + + [ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null + [ -n "$ns" ] && ip netns del $ns 2>/dev/null +} +trap cleanup EXIT + +cfg_veth() { + ip netns add "${PEER_NS}" + ip -netns "${PEER_NS}" link set lo up + ip link add type veth + ip link set dev veth0 up + ip addr add dev veth0 192.168.1.2/24 + ip addr add dev veth0 2001:db8::2/64 nodad + + ip link set dev veth1 netns "${PEER_NS}" + ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24 + ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad + ip -netns "${PEER_NS}" link set dev veth1 up + ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy +} + +run_one() { + # use 'rx' as separator between sender args and receiver args + local -r all="$@" + local -r tx_args=${all%rx*} + local -r rx_args=${all#*rx} + + cfg_veth + + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} && \ + echo "ok" || \ + echo "failed" & + + # Hack: let bg programs complete the startup + sleep 0.1 + ./udpgso_bench_tx ${tx_args} + wait $(jobs -p) +} + +run_test() { + local -r args=$@ + + printf " %-40s" "$1" + ./in_netns.sh $0 __subprocess $2 rx -G -r $3 +} + +run_one_nat() { + # use 'rx' as separator between sender args and receiver args + local addr1 addr2 pid family="" ipt_cmd=ip6tables + local -r all="$@" + local -r tx_args=${all%rx*} + local -r rx_args=${all#*rx} + + if [[ ${tx_args} = *-4* ]]; then + ipt_cmd=iptables + family=-4 + addr1=192.168.1.1 + addr2=192.168.1.3/24 + else + addr1=2001:db8::1 + addr2="2001:db8::3/64 nodad" + fi + + cfg_veth + ip -netns "${PEER_NS}" addr add dev veth1 ${addr2} + + # fool the GRO engine changing the destination address ... + ip netns exec "${PEER_NS}" $ipt_cmd -t nat -I PREROUTING -d ${addr1} -j DNAT --to-destination ${addr2%/*} + + # ... so that GRO will match the UDP_GRO enabled socket, but packets + # will land on the 'plain' one + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 & + pid=$! + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} && \ + echo "ok" || \ + echo "failed"& + + sleep 0.1 + ./udpgso_bench_tx ${tx_args} + kill -INT $pid + wait $(jobs -p) +} + +run_one_2sock() { + # use 'rx' as separator between sender args and receiver args + local -r all="$@" + local -r tx_args=${all%rx*} + local -r rx_args=${all#*rx} + + cfg_veth + + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} -p 12345 & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} && \ + echo "ok" || \ + echo "failed" & + + # Hack: let bg programs complete the startup + sleep 0.1 + ./udpgso_bench_tx ${tx_args} -p 12345 + sleep 0.1 + # first UDP GSO socket should be closed at this point + ./udpgso_bench_tx ${tx_args} + wait $(jobs -p) +} + +run_nat_test() { + local -r args=$@ + + printf " %-40s" "$1" + ./in_netns.sh $0 __subprocess_nat $2 rx -r $3 +} + +run_2sock_test() { + local -r args=$@ + + printf " %-40s" "$1" + ./in_netns.sh $0 __subprocess_2sock $2 rx -G -r $3 +} + +run_all() { + local -r core_args="-l 4" + local -r ipv4_args="${core_args} -4 -D 192.168.1.1" + local -r ipv6_args="${core_args} -6 -D 2001:db8::1" + + echo "ipv4" + run_test "no GRO" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400" + + # explicitly check we are not receiving UDP_SEGMENT cmsg (-S -1) + # when GRO does not take place + run_test "no GRO chk cmsg" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400 -S -1" + + # the GSO packets are aggregated because: + # * veth schedule napi after each xmit + # * segmentation happens in BH context, veth napi poll is delayed after + # the transmission of the last segment + run_test "GRO" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720" + run_test "GRO chk cmsg" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472" + run_test "GRO with custom segment size" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720" + run_test "GRO with custom segment size cmsg" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720 -S 500" + + run_nat_test "bad GRO lookup" "${ipv4_args} -M 1 -s 14720 -S 0" "-n 10 -l 1472" + run_2sock_test "multiple GRO socks" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472" + + echo "ipv6" + run_test "no GRO" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400" + run_test "no GRO chk cmsg" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400 -S -1" + run_test "GRO" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520" + run_test "GRO chk cmsg" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520 -S 1452" + run_test "GRO with custom segment size" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520" + run_test "GRO with custom segment size cmsg" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520 -S 500" + + run_nat_test "bad GRO lookup" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 10 -l 1452" + run_2sock_test "multiple GRO socks" "${ipv6_args} -M 1 -s 14520 -S 0 " "-n 1 -l 14520 -S 1452" +} + +if [ ! -f ../bpf/xdp_dummy.o ]; then + echo "Missing xdp_dummy helper. Build bpf selftest first" + exit -1 +fi + +if [[ $# -eq 0 ]]; then + run_all +elif [[ $1 == "__subprocess" ]]; then + shift + run_one $@ +elif [[ $1 == "__subprocess_nat" ]]; then + shift + run_one_nat $@ +elif [[ $1 == "__subprocess_2sock" ]]; then + shift + run_one_2sock $@ +fi diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh new file mode 100755 index 000000000000..820bc50f6b68 --- /dev/null +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -0,0 +1,95 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Run a series of udpgro benchmarks + +readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" + +cleanup() { + local -r jobs="$(jobs -p)" + local -r ns="$(ip netns list|grep $PEER_NS)" + + [ -n "${jobs}" ] && kill -INT ${jobs} 2>/dev/null + [ -n "$ns" ] && ip netns del $ns 2>/dev/null +} +trap cleanup EXIT + +run_one() { + # use 'rx' as separator between sender args and receiver args + local -r all="$@" + local -r tx_args=${all%rx*} + local rx_args=${all#*rx} + + [[ "${tx_args}" == *"-4"* ]] && rx_args="${rx_args} -4" + + ip netns add "${PEER_NS}" + ip -netns "${PEER_NS}" link set lo up + ip link add type veth + ip link set dev veth0 up + ip addr add dev veth0 192.168.1.2/24 + ip addr add dev veth0 2001:db8::2/64 nodad + + ip link set dev veth1 netns "${PEER_NS}" + ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24 + ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad + ip -netns "${PEER_NS}" link set dev veth1 up + + ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy + ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r & + + # Hack: let bg programs complete the startup + sleep 0.1 + ./udpgso_bench_tx ${tx_args} +} + +run_in_netns() { + local -r args=$@ + + ./in_netns.sh $0 __subprocess ${args} +} + +run_udp() { + local -r args=$@ + + echo "udp gso - over veth touching data" + run_in_netns ${args} -S 0 rx + + echo "udp gso and gro - over veth touching data" + run_in_netns ${args} -S 0 rx -G +} + +run_tcp() { + local -r args=$@ + + echo "tcp - over veth touching data" + run_in_netns ${args} -t rx +} + +run_all() { + local -r core_args="-l 4" + local -r ipv4_args="${core_args} -4 -D 192.168.1.1" + local -r ipv6_args="${core_args} -6 -D 2001:db8::1" + + echo "ipv4" + run_tcp "${ipv4_args}" + run_udp "${ipv4_args}" + + echo "ipv6" + run_tcp "${ipv4_args}" + run_udp "${ipv6_args}" +} + +if [ ! -f ../bpf/xdp_dummy.o ]; then + echo "Missing xdp_dummy helper. Build bpf selftest first" + exit -1 +fi + +if [[ $# -eq 0 ]]; then + run_all +elif [[ $1 == "__subprocess" ]]; then + shift + run_one $@ +else + run_in_netns $@ +fi diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh index 99e537ab5ad9..5670a9ffd8eb 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -34,7 +34,10 @@ run_udp() { run_in_netns ${args} echo "udp gso" - run_in_netns ${args} -S + run_in_netns ${args} -S 0 + + echo "udp gso zerocopy" + run_in_netns ${args} -S 0 -z } run_tcp() { diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c index 727cf67a3f75..db3d4a8b5a4c 100644 --- a/tools/testing/selftests/net/udpgso_bench_rx.c +++ b/tools/testing/selftests/net/udpgso_bench_rx.c @@ -31,9 +31,23 @@ #include <sys/wait.h> #include <unistd.h> +#ifndef UDP_GRO +#define UDP_GRO 104 +#endif + static int cfg_port = 8000; static bool cfg_tcp; static bool cfg_verify; +static bool cfg_read_all; +static bool cfg_gro_segment; +static int cfg_family = PF_INET6; +static int cfg_alen = sizeof(struct sockaddr_in6); +static int cfg_expected_pkt_nr; +static int cfg_expected_pkt_len; +static int cfg_expected_gso_size; +static int cfg_connect_timeout_ms; +static int cfg_rcv_timeout_ms; +static struct sockaddr_storage cfg_bind_addr; static bool interrupted; static unsigned long packets, bytes; @@ -44,6 +58,29 @@ static void sigint_handler(int signum) interrupted = true; } +static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr) +{ + struct sockaddr_in6 *addr6 = (void *) sockaddr; + struct sockaddr_in *addr4 = (void *) sockaddr; + + switch (domain) { + case PF_INET: + addr4->sin_family = AF_INET; + addr4->sin_port = htons(cfg_port); + if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) + error(1, 0, "ipv4 parse error: %s", str_addr); + break; + case PF_INET6: + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) + error(1, 0, "ipv6 parse error: %s", str_addr); + break; + default: + error(1, 0, "illegal domain"); + } +} + static unsigned long gettimeofday_ms(void) { struct timeval tv; @@ -52,7 +89,7 @@ static unsigned long gettimeofday_ms(void) return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); } -static void do_poll(int fd) +static void do_poll(int fd, int timeout_ms) { struct pollfd pfd; int ret; @@ -63,22 +100,31 @@ static void do_poll(int fd) do { ret = poll(&pfd, 1, 10); + if (interrupted) + break; if (ret == -1) error(1, errno, "poll"); - if (ret == 0) - continue; + if (ret == 0) { + if (!timeout_ms) + continue; + + timeout_ms -= 10; + if (timeout_ms <= 0) { + interrupted = true; + break; + } + } if (pfd.revents != POLLIN) error(1, errno, "poll: 0x%x expected 0x%x\n", pfd.revents, POLLIN); - } while (!ret && !interrupted); + } while (!ret); } static int do_socket(bool do_tcp) { - struct sockaddr_in6 addr = {0}; int fd, val; - fd = socket(PF_INET6, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0); + fd = socket(cfg_family, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0); if (fd == -1) error(1, errno, "socket"); @@ -89,10 +135,7 @@ static int do_socket(bool do_tcp) if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val))) error(1, errno, "setsockopt reuseport"); - addr.sin6_family = PF_INET6; - addr.sin6_port = htons(cfg_port); - addr.sin6_addr = in6addr_any; - if (bind(fd, (void *) &addr, sizeof(addr))) + if (bind(fd, (void *)&cfg_bind_addr, cfg_alen)) error(1, errno, "bind"); if (do_tcp) { @@ -101,7 +144,9 @@ static int do_socket(bool do_tcp) if (listen(accept_fd, 1)) error(1, errno, "listen"); - do_poll(accept_fd); + do_poll(accept_fd, cfg_connect_timeout_ms); + if (interrupted) + exit(0); fd = accept(accept_fd, NULL, NULL); if (fd == -1) @@ -164,51 +209,131 @@ static void do_verify_udp(const char *data, int len) } } +static int recv_msg(int fd, char *buf, int len, int *gso_size) +{ + char control[CMSG_SPACE(sizeof(uint16_t))] = {0}; + struct msghdr msg = {0}; + struct iovec iov = {0}; + struct cmsghdr *cmsg; + uint16_t *gsosizeptr; + int ret; + + iov.iov_base = buf; + iov.iov_len = len; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + *gso_size = -1; + ret = recvmsg(fd, &msg, MSG_TRUNC | MSG_DONTWAIT); + if (ret != -1) { + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; + cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level == SOL_UDP + && cmsg->cmsg_type == UDP_GRO) { + gsosizeptr = (uint16_t *) CMSG_DATA(cmsg); + *gso_size = *gsosizeptr; + break; + } + } + } + return ret; +} + /* Flush all outstanding datagrams. Verify first few bytes of each. */ static void do_flush_udp(int fd) { - static char rbuf[ETH_DATA_LEN]; - int ret, len, budget = 256; + static char rbuf[ETH_MAX_MTU]; + int ret, len, gso_size, budget = 256; - len = cfg_verify ? sizeof(rbuf) : 0; + len = cfg_read_all ? sizeof(rbuf) : 0; while (budget--) { /* MSG_TRUNC will make return value full datagram length */ - ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT); + if (!cfg_expected_gso_size) + ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT); + else + ret = recv_msg(fd, rbuf, len, &gso_size); if (ret == -1 && errno == EAGAIN) - return; + break; if (ret == -1) error(1, errno, "recv"); - if (len) { + if (cfg_expected_pkt_len && ret != cfg_expected_pkt_len) + error(1, 0, "recv: bad packet len, got %d," + " expected %d\n", ret, cfg_expected_pkt_len); + if (len && cfg_verify) { if (ret == 0) error(1, errno, "recv: 0 byte datagram\n"); do_verify_udp(rbuf, ret); } + if (cfg_expected_gso_size && cfg_expected_gso_size != gso_size) + error(1, 0, "recv: bad gso size, got %d, expected %d " + "(-1 == no gso cmsg))\n", gso_size, + cfg_expected_gso_size); packets++; bytes += ret; + if (cfg_expected_pkt_nr && packets >= cfg_expected_pkt_nr) + break; } } static void usage(const char *filepath) { - error(1, 0, "Usage: %s [-tv] [-p port]", filepath); + error(1, 0, "Usage: %s [-C connect_timeout] [-Grtv] [-b addr] [-p port]" + " [-l pktlen] [-n packetnr] [-R rcv_timeout] [-S gsosize]", + filepath); } static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "ptv")) != -1) { + /* bind to any by default */ + setup_sockaddr(PF_INET6, "::", &cfg_bind_addr); + while ((c = getopt(argc, argv, "4b:C:Gl:n:p:rR:S:tv")) != -1) { switch (c) { + case '4': + cfg_family = PF_INET; + cfg_alen = sizeof(struct sockaddr_in); + setup_sockaddr(PF_INET, "0.0.0.0", &cfg_bind_addr); + break; + case 'b': + setup_sockaddr(cfg_family, optarg, &cfg_bind_addr); + break; + case 'C': + cfg_connect_timeout_ms = strtoul(optarg, NULL, 0); + break; + case 'G': + cfg_gro_segment = true; + break; + case 'l': + cfg_expected_pkt_len = strtoul(optarg, NULL, 0); + break; + case 'n': + cfg_expected_pkt_nr = strtoul(optarg, NULL, 0); + break; case 'p': - cfg_port = htons(strtoul(optarg, NULL, 0)); + cfg_port = strtoul(optarg, NULL, 0); + break; + case 'r': + cfg_read_all = true; + break; + case 'R': + cfg_rcv_timeout_ms = strtoul(optarg, NULL, 0); + break; + case 'S': + cfg_expected_gso_size = strtol(optarg, NULL, 0); break; case 't': cfg_tcp = true; break; case 'v': cfg_verify = true; + cfg_read_all = true; break; } } @@ -222,14 +347,21 @@ static void parse_opts(int argc, char **argv) static void do_recv(void) { + int timeout_ms = cfg_tcp ? cfg_rcv_timeout_ms : cfg_connect_timeout_ms; unsigned long tnow, treport; int fd; fd = do_socket(cfg_tcp); + if (cfg_gro_segment && !cfg_tcp) { + int val = 1; + if (setsockopt(fd, IPPROTO_UDP, UDP_GRO, &val, sizeof(val))) + error(1, errno, "setsockopt UDP_GRO"); + } + treport = gettimeofday_ms() + 1000; do { - do_poll(fd); + do_poll(fd, timeout_ms); if (cfg_tcp) do_flush_tcp(fd); @@ -247,8 +379,14 @@ static void do_recv(void) treport = tnow + 1000; } + timeout_ms = cfg_rcv_timeout_ms; + } while (!interrupted); + if (cfg_expected_pkt_nr && (packets != cfg_expected_pkt_nr)) + error(1, 0, "wrong packet number! got %ld, expected %d\n", + packets, cfg_expected_pkt_nr); + if (close(fd)) error(1, errno, "close"); } diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c index e821564053cf..4074538b5df5 100644 --- a/tools/testing/selftests/net/udpgso_bench_tx.c +++ b/tools/testing/selftests/net/udpgso_bench_tx.c @@ -52,6 +52,8 @@ static bool cfg_segment; static bool cfg_sendmmsg; static bool cfg_tcp; static bool cfg_zerocopy; +static int cfg_msg_nr; +static uint16_t cfg_gso_size; static socklen_t cfg_alen; static struct sockaddr_storage cfg_dst_addr; @@ -205,14 +207,14 @@ static void send_udp_segment_cmsg(struct cmsghdr *cm) cm->cmsg_level = SOL_UDP; cm->cmsg_type = UDP_SEGMENT; - cm->cmsg_len = CMSG_LEN(sizeof(cfg_mss)); + cm->cmsg_len = CMSG_LEN(sizeof(cfg_gso_size)); valp = (void *)CMSG_DATA(cm); - *valp = cfg_mss; + *valp = cfg_gso_size; } static int send_udp_segment(int fd, char *data) { - char control[CMSG_SPACE(sizeof(cfg_mss))] = {0}; + char control[CMSG_SPACE(sizeof(cfg_gso_size))] = {0}; struct msghdr msg = {0}; struct iovec iov = {0}; int ret; @@ -241,7 +243,7 @@ static int send_udp_segment(int fd, char *data) static void usage(const char *filepath) { - error(1, 0, "Usage: %s [-46cmStuz] [-C cpu] [-D dst ip] [-l secs] [-p port] [-s sendsize]", + error(1, 0, "Usage: %s [-46cmtuz] [-C cpu] [-D dst ip] [-l secs] [-m messagenr] [-p port] [-s sendsize] [-S gsosize]", filepath); } @@ -250,7 +252,7 @@ static void parse_opts(int argc, char **argv) int max_len, hdrlen; int c; - while ((c = getopt(argc, argv, "46cC:D:l:mp:s:Stuz")) != -1) { + while ((c = getopt(argc, argv, "46cC:D:l:mM:p:s:S:tuz")) != -1) { switch (c) { case '4': if (cfg_family != PF_UNSPEC) @@ -279,6 +281,9 @@ static void parse_opts(int argc, char **argv) case 'm': cfg_sendmmsg = true; break; + case 'M': + cfg_msg_nr = strtoul(optarg, NULL, 10); + break; case 'p': cfg_port = strtoul(optarg, NULL, 0); break; @@ -286,6 +291,7 @@ static void parse_opts(int argc, char **argv) cfg_payload_len = strtoul(optarg, NULL, 0); break; case 'S': + cfg_gso_size = strtoul(optarg, NULL, 0); cfg_segment = true; break; case 't': @@ -317,6 +323,8 @@ static void parse_opts(int argc, char **argv) cfg_mss = ETH_DATA_LEN - hdrlen; max_len = ETH_MAX_MTU - hdrlen; + if (!cfg_gso_size) + cfg_gso_size = cfg_mss; if (cfg_payload_len > max_len) error(1, 0, "payload length %u exceeds max %u", @@ -392,10 +400,12 @@ int main(int argc, char **argv) else num_sends += send_udp(fd, buf[i]); num_msgs++; - if (cfg_zerocopy && ((num_msgs & 0xF) == 0)) flush_zerocopy(fd); + if (cfg_msg_nr && num_msgs >= cfg_msg_nr) + break; + tnow = gettimeofday_ms(); if (tnow > treport) { fprintf(stderr, diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh new file mode 100755 index 000000000000..71d7fdc513c1 --- /dev/null +++ b/tools/testing/selftests/net/xfrm_policy.sh @@ -0,0 +1,411 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Check xfrm policy resolution. Topology: +# +# 1.2 1.1 3.1 3.10 2.1 2.2 +# eth1 eth1 veth0 veth0 eth1 eth1 +# ns1 ---- ns3 ----- ns4 ---- ns2 +# +# ns3 and ns4 are connected via ipsec tunnel. +# pings from ns1 to ns2 (and vice versa) are supposed to work like this: +# ns1: ping 10.0.2.2: passes via ipsec tunnel. +# ns2: ping 10.0.1.2: passes via ipsec tunnel. + +# ns1: ping 10.0.1.253: passes via ipsec tunnel (direct policy) +# ns2: ping 10.0.2.253: passes via ipsec tunnel (direct policy) +# +# ns1: ping 10.0.2.254: does NOT pass via ipsec tunnel (exception) +# ns2: ping 10.0.1.254: does NOT pass via ipsec tunnel (exception) + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 +policy_checks_ok=1 + +KEY_SHA=0xdeadbeef1234567890abcdefabcdefabcdefabcd +KEY_AES=0x0123456789abcdef0123456789012345 +SPI1=0x1 +SPI2=0x2 + +do_esp_policy() { + local ns=$1 + local me=$2 + local remote=$3 + local lnet=$4 + local rnet=$5 + + # to encrypt packets as they go out (includes forwarded packets that need encapsulation) + ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 100 action allow + # to fwd decrypted packets after esp processing: + ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 100 action allow +} + +do_esp() { + local ns=$1 + local me=$2 + local remote=$3 + local lnet=$4 + local rnet=$5 + local spi_out=$6 + local spi_in=$7 + + ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $rnet dst $lnet + ip -net $ns xfrm state add src $me dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet + + do_esp_policy $ns $me $remote $lnet $rnet +} + +# add policies with different netmasks, to make sure kernel carries +# the policies contained within new netmask over when search tree is +# re-built. +# peer netns that are supposed to be encapsulated via esp have addresses +# in the 10.0.1.0/24 and 10.0.2.0/24 subnets, respectively. +# +# Adding a policy for '10.0.1.0/23' will make it necessary to +# alter the prefix of 10.0.1.0 subnet. +# In case new prefix overlaps with existing node, the node and all +# policies it carries need to be merged with the existing one(s). +# +# Do that here. +do_overlap() +{ + local ns=$1 + + # adds new nodes to tree (neither network exists yet in policy database). + ip -net $ns xfrm policy add src 10.1.0.0/24 dst 10.0.0.0/24 dir fwd priority 200 action block + + # adds a new node in the 10.0.0.0/24 tree (dst node exists). + ip -net $ns xfrm policy add src 10.2.0.0/24 dst 10.0.0.0/24 dir fwd priority 200 action block + + # adds a 10.2.0.0/23 node, but for different dst. + ip -net $ns xfrm policy add src 10.2.0.0/23 dst 10.0.1.0/24 dir fwd priority 200 action block + + # dst now overlaps with the 10.0.1.0/24 ESP policy in fwd. + # kernel must 'promote' existing one (10.0.0.0/24) to 10.0.0.0/23. + # But 10.0.0.0/23 also includes existing 10.0.1.0/24, so that node + # also has to be merged too, including source-sorted subtrees. + # old: + # 10.0.0.0/24 (node 1 in dst tree of the bin) + # 10.1.0.0/24 (node in src tree of dst node 1) + # 10.2.0.0/24 (node in src tree of dst node 1) + # 10.0.1.0/24 (node 2 in dst tree of the bin) + # 10.0.2.0/24 (node in src tree of dst node 2) + # 10.2.0.0/24 (node in src tree of dst node 2) + # + # The next 'policy add' adds dst '10.0.0.0/23', which means + # that dst node 1 and dst node 2 have to be merged including + # the sub-tree. As no duplicates are allowed, policies in + # the two '10.0.2.0/24' are also merged. + # + # after the 'add', internal search tree should look like this: + # 10.0.0.0/23 (node in dst tree of bin) + # 10.0.2.0/24 (node in src tree of dst node) + # 10.1.0.0/24 (node in src tree of dst node) + # 10.2.0.0/24 (node in src tree of dst node) + # + # 10.0.0.0/24 and 10.0.1.0/24 nodes have been merged as 10.0.0.0/23. + ip -net $ns xfrm policy add src 10.1.0.0/24 dst 10.0.0.0/23 dir fwd priority 200 action block +} + +do_esp_policy_get_check() { + local ns=$1 + local lnet=$2 + local rnet=$3 + + ip -net $ns xfrm policy get src $lnet dst $rnet dir out > /dev/null + if [ $? -ne 0 ] && [ $policy_checks_ok -eq 1 ] ;then + policy_checks_ok=0 + echo "FAIL: ip -net $ns xfrm policy get src $lnet dst $rnet dir out" + ret=1 + fi + + ip -net $ns xfrm policy get src $rnet dst $lnet dir fwd > /dev/null + if [ $? -ne 0 ] && [ $policy_checks_ok -eq 1 ] ;then + policy_checks_ok=0 + echo "FAIL: ip -net $ns xfrm policy get src $rnet dst $lnet dir fwd" + ret=1 + fi +} + +do_exception() { + local ns=$1 + local me=$2 + local remote=$3 + local encryptip=$4 + local plain=$5 + + # network $plain passes without tunnel + ip -net $ns xfrm policy add dst $plain dir out priority 10 action allow + + # direct policy for $encryptip, use tunnel, higher prio takes precedence + ip -net $ns xfrm policy add dst $encryptip dir out tmpl src $me dst $remote proto esp mode tunnel priority 1 action allow +} + +# policies that are not supposed to match any packets generated in this test. +do_dummies4() { + local ns=$1 + + for i in $(seq 10 16);do + # dummy policy with wildcard src/dst. + echo netns exec $ns ip xfrm policy add src 0.0.0.0/0 dst 10.$i.99.0/30 dir out action block + echo netns exec $ns ip xfrm policy add src 10.$i.99.0/30 dst 0.0.0.0/0 dir out action block + for j in $(seq 32 64);do + echo netns exec $ns ip xfrm policy add src 10.$i.1.0/30 dst 10.$i.$j.0/30 dir out action block + # silly, as it encompasses the one above too, but its allowed: + echo netns exec $ns ip xfrm policy add src 10.$i.1.0/29 dst 10.$i.$j.0/29 dir out action block + # and yet again, even more broad one. + echo netns exec $ns ip xfrm policy add src 10.$i.1.0/24 dst 10.$i.$j.0/24 dir out action block + echo netns exec $ns ip xfrm policy add src 10.$i.$j.0/24 dst 10.$i.1.0/24 dir fwd action block + done + done | ip -batch /dev/stdin +} + +do_dummies6() { + local ns=$1 + + for i in $(seq 10 16);do + for j in $(seq 32 64);do + echo netns exec $ns ip xfrm policy add src dead:$i::/64 dst dead:$i:$j::/64 dir out action block + echo netns exec $ns ip xfrm policy add src dead:$i:$j::/64 dst dead:$i::/24 dir fwd action block + done + done | ip -batch /dev/stdin +} + +check_ipt_policy_count() +{ + ns=$1 + + ip netns exec $ns iptables-save -c |grep policy | ( read c rest + ip netns exec $ns iptables -Z + if [ x"$c" = x'[0:0]' ]; then + exit 0 + elif [ x"$c" = x ]; then + echo "ERROR: No counters" + ret=1 + exit 111 + else + exit 1 + fi + ) +} + +check_xfrm() { + # 0: iptables -m policy rule count == 0 + # 1: iptables -m policy rule count != 0 + rval=$1 + ip=$2 + lret=0 + + ip netns exec ns1 ping -q -c 1 10.0.2.$ip > /dev/null + + check_ipt_policy_count ns3 + if [ $? -ne $rval ] ; then + lret=1 + fi + check_ipt_policy_count ns4 + if [ $? -ne $rval ] ; then + lret=1 + fi + + ip netns exec ns2 ping -q -c 1 10.0.1.$ip > /dev/null + + check_ipt_policy_count ns3 + if [ $? -ne $rval ] ; then + lret=1 + fi + check_ipt_policy_count ns4 + if [ $? -ne $rval ] ; then + lret=1 + fi + + return $lret +} + +check_exceptions() +{ + logpostfix="$1" + local lret=0 + + # ping to .254 should be excluded from the tunnel (exception is in place). + check_xfrm 0 254 + if [ $? -ne 0 ]; then + echo "FAIL: expected ping to .254 to fail ($logpostfix)" + lret=1 + else + echo "PASS: ping to .254 bypassed ipsec tunnel ($logpostfix)" + fi + + # ping to .253 should use use ipsec due to direct policy exception. + check_xfrm 1 253 + if [ $? -ne 0 ]; then + echo "FAIL: expected ping to .253 to use ipsec tunnel ($logpostfix)" + lret=1 + else + echo "PASS: direct policy matches ($logpostfix)" + fi + + # ping to .2 should use ipsec. + check_xfrm 1 2 + if [ $? -ne 0 ]; then + echo "FAIL: expected ping to .2 to use ipsec tunnel ($logpostfix)" + lret=1 + else + echo "PASS: policy matches ($logpostfix)" + fi + + return $lret +} + +#check for needed privileges +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip +fi + +ip -Version 2>/dev/null >/dev/null +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without the ip tool" + exit $ksft_skip +fi + +# needed to check if policy lookup got valid ipsec result +iptables --version 2>/dev/null >/dev/null +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without iptables tool" + exit $ksft_skip +fi + +for i in 1 2 3 4; do + ip netns add ns$i + ip -net ns$i link set lo up +done + +DEV=veth0 +ip link add $DEV netns ns1 type veth peer name eth1 netns ns3 +ip link add $DEV netns ns2 type veth peer name eth1 netns ns4 + +ip link add $DEV netns ns3 type veth peer name veth0 netns ns4 + +DEV=veth0 +for i in 1 2; do + ip -net ns$i link set $DEV up + ip -net ns$i addr add 10.0.$i.2/24 dev $DEV + ip -net ns$i addr add dead:$i::2/64 dev $DEV + + ip -net ns$i addr add 10.0.$i.253 dev $DEV + ip -net ns$i addr add 10.0.$i.254 dev $DEV + ip -net ns$i addr add dead:$i::fd dev $DEV + ip -net ns$i addr add dead:$i::fe dev $DEV +done + +for i in 3 4; do +ip -net ns$i link set eth1 up +ip -net ns$i link set veth0 up +done + +ip -net ns1 route add default via 10.0.1.1 +ip -net ns2 route add default via 10.0.2.1 + +ip -net ns3 addr add 10.0.1.1/24 dev eth1 +ip -net ns3 addr add 10.0.3.1/24 dev veth0 +ip -net ns3 addr add 2001:1::1/64 dev eth1 +ip -net ns3 addr add 2001:3::1/64 dev veth0 + +ip -net ns3 route add default via 10.0.3.10 + +ip -net ns4 addr add 10.0.2.1/24 dev eth1 +ip -net ns4 addr add 10.0.3.10/24 dev veth0 +ip -net ns4 addr add 2001:2::1/64 dev eth1 +ip -net ns4 addr add 2001:3::10/64 dev veth0 +ip -net ns4 route add default via 10.0.3.1 + +for j in 4 6; do + for i in 3 4;do + ip netns exec ns$i sysctl net.ipv$j.conf.eth1.forwarding=1 > /dev/null + ip netns exec ns$i sysctl net.ipv$j.conf.veth0.forwarding=1 > /dev/null + done +done + +# abuse iptables rule counter to check if ping matches a policy +ip netns exec ns3 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec +ip netns exec ns4 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec +if [ $? -ne 0 ];then + echo "SKIP: Could not insert iptables rule" + for i in 1 2 3 4;do ip netns del ns$i;done + exit $ksft_skip +fi + +# localip remoteip localnet remotenet +do_esp ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2 +do_esp ns3 dead:3::1 dead:3::10 dead:1::/64 dead:2::/64 $SPI1 $SPI2 +do_esp ns4 10.0.3.10 10.0.3.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1 +do_esp ns4 dead:3::10 dead:3::1 dead:2::/64 dead:1::/64 $SPI2 $SPI1 + +do_dummies4 ns3 +do_dummies6 ns4 + +do_esp_policy_get_check ns3 10.0.1.0/24 10.0.2.0/24 +do_esp_policy_get_check ns4 10.0.2.0/24 10.0.1.0/24 +do_esp_policy_get_check ns3 dead:1::/64 dead:2::/64 +do_esp_policy_get_check ns4 dead:2::/64 dead:1::/64 + +# ping to .254 should use ipsec, exception is not installed. +check_xfrm 1 254 +if [ $? -ne 0 ]; then + echo "FAIL: expected ping to .254 to use ipsec tunnel" + ret=1 +else + echo "PASS: policy before exception matches" +fi + +# installs exceptions +# localip remoteip encryptdst plaindst +do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28 +do_exception ns4 10.0.3.10 10.0.3.1 10.0.1.253 10.0.1.240/28 + +do_exception ns3 dead:3::1 dead:3::10 dead:2::fd dead:2:f0::/96 +do_exception ns4 dead:3::10 dead:3::1 dead:1::fd dead:1:f0::/96 + +check_exceptions "exceptions" +if [ $? -ne 0 ]; then + ret=1 +fi + +# insert block policies with adjacent/overlapping netmasks +do_overlap ns3 + +check_exceptions "exceptions and block policies" +if [ $? -ne 0 ]; then + ret=1 +fi + +for n in ns3 ns4;do + ip -net $n xfrm policy set hthresh4 28 24 hthresh6 126 125 + sleep $((RANDOM%5)) +done + +check_exceptions "exceptions and block policies after hresh changes" + +# full flush of policy db, check everything gets freed incl. internal meta data +ip -net ns3 xfrm policy flush + +do_esp_policy ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 +do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28 + +# move inexact policies to hash table +ip -net ns3 xfrm policy set hthresh4 16 16 + +sleep $((RANDOM%5)) +check_exceptions "exceptions and block policies after hthresh change in ns3" + +# restore original hthresh settings -- move policies back to tables +for n in ns3 ns4;do + ip -net $n xfrm policy set hthresh4 32 32 hthresh6 128 128 + sleep $((RANDOM%5)) +done +check_exceptions "exceptions and block policies after hresh change to normal" + +for i in 1 2 3 4;do ip netns del ns$i;done + +exit $ret diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile new file mode 100644 index 000000000000..c9ff2b47bd1c --- /dev/null +++ b/tools/testing/selftests/netfilter/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for netfilter selftests + +TEST_PROGS := nft_trans_stress.sh nft_nat.sh + +include ../lib.mk diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config new file mode 100644 index 000000000000..59caa8f71cd8 --- /dev/null +++ b/tools/testing/selftests/netfilter/config @@ -0,0 +1,2 @@ +CONFIG_NET_NS=y +CONFIG_NF_TABLES_INET=y diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh new file mode 100755 index 000000000000..8ec76681605c --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -0,0 +1,762 @@ +#!/bin/bash +# +# This test is for basic NAT functionality: snat, dnat, redirect, masquerade. +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip netns add ns0 +ip netns add ns1 +ip netns add ns2 + +ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 +ip link add veth1 netns ns0 type veth peer name eth0 netns ns2 + +ip -net ns0 link set lo up +ip -net ns0 link set veth0 up +ip -net ns0 addr add 10.0.1.1/24 dev veth0 +ip -net ns0 addr add dead:1::1/64 dev veth0 + +ip -net ns0 link set veth1 up +ip -net ns0 addr add 10.0.2.1/24 dev veth1 +ip -net ns0 addr add dead:2::1/64 dev veth1 + +for i in 1 2; do + ip -net ns$i link set lo up + ip -net ns$i link set eth0 up + ip -net ns$i addr add 10.0.$i.99/24 dev eth0 + ip -net ns$i route add default via 10.0.$i.1 + ip -net ns$i addr add dead:$i::99/64 dev eth0 + ip -net ns$i route add default via dead:$i::1 +done + +bad_counter() +{ + local ns=$1 + local counter=$2 + local expect=$3 + + echo "ERROR: $counter counter in $ns has unexpected value (expected $expect)" 1>&2 + ip netns exec $ns nft list counter inet filter $counter 1>&2 +} + +check_counters() +{ + ns=$1 + local lret=0 + + cnt=$(ip netns exec $ns nft list counter inet filter ns0in | grep -q "packets 1 bytes 84") + if [ $? -ne 0 ]; then + bad_counter $ns ns0in "packets 1 bytes 84" + lret=1 + fi + cnt=$(ip netns exec $ns nft list counter inet filter ns0out | grep -q "packets 1 bytes 84") + if [ $? -ne 0 ]; then + bad_counter $ns ns0out "packets 1 bytes 84" + lret=1 + fi + + expect="packets 1 bytes 104" + cnt=$(ip netns exec $ns nft list counter inet filter ns0in6 | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter $ns ns0in6 "$expect" + lret=1 + fi + cnt=$(ip netns exec $ns nft list counter inet filter ns0out6 | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter $ns ns0out6 "$expect" + lret=1 + fi + + return $lret +} + +check_ns0_counters() +{ + local ns=$1 + local lret=0 + + cnt=$(ip netns exec ns0 nft list counter inet filter ns0in | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0in "packets 0 bytes 0" + lret=1 + fi + + cnt=$(ip netns exec ns0 nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0in6 "packets 0 bytes 0" + lret=1 + fi + + cnt=$(ip netns exec ns0 nft list counter inet filter ns0out | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0out "packets 0 bytes 0" + lret=1 + fi + cnt=$(ip netns exec ns0 nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0out6 "packets 0 bytes 0" + lret=1 + fi + + for dir in "in" "out" ; do + expect="packets 1 bytes 84" + cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 $ns$dir "$expect" + lret=1 + fi + + expect="packets 1 bytes 104" + cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir}6 | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 $ns$dir6 "$expect" + lret=1 + fi + done + + return $lret +} + +reset_counters() +{ + for i in 0 1 2;do + ip netns exec ns$i nft reset counters inet > /dev/null + done +} + +test_local_dnat6() +{ + local lret=0 +ip netns exec ns0 nft -f - <<EOF +table ip6 nat { + chain output { + type nat hook output priority 0; policy accept; + ip6 daddr dead:1::99 dnat to dead:2::99 + } +} +EOF + if [ $? -ne 0 ]; then + echo "SKIP: Could not add add ip6 dnat hook" + return $ksft_skip + fi + + # ping netns1, expect rewrite to netns2 + ip netns exec ns0 ping -q -c 1 dead:1::99 > /dev/null + if [ $? -ne 0 ]; then + lret=1 + echo "ERROR: ping6 failed" + return $lret + fi + + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns1$dir "$expect" + lret=1 + fi + done + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns2$dir "$expect" + lret=1 + fi + done + + # expect 0 count in ns1 + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # expect 1 packet in ns2 + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns0$dir "$expect" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ipv6 ping to ns1 was NATted to ns2" + ip netns exec ns0 nft flush chain ip6 nat output + + return $lret +} + +test_local_dnat() +{ + local lret=0 +ip netns exec ns0 nft -f - <<EOF +table ip nat { + chain output { + type nat hook output priority 0; policy accept; + ip daddr 10.0.1.99 dnat to 10.0.2.99 + } +} +EOF + # ping netns1, expect rewrite to netns2 + ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null + if [ $? -ne 0 ]; then + lret=1 + echo "ERROR: ping failed" + return $lret + fi + + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns1$dir "$expect" + lret=1 + fi + done + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns2$dir "$expect" + lret=1 + fi + done + + # expect 0 count in ns1 + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # expect 1 packet in ns2 + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns0$dir "$expect" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ping to ns1 was NATted to ns2" + + ip netns exec ns0 nft flush chain ip nat output + + reset_counters + ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null + if [ $? -ne 0 ]; then + lret=1 + echo "ERROR: ping failed" + return $lret + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns1$dir "$expect" + lret=1 + fi + done + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns2$dir "$expect" + lret=1 + fi + done + + # expect 1 count in ns1 + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns0 ns0$dir "$expect" + lret=1 + fi + done + + # expect 0 packet in ns2 + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns2$dir "$expect" + lret=1 + fi + done + + test $lret -eq 0 && echo "PASS: ping to ns1 OK after nat output chain flush" + + return $lret +} + + +test_masquerade6() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 via ipv6" + return 1 + lret=1 + fi + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add masquerading rule +ip netns exec ns0 nft -f - <<EOF +table ip6 nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + meta oif veth0 masquerade + } +} +EOF + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerading" + lret=1 + fi + + # ns1 should have seen packets from ns0, due to masquerade + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + # ns1 should not have seen packets from ns2, due to masquerade + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft flush chain ip6 nat postrouting + if [ $? -ne 0 ]; then + echo "ERROR: Could not flush ip6 nat postrouting" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IPv6 masquerade for ns2" + + return $lret +} + +test_masquerade() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: canot ping ns1 from ns2" + lret=1 + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add masquerading rule +ip netns exec ns0 nft -f - <<EOF +table ip nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + meta oif veth0 masquerade + } +} +EOF + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip masquerading" + lret=1 + fi + + # ns1 should have seen packets from ns0, due to masquerade + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + # ns1 should not have seen packets from ns2, due to masquerade + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft flush chain ip nat postrouting + if [ $? -ne 0 ]; then + echo "ERROR: Could not flush nat postrouting" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IP masquerade for ns2" + + return $lret +} + +test_redirect6() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannnot ping ns1 from ns2 via ipv6" + lret=1 + fi + + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add redirect rule +ip netns exec ns0 nft -f - <<EOF +table ip6 nat { + chain prerouting { + type nat hook prerouting priority 0; policy accept; + meta iif veth1 meta l4proto icmpv6 ip6 saddr dead:2::99 ip6 daddr dead:1::99 redirect + } +} +EOF + ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip6 redirect" + lret=1 + fi + + # ns1 should have seen no packets from ns2, due to redirection + expect="packets 0 bytes 0" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # ns0 should have seen packets from ns2, due to masquerade + expect="packets 1 bytes 104" + for dir in "in6" "out6" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft delete table ip6 nat + if [ $? -ne 0 ]; then + echo "ERROR: Could not delete ip6 nat table" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IPv6 redirection for ns2" + + return $lret +} + +test_redirect() +{ + local lret=0 + + ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2" + lret=1 + fi + + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns2$dir "$expect" + lret=1 + fi + + cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns2 ns1$dir "$expect" + lret=1 + fi + done + + reset_counters + +# add redirect rule +ip netns exec ns0 nft -f - <<EOF +table ip nat { + chain prerouting { + type nat hook prerouting priority 0; policy accept; + meta iif veth1 ip protocol icmp ip saddr 10.0.2.99 ip daddr 10.0.1.99 redirect + } +} +EOF + ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping ns1 from ns2 with active ip redirect" + lret=1 + fi + + # ns1 should have seen no packets from ns2, due to redirection + expect="packets 0 bytes 0" + for dir in "in" "out" ; do + + cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + # ns0 should have seen packets from ns2, due to masquerade + expect="packets 1 bytes 84" + for dir in "in" "out" ; do + cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter ns1 ns0$dir "$expect" + lret=1 + fi + done + + ip netns exec ns0 nft delete table ip nat + if [ $? -ne 0 ]; then + echo "ERROR: Could not delete nat table" 1>&2 + lret=1 + fi + + test $lret -eq 0 && echo "PASS: IP redirection for ns2" + + return $lret +} + + +# ip netns exec ns0 ping -c 1 -q 10.0.$i.99 +for i in 0 1 2; do +ip netns exec ns$i nft -f - <<EOF +table inet filter { + counter ns0in {} + counter ns1in {} + counter ns2in {} + + counter ns0out {} + counter ns1out {} + counter ns2out {} + + counter ns0in6 {} + counter ns1in6 {} + counter ns2in6 {} + + counter ns0out6 {} + counter ns1out6 {} + counter ns2out6 {} + + map nsincounter { + type ipv4_addr : counter + elements = { 10.0.1.1 : "ns0in", + 10.0.2.1 : "ns0in", + 10.0.1.99 : "ns1in", + 10.0.2.99 : "ns2in" } + } + + map nsincounter6 { + type ipv6_addr : counter + elements = { dead:1::1 : "ns0in6", + dead:2::1 : "ns0in6", + dead:1::99 : "ns1in6", + dead:2::99 : "ns2in6" } + } + + map nsoutcounter { + type ipv4_addr : counter + elements = { 10.0.1.1 : "ns0out", + 10.0.2.1 : "ns0out", + 10.0.1.99: "ns1out", + 10.0.2.99: "ns2out" } + } + + map nsoutcounter6 { + type ipv6_addr : counter + elements = { dead:1::1 : "ns0out6", + dead:2::1 : "ns0out6", + dead:1::99 : "ns1out6", + dead:2::99 : "ns2out6" } + } + + chain input { + type filter hook input priority 0; policy accept; + counter name ip saddr map @nsincounter + icmpv6 type { "echo-request", "echo-reply" } counter name ip6 saddr map @nsincounter6 + } + chain output { + type filter hook output priority 0; policy accept; + counter name ip daddr map @nsoutcounter + icmpv6 type { "echo-request", "echo-reply" } counter name ip6 daddr map @nsoutcounter6 + } +} +EOF +done + +sleep 3 +# test basic connectivity +for i in 1 2; do + ip netns exec ns0 ping -c 1 -q 10.0.$i.99 > /dev/null + if [ $? -ne 0 ];then + echo "ERROR: Could not reach other namespace(s)" 1>&2 + ret=1 + fi + + ip netns exec ns0 ping -c 1 -q dead:$i::99 > /dev/null + if [ $? -ne 0 ];then + echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2 + ret=1 + fi + check_counters ns$i + if [ $? -ne 0 ]; then + ret=1 + fi + + check_ns0_counters ns$i + if [ $? -ne 0 ]; then + ret=1 + fi + reset_counters +done + +if [ $ret -eq 0 ];then + echo "PASS: netns routing/connectivity: ns0 can reach ns1 and ns2" +fi + +reset_counters +test_local_dnat +test_local_dnat6 + +reset_counters +test_masquerade +test_masquerade6 + +reset_counters +test_redirect +test_redirect6 + +for i in 0 1 2; do ip netns del ns$i;done + +exit $ret diff --git a/tools/testing/selftests/netfilter/nft_trans_stress.sh b/tools/testing/selftests/netfilter/nft_trans_stress.sh new file mode 100755 index 000000000000..f1affd12c4b1 --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_trans_stress.sh @@ -0,0 +1,78 @@ +#!/bin/bash +# +# This test is for stress-testing the nf_tables config plane path vs. +# packet path processing: Make sure we never release rules that are +# still visible to other cpus. +# +# set -e + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +testns=testns1 +tables="foo bar baz quux" + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +tmp=$(mktemp) + +for table in $tables; do + echo add table inet "$table" >> "$tmp" + echo flush table inet "$table" >> "$tmp" + + echo "add chain inet $table INPUT { type filter hook input priority 0; }" >> "$tmp" + echo "add chain inet $table OUTPUT { type filter hook output priority 0; }" >> "$tmp" + for c in $(seq 1 400); do + chain=$(printf "chain%03u" "$c") + echo "add chain inet $table $chain" >> "$tmp" + done + + for c in $(seq 1 400); do + chain=$(printf "chain%03u" "$c") + for BASE in INPUT OUTPUT; do + echo "add rule inet $table $BASE counter jump $chain" >> "$tmp" + done + echo "add rule inet $table $chain counter return" >> "$tmp" + done +done + +ip netns add "$testns" +ip -netns "$testns" link set lo up + +lscpu | grep ^CPU\(s\): | ( read cpu cpunum ; +cpunum=$((cpunum-1)) +for i in $(seq 0 $cpunum);do + mask=$(printf 0x%x $((1<<$i))) + ip netns exec "$testns" taskset $mask ping -4 127.0.0.1 -fq > /dev/null & + ip netns exec "$testns" taskset $mask ping -6 ::1 -fq > /dev/null & +done) + +sleep 1 + +for i in $(seq 1 10) ; do ip netns exec "$testns" nft -f "$tmp" & done + +for table in $tables;do + randsleep=$((RANDOM%10)) + sleep $randsleep + ip netns exec "$testns" nft delete table inet $table 2>/dev/null +done + +randsleep=$((RANDOM%10)) +sleep $randsleep + +pkill -9 ping + +wait + +rm -f "$tmp" +ip netns del "$testns" diff --git a/tools/testing/selftests/networking/timestamping/Makefile b/tools/testing/selftests/networking/timestamping/Makefile index 14cfcf006936..1de8bd8ccf5d 100644 --- a/tools/testing/selftests/networking/timestamping/Makefile +++ b/tools/testing/selftests/networking/timestamping/Makefile @@ -1,12 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -I../../../../../usr/include -TEST_PROGS := hwtstamp_config rxtimestamp timestamping txtimestamp +TEST_GEN_FILES := hwtstamp_config rxtimestamp timestamping txtimestamp +TEST_PROGS := txtimestamp.sh all: $(TEST_PROGS) top_srcdir = ../../../../.. +KSFT_KHDR_INSTALL := 1 include ../../lib.mk - -clean: - rm -fr $(TEST_PROGS) diff --git a/tools/testing/selftests/networking/timestamping/config b/tools/testing/selftests/networking/timestamping/config new file mode 100644 index 000000000000..a13e3169b0a4 --- /dev/null +++ b/tools/testing/selftests/networking/timestamping/config @@ -0,0 +1,2 @@ +CONFIG_IFB=y +CONFIG_NET_SCH_NETEM=y diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/networking/timestamping/txtimestamp.c index 81a98a240456..d1bbafb16f47 100644 --- a/tools/testing/selftests/networking/timestamping/txtimestamp.c +++ b/tools/testing/selftests/networking/timestamping/txtimestamp.c @@ -39,6 +39,7 @@ #include <inttypes.h> #include <linux/errqueue.h> #include <linux/if_ether.h> +#include <linux/ipv6.h> #include <linux/net_tstamp.h> #include <netdb.h> #include <net/if.h> @@ -69,15 +70,67 @@ static int do_ipv4 = 1; static int do_ipv6 = 1; static int cfg_payload_len = 10; static int cfg_poll_timeout = 100; +static int cfg_delay_snd; +static int cfg_delay_ack; static bool cfg_show_payload; static bool cfg_do_pktinfo; static bool cfg_loop_nodata; static bool cfg_no_delay; +static bool cfg_use_cmsg; +static bool cfg_use_pf_packet; +static bool cfg_do_listen; static uint16_t dest_port = 9000; static struct sockaddr_in daddr; static struct sockaddr_in6 daddr6; -static struct timespec ts_prev; +static struct timespec ts_usr; + +static int saved_tskey = -1; +static int saved_tskey_type = -1; + +static bool test_failed; + +static int64_t timespec_to_us64(struct timespec *ts) +{ + return ts->tv_sec * 1000 * 1000 + ts->tv_nsec / 1000; +} + +static void validate_key(int tskey, int tstype) +{ + int stepsize; + + /* compare key for each subsequent request + * must only test for one type, the first one requested + */ + if (saved_tskey == -1) + saved_tskey_type = tstype; + else if (saved_tskey_type != tstype) + return; + + stepsize = cfg_proto == SOCK_STREAM ? cfg_payload_len : 1; + if (tskey != saved_tskey + stepsize) { + fprintf(stderr, "ERROR: key %d, expected %d\n", + tskey, saved_tskey + stepsize); + test_failed = true; + } + + saved_tskey = tskey; +} + +static void validate_timestamp(struct timespec *cur, int min_delay) +{ + int max_delay = min_delay + 500 /* processing time upper bound */; + int64_t cur64, start64; + + cur64 = timespec_to_us64(cur); + start64 = timespec_to_us64(&ts_usr); + + if (cur64 < start64 + min_delay || cur64 > start64 + max_delay) { + fprintf(stderr, "ERROR: delay %lu expected between %d and %d\n", + cur64 - start64, min_delay, max_delay); + test_failed = true; + } +} static void __print_timestamp(const char *name, struct timespec *cur, uint32_t key, int payload_len) @@ -89,32 +142,19 @@ static void __print_timestamp(const char *name, struct timespec *cur, name, cur->tv_sec, cur->tv_nsec / 1000, key, payload_len); - if ((ts_prev.tv_sec | ts_prev.tv_nsec)) { - int64_t cur_ms, prev_ms; - - cur_ms = (long) cur->tv_sec * 1000 * 1000; - cur_ms += cur->tv_nsec / 1000; - - prev_ms = (long) ts_prev.tv_sec * 1000 * 1000; - prev_ms += ts_prev.tv_nsec / 1000; - - fprintf(stderr, " (%+" PRId64 " us)", cur_ms - prev_ms); - } + if (cur != &ts_usr) + fprintf(stderr, " (USR %+" PRId64 " us)", + timespec_to_us64(cur) - timespec_to_us64(&ts_usr)); - ts_prev = *cur; fprintf(stderr, "\n"); } static void print_timestamp_usr(void) { - struct timespec ts; - struct timeval tv; /* avoid dependency on -lrt */ - - gettimeofday(&tv, NULL); - ts.tv_sec = tv.tv_sec; - ts.tv_nsec = tv.tv_usec * 1000; + if (clock_gettime(CLOCK_REALTIME, &ts_usr)) + error(1, errno, "clock_gettime"); - __print_timestamp(" USR", &ts, 0, 0); + __print_timestamp(" USR", &ts_usr, 0, 0); } static void print_timestamp(struct scm_timestamping *tss, int tstype, @@ -122,15 +162,20 @@ static void print_timestamp(struct scm_timestamping *tss, int tstype, { const char *tsname; + validate_key(tskey, tstype); + switch (tstype) { case SCM_TSTAMP_SCHED: tsname = " ENQ"; + validate_timestamp(&tss->ts[0], 0); break; case SCM_TSTAMP_SND: tsname = " SND"; + validate_timestamp(&tss->ts[0], cfg_delay_snd); break; case SCM_TSTAMP_ACK: tsname = " ACK"; + validate_timestamp(&tss->ts[0], cfg_delay_ack); break; default: error(1, 0, "unknown timestamp type: %u", @@ -194,7 +239,9 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) } else if ((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) || (cm->cmsg_level == SOL_IPV6 && - cm->cmsg_type == IPV6_RECVERR)) { + cm->cmsg_type == IPV6_RECVERR) || + (cm->cmsg_level == SOL_PACKET && + cm->cmsg_type == PACKET_TX_TIMESTAMP)) { serr = (void *) CMSG_DATA(cm); if (serr->ee_errno != ENOMSG || serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING) { @@ -269,32 +316,124 @@ static int recv_errmsg(int fd) return ret == -1; } -static void do_test(int family, unsigned int opt) +static uint16_t get_ip_csum(const uint16_t *start, int num_words, + unsigned long sum) +{ + int i; + + for (i = 0; i < num_words; i++) + sum += start[i]; + + while (sum >> 16) + sum = (sum & 0xFFFF) + (sum >> 16); + + return ~sum; +} + +static uint16_t get_udp_csum(const struct udphdr *udph, int alen) +{ + unsigned long pseudo_sum, csum_len; + const void *csum_start = udph; + + pseudo_sum = htons(IPPROTO_UDP); + pseudo_sum += udph->len; + + /* checksum ip(v6) addresses + udp header + payload */ + csum_start -= alen * 2; + csum_len = ntohs(udph->len) + alen * 2; + + return get_ip_csum(csum_start, csum_len >> 1, pseudo_sum); +} + +static int fill_header_ipv4(void *p) +{ + struct iphdr *iph = p; + + memset(iph, 0, sizeof(*iph)); + + iph->ihl = 5; + iph->version = 4; + iph->ttl = 2; + iph->saddr = daddr.sin_addr.s_addr; /* set for udp csum calc */ + iph->daddr = daddr.sin_addr.s_addr; + iph->protocol = IPPROTO_UDP; + + /* kernel writes saddr, csum, len */ + + return sizeof(*iph); +} + +static int fill_header_ipv6(void *p) +{ + struct ipv6hdr *ip6h = p; + + memset(ip6h, 0, sizeof(*ip6h)); + + ip6h->version = 6; + ip6h->payload_len = htons(sizeof(struct udphdr) + cfg_payload_len); + ip6h->nexthdr = IPPROTO_UDP; + ip6h->hop_limit = 64; + + ip6h->saddr = daddr6.sin6_addr; + ip6h->daddr = daddr6.sin6_addr; + + /* kernel does not write saddr in case of ipv6 */ + + return sizeof(*ip6h); +} + +static void fill_header_udp(void *p, bool is_ipv4) { + struct udphdr *udph = p; + + udph->source = ntohs(dest_port + 1); /* spoof */ + udph->dest = ntohs(dest_port); + udph->len = ntohs(sizeof(*udph) + cfg_payload_len); + udph->check = 0; + + udph->check = get_udp_csum(udph, is_ipv4 ? sizeof(struct in_addr) : + sizeof(struct in6_addr)); +} + +static void do_test(int family, unsigned int report_opt) +{ + char control[CMSG_SPACE(sizeof(uint32_t))]; + struct sockaddr_ll laddr; + unsigned int sock_opt; + struct cmsghdr *cmsg; + struct msghdr msg; + struct iovec iov; char *buf; int fd, i, val = 1, total_len; - if (family == AF_INET6 && cfg_proto != SOCK_STREAM) { - /* due to lack of checksum generation code */ - fprintf(stderr, "test: skipping datagram over IPv6\n"); - return; - } - total_len = cfg_payload_len; - if (cfg_proto == SOCK_RAW) { + if (cfg_use_pf_packet || cfg_proto == SOCK_RAW) { total_len += sizeof(struct udphdr); - if (cfg_ipproto == IPPROTO_RAW) - total_len += sizeof(struct iphdr); + if (cfg_use_pf_packet || cfg_ipproto == IPPROTO_RAW) + if (family == PF_INET) + total_len += sizeof(struct iphdr); + else + total_len += sizeof(struct ipv6hdr); + + /* special case, only rawv6_sendmsg: + * pass proto in sin6_port if not connected + * also see ANK comment in net/ipv4/raw.c + */ + daddr6.sin6_port = htons(cfg_ipproto); } buf = malloc(total_len); if (!buf) error(1, 0, "malloc"); - fd = socket(family, cfg_proto, cfg_ipproto); + fd = socket(cfg_use_pf_packet ? PF_PACKET : family, + cfg_proto, cfg_ipproto); if (fd < 0) error(1, errno, "socket"); + /* reset expected key on each new socket */ + saved_tskey = -1; + if (cfg_proto == SOCK_STREAM) { if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char*) &val, sizeof(val))) @@ -321,54 +460,80 @@ static void do_test(int family, unsigned int opt) } } - opt |= SOF_TIMESTAMPING_SOFTWARE | - SOF_TIMESTAMPING_OPT_CMSG | - SOF_TIMESTAMPING_OPT_ID; + sock_opt = SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_CMSG | + SOF_TIMESTAMPING_OPT_ID; + + if (!cfg_use_cmsg) + sock_opt |= report_opt; + if (cfg_loop_nodata) - opt |= SOF_TIMESTAMPING_OPT_TSONLY; + sock_opt |= SOF_TIMESTAMPING_OPT_TSONLY; if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, - (char *) &opt, sizeof(opt))) + (char *) &sock_opt, sizeof(sock_opt))) error(1, 0, "setsockopt timestamping"); for (i = 0; i < cfg_num_pkts; i++) { - memset(&ts_prev, 0, sizeof(ts_prev)); + memset(&msg, 0, sizeof(msg)); memset(buf, 'a' + i, total_len); - if (cfg_proto == SOCK_RAW) { - struct udphdr *udph; + if (cfg_use_pf_packet || cfg_proto == SOCK_RAW) { int off = 0; - if (cfg_ipproto == IPPROTO_RAW) { - struct iphdr *iph = (void *) buf; - - memset(iph, 0, sizeof(*iph)); - iph->ihl = 5; - iph->version = 4; - iph->ttl = 2; - iph->daddr = daddr.sin_addr.s_addr; - iph->protocol = IPPROTO_UDP; - /* kernel writes saddr, csum, len */ - - off = sizeof(*iph); + if (cfg_use_pf_packet || cfg_ipproto == IPPROTO_RAW) { + if (family == PF_INET) + off = fill_header_ipv4(buf); + else + off = fill_header_ipv6(buf); } - udph = (void *) buf + off; - udph->source = ntohs(9000); /* random spoof */ - udph->dest = ntohs(dest_port); - udph->len = ntohs(sizeof(*udph) + cfg_payload_len); - udph->check = 0; /* not allowed for IPv6 */ + fill_header_udp(buf + off, family == PF_INET); } print_timestamp_usr(); + + iov.iov_base = buf; + iov.iov_len = total_len; + if (cfg_proto != SOCK_STREAM) { - if (family == PF_INET) - val = sendto(fd, buf, total_len, 0, (void *) &daddr, sizeof(daddr)); - else - val = sendto(fd, buf, total_len, 0, (void *) &daddr6, sizeof(daddr6)); - } else { - val = send(fd, buf, cfg_payload_len, 0); + if (cfg_use_pf_packet) { + memset(&laddr, 0, sizeof(laddr)); + + laddr.sll_family = AF_PACKET; + laddr.sll_ifindex = 1; + laddr.sll_protocol = htons(family == AF_INET ? ETH_P_IP : ETH_P_IPV6); + laddr.sll_halen = ETH_ALEN; + + msg.msg_name = (void *)&laddr; + msg.msg_namelen = sizeof(laddr); + } else if (family == PF_INET) { + msg.msg_name = (void *)&daddr; + msg.msg_namelen = sizeof(daddr); + } else { + msg.msg_name = (void *)&daddr6; + msg.msg_namelen = sizeof(daddr6); + } + } + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + if (cfg_use_cmsg) { + memset(control, 0, sizeof(control)); + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SO_TIMESTAMPING; + cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); + + *((uint32_t *) CMSG_DATA(cmsg)) = report_opt; } + + val = sendmsg(fd, &msg, 0); if (val != total_len) error(1, errno, "send"); @@ -385,7 +550,7 @@ static void do_test(int family, unsigned int opt) error(1, errno, "close"); free(buf); - usleep(400 * 1000); + usleep(100 * 1000); } static void __attribute__((noreturn)) usage(const char *filepath) @@ -396,15 +561,20 @@ static void __attribute__((noreturn)) usage(const char *filepath) " -6: only IPv6\n" " -h: show this message\n" " -c N: number of packets for each test\n" + " -C: use cmsg to set tstamp recording options\n" " -D: no delay between packets\n" " -F: poll() waits forever for an event\n" " -I: request PKTINFO\n" " -l N: send N bytes at a time\n" + " -L listen on hostname and port\n" " -n: set no-payload option\n" + " -p N: connect to port N\n" + " -P: use PF_PACKET\n" " -r: use raw\n" " -R: use raw (IP_HDRINCL)\n" - " -p N: connect to port N\n" " -u: use udp\n" + " -v: validate SND delay (usec)\n" + " -V: validate ACK delay (usec)\n" " -x: show payload (up to 70 bytes)\n", filepath); exit(1); @@ -413,9 +583,9 @@ static void __attribute__((noreturn)) usage(const char *filepath) static void parse_opt(int argc, char **argv) { int proto_count = 0; - char c; + int c; - while ((c = getopt(argc, argv, "46c:DFhIl:np:rRux")) != -1) { + while ((c = getopt(argc, argv, "46c:CDFhIl:Lnp:PrRuv:V:x")) != -1) { switch (c) { case '4': do_ipv6 = 0; @@ -426,6 +596,9 @@ static void parse_opt(int argc, char **argv) case 'c': cfg_num_pkts = strtoul(optarg, NULL, 10); break; + case 'C': + cfg_use_cmsg = true; + break; case 'D': cfg_no_delay = true; break; @@ -435,9 +608,24 @@ static void parse_opt(int argc, char **argv) case 'I': cfg_do_pktinfo = true; break; + case 'l': + cfg_payload_len = strtoul(optarg, NULL, 10); + break; + case 'L': + cfg_do_listen = true; + break; case 'n': cfg_loop_nodata = true; break; + case 'p': + dest_port = strtoul(optarg, NULL, 10); + break; + case 'P': + proto_count++; + cfg_use_pf_packet = true; + cfg_proto = SOCK_DGRAM; + cfg_ipproto = 0; + break; case 'r': proto_count++; cfg_proto = SOCK_RAW; @@ -453,11 +641,11 @@ static void parse_opt(int argc, char **argv) cfg_proto = SOCK_DGRAM; cfg_ipproto = IPPROTO_UDP; break; - case 'l': - cfg_payload_len = strtoul(optarg, NULL, 10); + case 'v': + cfg_delay_snd = strtoul(optarg, NULL, 10); break; - case 'p': - dest_port = strtoul(optarg, NULL, 10); + case 'V': + cfg_delay_ack = strtoul(optarg, NULL, 10); break; case 'x': cfg_show_payload = true; @@ -475,7 +663,9 @@ static void parse_opt(int argc, char **argv) if (!do_ipv4 && !do_ipv6) error(1, 0, "pass -4 or -6, not both"); if (proto_count > 1) - error(1, 0, "pass -r, -R or -u, not multiple"); + error(1, 0, "pass -P, -r, -R or -u, not multiple"); + if (cfg_do_pktinfo && cfg_use_pf_packet) + error(1, 0, "cannot ask for pktinfo over pf_packet"); if (optind != argc - 1) error(1, 0, "missing required hostname argument"); @@ -483,10 +673,12 @@ static void parse_opt(int argc, char **argv) static void resolve_hostname(const char *hostname) { + struct addrinfo hints = { .ai_family = do_ipv4 ? AF_INET : AF_INET6 }; struct addrinfo *addrs, *cur; int have_ipv4 = 0, have_ipv6 = 0; - if (getaddrinfo(hostname, NULL, NULL, &addrs)) +retry: + if (getaddrinfo(hostname, NULL, &hints, &addrs)) error(1, errno, "getaddrinfo"); cur = addrs; @@ -506,14 +698,41 @@ static void resolve_hostname(const char *hostname) if (addrs) freeaddrinfo(addrs); + if (do_ipv6 && hints.ai_family != AF_INET6) { + hints.ai_family = AF_INET6; + goto retry; + } + do_ipv4 &= have_ipv4; do_ipv6 &= have_ipv6; } +static void do_listen(int family, void *addr, int alen) +{ + int fd, type; + + type = cfg_proto == SOCK_RAW ? SOCK_DGRAM : cfg_proto; + + fd = socket(family, type, 0); + if (fd == -1) + error(1, errno, "socket rx"); + + if (bind(fd, addr, alen)) + error(1, errno, "bind rx"); + + if (type == SOCK_STREAM && listen(fd, 10)) + error(1, errno, "listen rx"); + + /* leave fd open, will be closed on process exit. + * this enables connect() to succeed and avoids icmp replies + */ +} + static void do_main(int family) { - fprintf(stderr, "family: %s\n", - family == PF_INET ? "INET" : "INET6"); + fprintf(stderr, "family: %s %s\n", + family == PF_INET ? "INET" : "INET6", + cfg_use_pf_packet ? "(PF_PACKET)" : ""); fprintf(stderr, "test SND\n"); do_test(family, SOF_TIMESTAMPING_TX_SOFTWARE); @@ -555,10 +774,17 @@ int main(int argc, char **argv) fprintf(stderr, "server port: %u\n", dest_port); fprintf(stderr, "\n"); - if (do_ipv4) + if (do_ipv4) { + if (cfg_do_listen) + do_listen(PF_INET, &daddr, sizeof(daddr)); do_main(PF_INET); - if (do_ipv6) + } + + if (do_ipv6) { + if (cfg_do_listen) + do_listen(PF_INET6, &daddr6, sizeof(daddr6)); do_main(PF_INET6); + } - return 0; + return test_failed; } diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.sh b/tools/testing/selftests/networking/timestamping/txtimestamp.sh new file mode 100755 index 000000000000..df0d86ca72b7 --- /dev/null +++ b/tools/testing/selftests/networking/timestamping/txtimestamp.sh @@ -0,0 +1,57 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Send packets with transmit timestamps over loopback with netem +# Verify that timestamps correspond to netem delay + +set -e + +setup() { + # set 1ms delay on lo egress + tc qdisc add dev lo root netem delay 1ms + + # set 2ms delay on ifb0 egress + modprobe ifb + ip link add ifb_netem0 type ifb + ip link set dev ifb_netem0 up + tc qdisc add dev ifb_netem0 root netem delay 2ms + + # redirect lo ingress through ifb0 egress + tc qdisc add dev lo handle ffff: ingress + tc filter add dev lo parent ffff: \ + u32 match mark 0 0xffff \ + action mirred egress redirect dev ifb_netem0 +} + +run_test_v4v6() { + # SND will be delayed 1000us + # ACK will be delayed 6000us: 1 + 2 ms round-trip + local -r args="$@ -v 1000 -V 6000" + + ./txtimestamp ${args} -4 -L 127.0.0.1 + ./txtimestamp ${args} -6 -L ::1 +} + +run_test_tcpudpraw() { + local -r args=$@ + + run_test_v4v6 ${args} # tcp + run_test_v4v6 ${args} -u # udp + run_test_v4v6 ${args} -r # raw + run_test_v4v6 ${args} -R # raw (IPPROTO_RAW) + run_test_v4v6 ${args} -P # pf_packet +} + +run_test_all() { + run_test_tcpudpraw # setsockopt + run_test_tcpudpraw -C # cmsg + run_test_tcpudpraw -n # timestamp w/o data +} + +if [[ "$(ip netns identify)" == "root" ]]; then + ../../net/in_netns.sh $0 $@ +else + setup + run_test_all + echo "OK. All tests passed" +fi diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index 201b598558b9..b3ad909aefbc 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -28,7 +28,8 @@ SUB_DIRS = alignment \ tm \ vphn \ math \ - ptrace + ptrace \ + security endif diff --git a/tools/testing/selftests/powerpc/cache_shape/Makefile b/tools/testing/selftests/powerpc/cache_shape/Makefile index ede4d3dae750..689f6c8ebcd8 100644 --- a/tools/testing/selftests/powerpc/cache_shape/Makefile +++ b/tools/testing/selftests/powerpc/cache_shape/Makefile @@ -1,12 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := cache_shape - -all: $(TEST_PROGS) - -$(TEST_PROGS): ../harness.c ../utils.c +TEST_GEN_PROGS := cache_shape top_srcdir = ../../../../.. include ../../lib.mk -clean: - rm -f $(TEST_PROGS) *.o +$(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h index 7f348c059bc2..52b4710469d2 100644 --- a/tools/testing/selftests/powerpc/include/reg.h +++ b/tools/testing/selftests/powerpc/include/reg.h @@ -17,6 +17,7 @@ : "memory") #define mb() asm volatile("sync" : : : "memory"); +#define barrier() asm volatile("" : : : "memory"); #define SPRN_MMCR2 769 #define SPRN_MMCRA 770 diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index c58c370828b4..ae43a614835d 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -11,6 +11,7 @@ #include <stdint.h> #include <stdbool.h> #include <linux/auxvec.h> +#include <linux/perf_event.h> #include "reg.h" /* Avoid headaches with PRI?64 - just use %ll? always */ @@ -31,6 +32,15 @@ void *get_auxv_entry(int type); int pick_online_cpu(void); +int read_debugfs_file(char *debugfs_file, int *result); +int write_debugfs_file(char *debugfs_file, int result); +void set_dscr(unsigned long val); +int perf_event_open_counter(unsigned int type, + unsigned long config, int group_fd); +int perf_event_enable(int fd); +int perf_event_disable(int fd); +int perf_event_reset(int fd); + static inline bool have_hwcap(unsigned long ftr) { return ((unsigned long)get_auxv_entry(AT_HWCAP) & ftr) == ftr; @@ -72,6 +82,16 @@ do { \ } \ } while (0) +#define SKIP_IF_MSG(x, msg) \ +do { \ + if ((x)) { \ + fprintf(stderr, \ + "[SKIP] Test skipped on line %d: %s\n", \ + __LINE__, msg); \ + return MAGIC_SKIP_RETURN_VALUE; \ + } \ +} while (0) + #define _str(s) #s #define str(s) _str(s) @@ -80,4 +100,12 @@ do { \ #define PPC_FEATURE2_ARCH_3_00 0x00800000 #endif +#if defined(__powerpc64__) +#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.gp_regs[PT_NIP] +#elif defined(__powerpc__) +#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.uc_regs->gregs[PT_NIP] +#else +#error implement UCONTEXT_NIA +#endif + #endif /* _SELFTESTS_POWERPC_UTILS_H */ diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index 7d7c42ed6de9..ba919308fe30 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -2,4 +2,5 @@ hugetlb_vs_thp_test subpage_prot tempfile prot_sao -segv_errors
\ No newline at end of file +segv_errors +wild_bctr
\ No newline at end of file diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index 33ced6e0ad25..43d68420e363 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -2,7 +2,7 @@ noarg: $(MAKE) -C ../ -TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors +TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr TEST_GEN_FILES := tempfile top_srcdir = ../../../../.. @@ -12,6 +12,8 @@ $(TEST_GEN_PROGS): ../harness.c $(OUTPUT)/prot_sao: ../utils.c +$(OUTPUT)/wild_bctr: CFLAGS += -m64 + $(OUTPUT)/tempfile: dd if=/dev/zero of=$@ bs=64k count=1 diff --git a/tools/testing/selftests/powerpc/mm/wild_bctr.c b/tools/testing/selftests/powerpc/mm/wild_bctr.c new file mode 100644 index 000000000000..f2fa101c5a6a --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/wild_bctr.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright 2018, Michael Ellerman, IBM Corp. + * + * Test that an out-of-bounds branch to counter behaves as expected. + */ + +#include <setjmp.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <ucontext.h> +#include <unistd.h> + +#include "utils.h" + + +#define BAD_NIP 0x788c545a18000000ull + +static struct pt_regs signal_regs; +static jmp_buf setjmp_env; + +static void save_regs(ucontext_t *ctxt) +{ + struct pt_regs *regs = ctxt->uc_mcontext.regs; + + memcpy(&signal_regs, regs, sizeof(signal_regs)); +} + +static void segv_handler(int signum, siginfo_t *info, void *ctxt_v) +{ + save_regs(ctxt_v); + longjmp(setjmp_env, 1); +} + +static void usr2_handler(int signum, siginfo_t *info, void *ctxt_v) +{ + save_regs(ctxt_v); +} + +static int ok(void) +{ + printf("Everything is OK in here.\n"); + return 0; +} + +#define REG_POISON 0x5a5a +#define POISONED_REG(n) ((((unsigned long)REG_POISON) << 48) | ((n) << 32) | \ + (((unsigned long)REG_POISON) << 16) | (n)) + +static inline void poison_regs(void) +{ + #define POISON_REG(n) \ + "lis " __stringify(n) "," __stringify(REG_POISON) ";" \ + "addi " __stringify(n) "," __stringify(n) "," __stringify(n) ";" \ + "sldi " __stringify(n) "," __stringify(n) ", 32 ;" \ + "oris " __stringify(n) "," __stringify(n) "," __stringify(REG_POISON) ";" \ + "addi " __stringify(n) "," __stringify(n) "," __stringify(n) ";" + + asm (POISON_REG(15) + POISON_REG(16) + POISON_REG(17) + POISON_REG(18) + POISON_REG(19) + POISON_REG(20) + POISON_REG(21) + POISON_REG(22) + POISON_REG(23) + POISON_REG(24) + POISON_REG(25) + POISON_REG(26) + POISON_REG(27) + POISON_REG(28) + POISON_REG(29) + : // inputs + : // outputs + : "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", + "26", "27", "28", "29" + ); + #undef POISON_REG +} + +static int check_regs(void) +{ + unsigned long i; + + for (i = 15; i <= 29; i++) + FAIL_IF(signal_regs.gpr[i] != POISONED_REG(i)); + + printf("Regs OK\n"); + return 0; +} + +static void dump_regs(void) +{ + for (int i = 0; i < 32; i += 4) { + printf("r%02d 0x%016lx r%02d 0x%016lx " \ + "r%02d 0x%016lx r%02d 0x%016lx\n", + i, signal_regs.gpr[i], + i+1, signal_regs.gpr[i+1], + i+2, signal_regs.gpr[i+2], + i+3, signal_regs.gpr[i+3]); + } +} + +#ifdef _CALL_AIXDESC +struct opd { + unsigned long ip; + unsigned long toc; + unsigned long env; +}; +static struct opd bad_opd = { + .ip = BAD_NIP, +}; +#define BAD_FUNC (&bad_opd) +#else +#define BAD_FUNC BAD_NIP +#endif + +int test_wild_bctr(void) +{ + int (*func_ptr)(void); + struct sigaction segv = { + .sa_sigaction = segv_handler, + .sa_flags = SA_SIGINFO + }; + struct sigaction usr2 = { + .sa_sigaction = usr2_handler, + .sa_flags = SA_SIGINFO + }; + + FAIL_IF(sigaction(SIGSEGV, &segv, NULL)); + FAIL_IF(sigaction(SIGUSR2, &usr2, NULL)); + + bzero(&signal_regs, sizeof(signal_regs)); + + if (setjmp(setjmp_env) == 0) { + func_ptr = ok; + func_ptr(); + + kill(getpid(), SIGUSR2); + printf("Regs before:\n"); + dump_regs(); + bzero(&signal_regs, sizeof(signal_regs)); + + poison_regs(); + + func_ptr = (int (*)(void))BAD_FUNC; + func_ptr(); + + FAIL_IF(1); /* we didn't segv? */ + } + + FAIL_IF(signal_regs.nip != BAD_NIP); + + printf("All good - took SEGV as expected branching to 0x%llx\n", BAD_NIP); + + dump_regs(); + FAIL_IF(check_regs()); + + return 0; +} + +int main(void) +{ + return test_harness(test_wild_bctr, "wild_bctr"); +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile index bd5dfa509272..23f4caf48ffc 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile +++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile @@ -5,6 +5,9 @@ noarg: # The EBB handler is 64-bit code and everything links against it CFLAGS += -m64 +# Toolchains may build PIE by default which breaks the assembly +LDFLAGS += -no-pie + TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \ cycles_with_freeze_test pmc56_overflow_test \ ebb_vs_cpu_event_test cpu_event_vs_ebb_test \ diff --git a/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c b/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c index ed3239bbfae2..ee1e9ca22f0d 100644 --- a/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c +++ b/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c @@ -65,14 +65,6 @@ static int unprotect_region(void) extern char __start___ex_table[]; extern char __stop___ex_table[]; -#if defined(__powerpc64__) -#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.gp_regs[PT_NIP] -#elif defined(__powerpc__) -#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.uc_regs->gregs[PT_NIP] -#else -#error implement UCONTEXT_NIA -#endif - struct extbl_entry { int insn; int fixup; diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index 923d531265f8..8d3f006c98cc 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -1,20 +1,15 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \ +TEST_GEN_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \ ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \ ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \ - perf-hwbreak + perf-hwbreak ptrace-syscall top_srcdir = ../../../../.. include ../../lib.mk -all: $(TEST_PROGS) - CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm -fno-pie -ptrace-pkey core-pkey: child.h -ptrace-pkey core-pkey: LDLIBS += -pthread - -$(TEST_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h +$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: child.h +$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: LDLIBS += -pthread -clean: - rm -f $(TEST_PROGS) *.o +$(TEST_GEN_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c index e23e2e199eb4..d5c64fee032d 100644 --- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c @@ -352,10 +352,7 @@ static int write_core_pattern(const char *core_pattern) FILE *f; f = fopen(core_pattern_file, "w"); - if (!f) { - perror("Error writing to core_pattern file"); - return TEST_FAIL; - } + SKIP_IF_MSG(!f, "Try with root privileges"); ret = fwrite(core_pattern, 1, len, f); fclose(f); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c index 0b4ebcc2f485..ca29fafeed5d 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c @@ -31,7 +31,7 @@ void gpr(void) ASM_LOAD_GPR_IMMED(gpr_1) ASM_LOAD_FPR_SINGLE_PRECISION(flt_1) : - : [gpr_1]"i"(GPR_1), [flt_1] "r" (&a) + : [gpr_1]"i"(GPR_1), [flt_1] "b" (&a) : "memory", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-syscall.c b/tools/testing/selftests/powerpc/ptrace/ptrace-syscall.c new file mode 100644 index 000000000000..3353210dcdbd --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-syscall.c @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * A ptrace test for testing PTRACE_SYSEMU, PTRACE_SETREGS and + * PTRACE_GETREG. This test basically create a child process that executes + * syscalls and the parent process check if it is being traced appropriated. + * + * This test is heavily based on tools/testing/selftests/x86/ptrace_syscall.c + * test, and it was adapted to run on Powerpc by + * Breno Leitao <leitao@debian.org> + */ +#define _GNU_SOURCE + +#include <sys/ptrace.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/syscall.h> +#include <sys/user.h> +#include <unistd.h> +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <err.h> +#include <string.h> +#include <sys/auxv.h> +#include "utils.h" + +/* Bitness-agnostic defines for user_regs_struct fields. */ +#define user_syscall_nr gpr[0] +#define user_arg0 gpr[3] +#define user_arg1 gpr[4] +#define user_arg2 gpr[5] +#define user_arg3 gpr[6] +#define user_arg4 gpr[7] +#define user_arg5 gpr[8] +#define user_ip nip + +#define PTRACE_SYSEMU 0x1d + +static int nerrs; + +static void wait_trap(pid_t chld) +{ + siginfo_t si; + + if (waitid(P_PID, chld, &si, WEXITED|WSTOPPED) != 0) + err(1, "waitid"); + if (si.si_pid != chld) + errx(1, "got unexpected pid in event\n"); + if (si.si_code != CLD_TRAPPED) + errx(1, "got unexpected event type %d\n", si.si_code); +} + +static void test_ptrace_syscall_restart(void) +{ + int status; + struct pt_regs regs; + pid_t chld; + + printf("[RUN]\tptrace-induced syscall restart\n"); + + chld = fork(); + if (chld < 0) + err(1, "fork"); + + /* + * Child process is running 4 syscalls after ptrace. + * + * 1) getpid() + * 2) gettid() + * 3) tgkill() -> Send SIGSTOP + * 4) gettid() -> Where the tests will happen essentially + */ + if (chld == 0) { + if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0) + err(1, "PTRACE_TRACEME"); + + pid_t pid = getpid(), tid = syscall(SYS_gettid); + + printf("\tChild will make one syscall\n"); + syscall(SYS_tgkill, pid, tid, SIGSTOP); + + syscall(SYS_gettid, 10, 11, 12, 13, 14, 15); + _exit(0); + } + /* Parent process below */ + + /* Wait for SIGSTOP sent by tgkill above. */ + if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status)) + err(1, "waitpid"); + + printf("[RUN]\tSYSEMU\n"); + if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0) + err(1, "PTRACE_SYSEMU"); + wait_trap(chld); + + if (ptrace(PTRACE_GETREGS, chld, 0, ®s) != 0) + err(1, "PTRACE_GETREGS"); + + /* + * Ptrace trapped prior to executing the syscall, thus r3 still has + * the syscall number instead of the sys_gettid() result + */ + if (regs.user_syscall_nr != SYS_gettid || + regs.user_arg0 != 10 || regs.user_arg1 != 11 || + regs.user_arg2 != 12 || regs.user_arg3 != 13 || + regs.user_arg4 != 14 || regs.user_arg5 != 15) { + printf("[FAIL]\tInitial args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", + (unsigned long)regs.user_syscall_nr, + (unsigned long)regs.user_arg0, + (unsigned long)regs.user_arg1, + (unsigned long)regs.user_arg2, + (unsigned long)regs.user_arg3, + (unsigned long)regs.user_arg4, + (unsigned long)regs.user_arg5); + nerrs++; + } else { + printf("[OK]\tInitial nr and args are correct\n"); } + + printf("[RUN]\tRestart the syscall (ip = 0x%lx)\n", + (unsigned long)regs.user_ip); + + /* + * Rewind to retry the same syscall again. This will basically test + * the rewind process together with PTRACE_SETREGS and PTRACE_GETREGS. + */ + regs.user_ip -= 4; + if (ptrace(PTRACE_SETREGS, chld, 0, ®s) != 0) + err(1, "PTRACE_SETREGS"); + + if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0) + err(1, "PTRACE_SYSEMU"); + wait_trap(chld); + + if (ptrace(PTRACE_GETREGS, chld, 0, ®s) != 0) + err(1, "PTRACE_GETREGS"); + + if (regs.user_syscall_nr != SYS_gettid || + regs.user_arg0 != 10 || regs.user_arg1 != 11 || + regs.user_arg2 != 12 || regs.user_arg3 != 13 || + regs.user_arg4 != 14 || regs.user_arg5 != 15) { + printf("[FAIL]\tRestart nr or args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", + (unsigned long)regs.user_syscall_nr, + (unsigned long)regs.user_arg0, + (unsigned long)regs.user_arg1, + (unsigned long)regs.user_arg2, + (unsigned long)regs.user_arg3, + (unsigned long)regs.user_arg4, + (unsigned long)regs.user_arg5); + nerrs++; + } else { + printf("[OK]\tRestarted nr and args are correct\n"); + } + + printf("[RUN]\tChange nr and args and restart the syscall (ip = 0x%lx)\n", + (unsigned long)regs.user_ip); + + /* + * Inject a new syscall (getpid) in the same place the previous + * syscall (gettid), rewind and re-execute. + */ + regs.user_syscall_nr = SYS_getpid; + regs.user_arg0 = 20; + regs.user_arg1 = 21; + regs.user_arg2 = 22; + regs.user_arg3 = 23; + regs.user_arg4 = 24; + regs.user_arg5 = 25; + regs.user_ip -= 4; + + if (ptrace(PTRACE_SETREGS, chld, 0, ®s) != 0) + err(1, "PTRACE_SETREGS"); + + if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0) + err(1, "PTRACE_SYSEMU"); + wait_trap(chld); + + if (ptrace(PTRACE_GETREGS, chld, 0, ®s) != 0) + err(1, "PTRACE_GETREGS"); + + /* Check that ptrace stopped at the new syscall that was + * injected, and guarantee that it haven't executed, i.e, user_args + * contain the arguments and not the syscall return value, for + * instance. + */ + if (regs.user_syscall_nr != SYS_getpid + || regs.user_arg0 != 20 || regs.user_arg1 != 21 + || regs.user_arg2 != 22 || regs.user_arg3 != 23 + || regs.user_arg4 != 24 || regs.user_arg5 != 25) { + + printf("[FAIL]\tRestart nr or args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", + (unsigned long)regs.user_syscall_nr, + (unsigned long)regs.user_arg0, + (unsigned long)regs.user_arg1, + (unsigned long)regs.user_arg2, + (unsigned long)regs.user_arg3, + (unsigned long)regs.user_arg4, + (unsigned long)regs.user_arg5); + nerrs++; + } else { + printf("[OK]\tReplacement nr and args are correct\n"); + } + + if (ptrace(PTRACE_CONT, chld, 0, 0) != 0) + err(1, "PTRACE_CONT"); + + if (waitpid(chld, &status, 0) != chld) + err(1, "waitpid"); + + /* Guarantee that the process executed properly, returning 0 */ + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { + printf("[FAIL]\tChild failed\n"); + nerrs++; + } else { + printf("[OK]\tChild exited cleanly\n"); + } +} + +int ptrace_syscall(void) +{ + test_ptrace_syscall_restart(); + + return nerrs; +} + +int main(void) +{ + return test_harness(ptrace_syscall, "ptrace_syscall"); +} diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c index 59206b96e98a..a08a91594dbe 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c @@ -59,8 +59,8 @@ trans: "3: ;" : [res] "=r" (result), [texasr] "=r" (texasr) : [gpr_1]"i"(GPR_1), [gpr_2]"i"(GPR_2), - [sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "r" (&a), - [flt_2] "r" (&b), [cptr1] "r" (&cptr[1]) + [sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "b" (&a), + [flt_2] "b" (&b), [cptr1] "b" (&cptr[1]) : "memory", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c index b3c061dc9512..f47174746231 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c @@ -72,7 +72,7 @@ trans: "3: ;" : [res] "=r" (result), [texasr] "=r" (texasr) - : [val] "r" (cptr[1]), [sprn_dscr]"i"(SPRN_DSCR), + : [sprn_dscr]"i"(SPRN_DSCR), [sprn_tar]"i"(SPRN_TAR), [sprn_ppr]"i"(SPRN_PPR), [sprn_texasr]"i"(SPRN_TEXASR), [tar_1]"i"(TAR_1), [dscr_1]"i"(DSCR_1), [tar_2]"i"(TAR_2), [dscr_2]"i"(DSCR_2), diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c index 277dade1b382..18a685bf6a09 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c @@ -77,8 +77,7 @@ trans: "3: ;" : [res] "=r" (result), [texasr] "=r" (texasr) - : [fp_load] "r" (fp_load), [fp_load_ckpt] "r" (fp_load_ckpt), - [sprn_texasr] "i" (SPRN_TEXASR) + : [sprn_texasr] "i" (SPRN_TEXASR) : "memory", "r0", "r1", "r3", "r4", "r7", "r8", "r9", "r10", "r11" ); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c index 51427a2465f6..ba04999254e3 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c @@ -74,7 +74,7 @@ trans: "3: ;" : [tfhar] "=r" (tfhar), [res] "=r" (result), - [texasr] "=r" (texasr), [cptr1] "=r" (cptr1) + [texasr] "=r" (texasr), [cptr1] "=b" (cptr1) : [sprn_texasr] "i" (SPRN_TEXASR) : "memory", "r0", "r8", "r31" ); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c index 48b462f75023..f70023b25e6e 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c @@ -65,7 +65,7 @@ trans: : [sprn_dscr]"i"(SPRN_DSCR), [sprn_tar]"i"(SPRN_TAR), [sprn_ppr]"i"(SPRN_PPR), [sprn_texasr]"i"(SPRN_TEXASR), [tar_1]"i"(TAR_1), [dscr_1]"i"(DSCR_1), [tar_2]"i"(TAR_2), - [dscr_2]"i"(DSCR_2), [cptr1] "r" (&cptr[1]) + [dscr_2]"i"(DSCR_2), [cptr1] "b" (&cptr[1]) : "memory", "r0", "r1", "r3", "r4", "r5", "r6" ); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c index 17c23cabac3e..dfba80058977 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c @@ -65,8 +65,7 @@ trans: "3: ;" : [res] "=r" (result), [texasr] "=r" (texasr) - : [fp_load] "r" (fp_load), [fp_load_ckpt] "r" (fp_load_ckpt), - [sprn_texasr] "i" (SPRN_TEXASR), [cptr1] "r" (&cptr[1]) + : [sprn_texasr] "i" (SPRN_TEXASR), [cptr1] "b" (&cptr[1]) : "memory", "r0", "r1", "r3", "r4", "r7", "r8", "r9", "r10", "r11" ); diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile new file mode 100644 index 000000000000..85861c46b445 --- /dev/null +++ b/tools/testing/selftests/powerpc/security/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0+ + +TEST_GEN_PROGS := rfi_flush +top_srcdir = ../../../../.. + +CFLAGS += -I../../../../../usr/include + +include ../../lib.mk + +$(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c new file mode 100644 index 000000000000..0a7d0afb26b8 --- /dev/null +++ b/tools/testing/selftests/powerpc/security/rfi_flush.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright 2018 IBM Corporation. + */ + +#define __SANE_USERSPACE_TYPES__ + +#include <sys/types.h> +#include <stdint.h> +#include <malloc.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include "utils.h" + +#define CACHELINE_SIZE 128 + +struct perf_event_read { + __u64 nr; + __u64 l1d_misses; +}; + +static inline __u64 load(void *addr) +{ + __u64 tmp; + + asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr)); + + return tmp; +} + +static void syscall_loop(char *p, unsigned long iterations, + unsigned long zero_size) +{ + for (unsigned long i = 0; i < iterations; i++) { + for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE) + load(p + j); + getppid(); + } +} + +int rfi_flush_test(void) +{ + char *p; + int repetitions = 10; + int fd, passes = 0, iter, rc = 0; + struct perf_event_read v; + __u64 l1d_misses_total = 0; + unsigned long iterations = 100000, zero_size = 24 * 1024; + unsigned long l1d_misses_expected; + int rfi_flush_org, rfi_flush; + + SKIP_IF(geteuid() != 0); + + if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_org)) { + perror("Unable to read powerpc/rfi_flush debugfs file"); + SKIP_IF(1); + } + + rfi_flush = rfi_flush_org; + + fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1); + FAIL_IF(fd < 0); + + p = (char *)memalign(zero_size, CACHELINE_SIZE); + + FAIL_IF(perf_event_enable(fd)); + + set_dscr(1); + + iter = repetitions; + + /* + * We expect to see l1d miss for each cacheline access when rfi_flush + * is set. Allow a small variation on this. + */ + l1d_misses_expected = iterations * (zero_size / CACHELINE_SIZE - 2); + +again: + FAIL_IF(perf_event_reset(fd)); + + syscall_loop(p, iterations, zero_size); + + FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v)); + + if (rfi_flush && v.l1d_misses >= l1d_misses_expected) + passes++; + else if (!rfi_flush && v.l1d_misses < (l1d_misses_expected / 2)) + passes++; + + l1d_misses_total += v.l1d_misses; + + while (--iter) + goto again; + + if (passes < repetitions) { + printf("FAIL (L1D misses with rfi_flush=%d: %llu %c %lu) [%d/%d failures]\n", + rfi_flush, l1d_misses_total, rfi_flush ? '<' : '>', + rfi_flush ? repetitions * l1d_misses_expected : + repetitions * l1d_misses_expected / 2, + repetitions - passes, repetitions); + rc = 1; + } else + printf("PASS (L1D misses with rfi_flush=%d: %llu %c %lu) [%d/%d pass]\n", + rfi_flush, l1d_misses_total, rfi_flush ? '>' : '<', + rfi_flush ? repetitions * l1d_misses_expected : + repetitions * l1d_misses_expected / 2, + passes, repetitions); + + if (rfi_flush == rfi_flush_org) { + rfi_flush = !rfi_flush_org; + if (write_debugfs_file("powerpc/rfi_flush", rfi_flush) < 0) { + perror("error writing to powerpc/rfi_flush debugfs file"); + return 1; + } + iter = repetitions; + l1d_misses_total = 0; + passes = 0; + goto again; + } + + perf_event_disable(fd); + close(fd); + + set_dscr(0); + + if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_org) < 0) { + perror("unable to restore original value of powerpc/rfi_flush debugfs file"); + return 1; + } + + return rc; +} + +int main(int argc, char *argv[]) +{ + return test_harness(rfi_flush_test, "rfi_flush_test"); +} diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile index 1fca25c6ace0..209a958dca12 100644 --- a/tools/testing/selftests/powerpc/signal/Makefile +++ b/tools/testing/selftests/powerpc/signal/Makefile @@ -1,15 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := signal signal_tm - -all: $(TEST_PROGS) - -$(TEST_PROGS): ../harness.c ../utils.c signal.S +TEST_GEN_PROGS := signal signal_tm CFLAGS += -maltivec -signal_tm: CFLAGS += -mhtm +$(OUTPUT)/signal_tm: CFLAGS += -mhtm top_srcdir = ../../../../.. include ../../lib.mk -clean: - rm -f $(TEST_PROGS) *.o +$(TEST_GEN_PROGS): ../harness.c ../utils.c signal.S diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile index fcd2dcb8972b..bdc081afedb0 100644 --- a/tools/testing/selftests/powerpc/switch_endian/Makefile +++ b/tools/testing/selftests/powerpc/switch_endian/Makefile @@ -8,6 +8,7 @@ EXTRA_CLEAN = $(OUTPUT)/*.o $(OUTPUT)/check-reversed.S top_srcdir = ../../../../.. include ../../lib.mk +$(OUTPUT)/switch_endian_test: ASFLAGS += -I $(OUTPUT) $(OUTPUT)/switch_endian_test: $(OUTPUT)/check-reversed.S $(OUTPUT)/check-reversed.o: $(OUTPUT)/check.o diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index c3ee8393dae8..208452a93e2c 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -11,6 +11,7 @@ tm-signal-context-chk-fpu tm-signal-context-chk-gpr tm-signal-context-chk-vmx tm-signal-context-chk-vsx +tm-signal-sigreturn-nt tm-vmx-unavail tm-unavailable tm-trap diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 9fc2cf6fbc92..75a685359129 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -4,7 +4,7 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \ tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \ - $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn + $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn tm-signal-sigreturn-nt top_srcdir = ../../../../.. include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c new file mode 100644 index 000000000000..56fbf9f6bbf3 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2018, Breno Leitao, Gustavo Romero, IBM Corp. + * + * A test case that creates a signal and starts a suspended transaction + * inside the signal handler. + * + * It returns from the signal handler with the CPU at suspended state, but + * without setting usercontext MSR Transaction State (TS) fields. + */ + +#define _GNU_SOURCE +#include <stdlib.h> +#include <signal.h> + +#include "utils.h" + +void trap_signal_handler(int signo, siginfo_t *si, void *uc) +{ + ucontext_t *ucp = (ucontext_t *) uc; + + asm("tbegin.; tsuspend.;"); + + /* Skip 'trap' instruction if it succeed */ + ucp->uc_mcontext.regs->nip += 4; +} + +int tm_signal_sigreturn_nt(void) +{ + struct sigaction trap_sa; + + trap_sa.sa_flags = SA_SIGINFO; + trap_sa.sa_sigaction = trap_signal_handler; + + sigaction(SIGTRAP, &trap_sa, NULL); + + raise(SIGTRAP); + + return EXIT_SUCCESS; +} + +int main(int argc, char **argv) +{ + test_harness(tm_signal_sigreturn_nt, "tm_signal_sigreturn_nt"); +} + diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c index 156c8e750259..09894f4ff62e 100644 --- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c +++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c @@ -236,7 +236,8 @@ void *tm_una_ping(void *input) } /* Check if we were not expecting a failure and a it occurred. */ - if (!expecting_failure() && is_failure(cr_)) { + if (!expecting_failure() && is_failure(cr_) && + !failure_is_reschedule()) { printf("\n\tUnexpected transaction failure 0x%02lx\n\t", failure_code()); return (void *) -1; @@ -244,9 +245,11 @@ void *tm_una_ping(void *input) /* * Check if TM failed due to the cause we were expecting. 0xda is a - * TM_CAUSE_FAC_UNAV cause, otherwise it's an unexpected cause. + * TM_CAUSE_FAC_UNAV cause, otherwise it's an unexpected cause, unless + * it was caused by a reschedule. */ - if (is_failure(cr_) && !failure_is_unavailable()) { + if (is_failure(cr_) && !failure_is_unavailable() && + !failure_is_reschedule()) { printf("\n\tUnexpected failure cause 0x%02lx\n\t", failure_code()); return (void *) -1; diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h index df4204247d45..5518b1d4ef8b 100644 --- a/tools/testing/selftests/powerpc/tm/tm.h +++ b/tools/testing/selftests/powerpc/tm/tm.h @@ -52,6 +52,15 @@ static inline bool failure_is_unavailable(void) return (failure_code() & TM_CAUSE_FAC_UNAV) == TM_CAUSE_FAC_UNAV; } +static inline bool failure_is_reschedule(void) +{ + if ((failure_code() & TM_CAUSE_RESCHED) == TM_CAUSE_RESCHED || + (failure_code() & TM_CAUSE_KVM_RESCHED) == TM_CAUSE_KVM_RESCHED) + return true; + + return false; +} + static inline bool failure_is_nesting(void) { return (__builtin_get_texasru() & 0x400000); diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c index aa8fc1e6365b..ed62f4153d3e 100644 --- a/tools/testing/selftests/powerpc/utils.c +++ b/tools/testing/selftests/powerpc/utils.c @@ -10,12 +10,17 @@ #include <fcntl.h> #include <link.h> #include <sched.h> +#include <signal.h> #include <stdio.h> +#include <stdlib.h> #include <string.h> +#include <sys/ioctl.h> #include <sys/stat.h> #include <sys/types.h> #include <sys/utsname.h> #include <unistd.h> +#include <asm/unistd.h> +#include <linux/limits.h> #include "utils.h" @@ -121,3 +126,150 @@ bool is_ppc64le(void) return strcmp(uts.machine, "ppc64le") == 0; } + +int read_debugfs_file(char *debugfs_file, int *result) +{ + int rc = -1, fd; + char path[PATH_MAX]; + char value[16]; + + strcpy(path, "/sys/kernel/debug/"); + strncat(path, debugfs_file, PATH_MAX - strlen(path) - 1); + + if ((fd = open(path, O_RDONLY)) < 0) + return rc; + + if ((rc = read(fd, value, sizeof(value))) < 0) + return rc; + + value[15] = 0; + *result = atoi(value); + close(fd); + + return 0; +} + +int write_debugfs_file(char *debugfs_file, int result) +{ + int rc = -1, fd; + char path[PATH_MAX]; + char value[16]; + + strcpy(path, "/sys/kernel/debug/"); + strncat(path, debugfs_file, PATH_MAX - strlen(path) - 1); + + if ((fd = open(path, O_WRONLY)) < 0) + return rc; + + snprintf(value, 16, "%d", result); + + if ((rc = write(fd, value, strlen(value))) < 0) + return rc; + + close(fd); + + return 0; +} + +static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, + int cpu, int group_fd, unsigned long flags) +{ + return syscall(__NR_perf_event_open, hw_event, pid, cpu, + group_fd, flags); +} + +static void perf_event_attr_init(struct perf_event_attr *event_attr, + unsigned int type, + unsigned long config) +{ + memset(event_attr, 0, sizeof(*event_attr)); + + event_attr->type = type; + event_attr->size = sizeof(struct perf_event_attr); + event_attr->config = config; + event_attr->read_format = PERF_FORMAT_GROUP; + event_attr->disabled = 1; + event_attr->exclude_kernel = 1; + event_attr->exclude_hv = 1; + event_attr->exclude_guest = 1; +} + +int perf_event_open_counter(unsigned int type, + unsigned long config, int group_fd) +{ + int fd; + struct perf_event_attr event_attr; + + perf_event_attr_init(&event_attr, type, config); + + fd = perf_event_open(&event_attr, 0, -1, group_fd, 0); + + if (fd < 0) + perror("perf_event_open() failed"); + + return fd; +} + +int perf_event_enable(int fd) +{ + if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) { + perror("error while enabling perf events"); + return -1; + } + + return 0; +} + +int perf_event_disable(int fd) +{ + if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) { + perror("error disabling perf events"); + return -1; + } + + return 0; +} + +int perf_event_reset(int fd) +{ + if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) { + perror("error resetting perf events"); + return -1; + } + + return 0; +} + +static void sigill_handler(int signr, siginfo_t *info, void *unused) +{ + static int warned = 0; + ucontext_t *ctx = (ucontext_t *)unused; + unsigned long *pc = &UCONTEXT_NIA(ctx); + + /* mtspr 3,RS to check for move to DSCR below */ + if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) { + if (!warned++) + printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n"); + *pc += 4; + } else { + printf("SIGILL at %p\n", pc); + abort(); + } +} + +void set_dscr(unsigned long val) +{ + static int init = 0; + struct sigaction sa; + + if (!init) { + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = sigill_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGILL, &sa, NULL)) + perror("sigill_handler"); + init = 1; + } + + asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); +} diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index 82121a81681f..29bac5ef9a93 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -10,4 +10,5 @@ /proc-uptime-002 /read /self +/setns-dcache /thread-self diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile index 1c12c34cf85d..434d033ee067 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile @@ -14,6 +14,7 @@ TEST_GEN_PROGS += proc-uptime-001 TEST_GEN_PROGS += proc-uptime-002 TEST_GEN_PROGS += read TEST_GEN_PROGS += self +TEST_GEN_PROGS += setns-dcache TEST_GEN_PROGS += thread-self include ../lib.mk diff --git a/tools/testing/selftests/proc/fd-001-lookup.c b/tools/testing/selftests/proc/fd-001-lookup.c index a2010dfb2110..60d7948e7124 100644 --- a/tools/testing/selftests/proc/fd-001-lookup.c +++ b/tools/testing/selftests/proc/fd-001-lookup.c @@ -14,7 +14,7 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ // Test /proc/*/fd lookup. -#define _GNU_SOURCE + #undef NDEBUG #include <assert.h> #include <dirent.h> diff --git a/tools/testing/selftests/proc/fd-003-kthread.c b/tools/testing/selftests/proc/fd-003-kthread.c index 1d659d55368c..dc591f97b63d 100644 --- a/tools/testing/selftests/proc/fd-003-kthread.c +++ b/tools/testing/selftests/proc/fd-003-kthread.c @@ -14,7 +14,7 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ // Test that /proc/$KERNEL_THREAD/fd/ is empty. -#define _GNU_SOURCE + #undef NDEBUG #include <sys/syscall.h> #include <assert.h> diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c b/tools/testing/selftests/proc/proc-self-map-files-002.c index 6f1f4a6e1ecb..85744425b08d 100644 --- a/tools/testing/selftests/proc/proc-self-map-files-002.c +++ b/tools/testing/selftests/proc/proc-self-map-files-002.c @@ -13,7 +13,7 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -/* Test readlink /proc/self/map_files/... with address 0. */ +/* Test readlink /proc/self/map_files/... with minimum address. */ #include <errno.h> #include <sys/types.h> #include <sys/stat.h> @@ -47,6 +47,11 @@ static void fail(const char *fmt, unsigned long a, unsigned long b) int main(void) { const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE); +#ifdef __arm__ + unsigned long va = 2 * PAGE_SIZE; +#else + unsigned long va = 0; +#endif void *p; int fd; unsigned long a, b; @@ -55,7 +60,7 @@ int main(void) if (fd == -1) return 1; - p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0); + p = mmap((void *)va, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0); if (p == MAP_FAILED) { if (errno == EPERM) return 2; diff --git a/tools/testing/selftests/proc/setns-dcache.c b/tools/testing/selftests/proc/setns-dcache.c new file mode 100644 index 000000000000..60ab197a73fc --- /dev/null +++ b/tools/testing/selftests/proc/setns-dcache.c @@ -0,0 +1,129 @@ +/* + * Copyright © 2019 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * Test that setns(CLONE_NEWNET) points to new /proc/net content even + * if old one is in dcache. + * + * FIXME /proc/net/unix is under CONFIG_UNIX which can be disabled. + */ +#undef NDEBUG +#include <assert.h> +#include <errno.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <sys/socket.h> + +static pid_t pid = -1; + +static void f(void) +{ + if (pid > 0) { + kill(pid, SIGTERM); + } +} + +int main(void) +{ + int fd[2]; + char _ = 0; + int nsfd; + + atexit(f); + + /* Check for priviledges and syscall availability straight away. */ + if (unshare(CLONE_NEWNET) == -1) { + if (errno == ENOSYS || errno == EPERM) { + return 4; + } + return 1; + } + /* Distinguisher between two otherwise empty net namespaces. */ + if (socket(AF_UNIX, SOCK_STREAM, 0) == -1) { + return 1; + } + + if (pipe(fd) == -1) { + return 1; + } + + pid = fork(); + if (pid == -1) { + return 1; + } + + if (pid == 0) { + if (unshare(CLONE_NEWNET) == -1) { + return 1; + } + + if (write(fd[1], &_, 1) != 1) { + return 1; + } + + pause(); + + return 0; + } + + if (read(fd[0], &_, 1) != 1) { + return 1; + } + + { + char buf[64]; + snprintf(buf, sizeof(buf), "/proc/%u/ns/net", pid); + nsfd = open(buf, O_RDONLY); + if (nsfd == -1) { + return 1; + } + } + + /* Reliably pin dentry into dcache. */ + (void)open("/proc/net/unix", O_RDONLY); + + if (setns(nsfd, CLONE_NEWNET) == -1) { + return 1; + } + + kill(pid, SIGTERM); + pid = 0; + + { + char buf[4096]; + ssize_t rv; + int fd; + + fd = open("/proc/net/unix", O_RDONLY); + if (fd == -1) { + return 1; + } + +#define S "Num RefCount Protocol Flags Type St Inode Path\n" + rv = read(fd, buf, sizeof(buf)); + + assert(rv == strlen(S)); + assert(memcmp(buf, S, strlen(S)) == 0); + } + + return 0; +} diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index f7247ee00514..58ca758a5786 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -120,7 +120,6 @@ then parse-build.sh $resdir/Make.out $title else # Build failed. - cp $builddir/Make*.out $resdir cp $builddir/.config $resdir || : echo Build failed, not running KVM, see $resdir. if test -f $builddir.wait diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 5a7a62d76a50..19864f1cb27a 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -194,6 +194,14 @@ do shift done +if test -z "$TORTURE_INITRD" || tools/testing/selftests/rcutorture/bin/mkinitrd.sh +then + : +else + echo No initrd and unable to create one, aborting test >&2 + exit 1 +fi + CONFIGFRAG=${KVM}/configs/${TORTURE_SUITE}; export CONFIGFRAG if test -z "$configs" diff --git a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh new file mode 100755 index 000000000000..da298394daa2 --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh @@ -0,0 +1,136 @@ +#!/bin/bash +# +# Create an initrd directory if one does not already exist. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# Copyright (C) IBM Corporation, 2013 +# +# Author: Connor Shu <Connor.Shu@ibm.com> + +D=tools/testing/selftests/rcutorture + +# Prerequisite checks +[ -z "$D" ] && echo >&2 "No argument supplied" && exit 1 +if [ ! -d "$D" ]; then + echo >&2 "$D does not exist: Malformed kernel source tree?" + exit 1 +fi +if [ -s "$D/initrd/init" ]; then + echo "$D/initrd/init already exists, no need to create it" + exit 0 +fi + +T=${TMPDIR-/tmp}/mkinitrd.sh.$$ +trap 'rm -rf $T' 0 2 +mkdir $T + +cat > $T/init << '__EOF___' +#!/bin/sh +# Run in userspace a few milliseconds every second. This helps to +# exercise the NO_HZ_FULL portions of RCU. +while : +do + q= + for i in \ + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a \ + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a \ + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a \ + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a \ + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a \ + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + do + q="$q $i" + done + sleep 1 +done +__EOF___ + +# Try using dracut to create initrd +if command -v dracut >/dev/null 2>&1 +then + echo Creating $D/initrd using dracut. + # Filesystem creation + dracut --force --no-hostonly --no-hostonly-cmdline --module "base" $T/initramfs.img + cd $D + mkdir -p initrd + cd initrd + zcat $T/initramfs.img | cpio -id + cp $T/init init + chmod +x init + echo Done creating $D/initrd using dracut + exit 0 +fi + +# No dracut, so create a C-language initrd/init program and statically +# link it. This results in a very small initrd, but might be a bit less +# future-proof than dracut. +echo "Could not find dracut, attempting C initrd" +cd $D +mkdir -p initrd +cd initrd +cat > init.c << '___EOF___' +#ifndef NOLIBC +#include <unistd.h> +#include <sys/time.h> +#endif + +volatile unsigned long delaycount; + +int main(int argc, int argv[]) +{ + int i; + struct timeval tv; + struct timeval tvb; + + for (;;) { + sleep(1); + /* Need some userspace time. */ + if (gettimeofday(&tvb, NULL)) + continue; + do { + for (i = 0; i < 1000 * 100; i++) + delaycount = i * i; + if (gettimeofday(&tv, NULL)) + break; + tv.tv_sec -= tvb.tv_sec; + if (tv.tv_sec > 1) + break; + tv.tv_usec += tv.tv_sec * 1000 * 1000; + tv.tv_usec -= tvb.tv_usec; + } while (tv.tv_usec < 1000); + } + return 0; +} +___EOF___ + +# build using nolibc on supported archs (smaller executable) and fall +# back to regular glibc on other ones. +if echo -e "#if __x86_64__||__i386__||__i486__||__i586__||__i686__" \ + "||__ARM_EABI__||__aarch64__\nyes\n#endif" \ + | ${CROSS_COMPILE}gcc -E -nostdlib -xc - \ + | grep -q '^yes'; then + # architecture supported by nolibc + ${CROSS_COMPILE}gcc -fno-asynchronous-unwind-tables -fno-ident \ + -nostdlib -include ../bin/nolibc.h -lgcc -s -static -Os \ + -o init init.c +else + ${CROSS_COMPILE}gcc -s -static -Os -o init init.c +fi + +rm init.c +echo "Done creating a statically linked C-language initrd" + +exit 0 diff --git a/tools/testing/selftests/rcutorture/bin/nolibc.h b/tools/testing/selftests/rcutorture/bin/nolibc.h new file mode 100644 index 000000000000..f98f5b92d3eb --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/nolibc.h @@ -0,0 +1,2197 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* nolibc.h + * Copyright (C) 2017-2018 Willy Tarreau <w@1wt.eu> + */ + +/* some archs (at least aarch64) don't expose the regular syscalls anymore by + * default, either because they have an "_at" replacement, or because there are + * more modern alternatives. For now we'd rather still use them. + */ +#define __ARCH_WANT_SYSCALL_NO_AT +#define __ARCH_WANT_SYSCALL_NO_FLAGS +#define __ARCH_WANT_SYSCALL_DEPRECATED + +#include <asm/unistd.h> +#include <asm/ioctls.h> +#include <asm/errno.h> +#include <linux/fs.h> +#include <linux/loop.h> + +#define NOLIBC + +/* Build a static executable this way : + * $ gcc -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \ + * -static -include nolibc.h -lgcc -o hello hello.c + * + * Useful calling convention table found here : + * http://man7.org/linux/man-pages/man2/syscall.2.html + * + * This doc is even better : + * https://w3challs.com/syscalls/ + */ + + +/* this way it will be removed if unused */ +static int errno; + +#ifndef NOLIBC_IGNORE_ERRNO +#define SET_ERRNO(v) do { errno = (v); } while (0) +#else +#define SET_ERRNO(v) do { } while (0) +#endif + +/* errno codes all ensure that they will not conflict with a valid pointer + * because they all correspond to the highest addressable memry page. + */ +#define MAX_ERRNO 4095 + +/* Declare a few quite common macros and types that usually are in stdlib.h, + * stdint.h, ctype.h, unistd.h and a few other common locations. + */ + +#define NULL ((void *)0) + +/* stdint types */ +typedef unsigned char uint8_t; +typedef signed char int8_t; +typedef unsigned short uint16_t; +typedef signed short int16_t; +typedef unsigned int uint32_t; +typedef signed int int32_t; +typedef unsigned long long uint64_t; +typedef signed long long int64_t; +typedef unsigned long size_t; +typedef signed long ssize_t; +typedef unsigned long uintptr_t; +typedef signed long intptr_t; +typedef signed long ptrdiff_t; + +/* for stat() */ +typedef unsigned int dev_t; +typedef unsigned long ino_t; +typedef unsigned int mode_t; +typedef signed int pid_t; +typedef unsigned int uid_t; +typedef unsigned int gid_t; +typedef unsigned long nlink_t; +typedef signed long off_t; +typedef signed long blksize_t; +typedef signed long blkcnt_t; +typedef signed long time_t; + +/* for poll() */ +struct pollfd { + int fd; + short int events; + short int revents; +}; + +/* for select() */ +struct timeval { + long tv_sec; + long tv_usec; +}; + +/* for pselect() */ +struct timespec { + long tv_sec; + long tv_nsec; +}; + +/* for gettimeofday() */ +struct timezone { + int tz_minuteswest; + int tz_dsttime; +}; + +/* for getdents64() */ +struct linux_dirent64 { + uint64_t d_ino; + int64_t d_off; + unsigned short d_reclen; + unsigned char d_type; + char d_name[]; +}; + +/* commonly an fd_set represents 256 FDs */ +#define FD_SETSIZE 256 +typedef struct { uint32_t fd32[FD_SETSIZE/32]; } fd_set; + +/* needed by wait4() */ +struct rusage { + struct timeval ru_utime; + struct timeval ru_stime; + long ru_maxrss; + long ru_ixrss; + long ru_idrss; + long ru_isrss; + long ru_minflt; + long ru_majflt; + long ru_nswap; + long ru_inblock; + long ru_oublock; + long ru_msgsnd; + long ru_msgrcv; + long ru_nsignals; + long ru_nvcsw; + long ru_nivcsw; +}; + +/* stat flags (WARNING, octal here) */ +#define S_IFDIR 0040000 +#define S_IFCHR 0020000 +#define S_IFBLK 0060000 +#define S_IFREG 0100000 +#define S_IFIFO 0010000 +#define S_IFLNK 0120000 +#define S_IFSOCK 0140000 +#define S_IFMT 0170000 + +#define S_ISDIR(mode) (((mode) & S_IFDIR) == S_IFDIR) +#define S_ISCHR(mode) (((mode) & S_IFCHR) == S_IFCHR) +#define S_ISBLK(mode) (((mode) & S_IFBLK) == S_IFBLK) +#define S_ISREG(mode) (((mode) & S_IFREG) == S_IFREG) +#define S_ISFIFO(mode) (((mode) & S_IFIFO) == S_IFIFO) +#define S_ISLNK(mode) (((mode) & S_IFLNK) == S_IFLNK) +#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK) + +#define DT_UNKNOWN 0 +#define DT_FIFO 1 +#define DT_CHR 2 +#define DT_DIR 4 +#define DT_BLK 6 +#define DT_REG 8 +#define DT_LNK 10 +#define DT_SOCK 12 + +/* all the *at functions */ +#ifndef AT_FDWCD +#define AT_FDCWD -100 +#endif + +/* lseek */ +#define SEEK_SET 0 +#define SEEK_CUR 1 +#define SEEK_END 2 + +/* reboot */ +#define LINUX_REBOOT_MAGIC1 0xfee1dead +#define LINUX_REBOOT_MAGIC2 0x28121969 +#define LINUX_REBOOT_CMD_HALT 0xcdef0123 +#define LINUX_REBOOT_CMD_POWER_OFF 0x4321fedc +#define LINUX_REBOOT_CMD_RESTART 0x01234567 +#define LINUX_REBOOT_CMD_SW_SUSPEND 0xd000fce2 + + +/* The format of the struct as returned by the libc to the application, which + * significantly differs from the format returned by the stat() syscall flavours. + */ +struct stat { + dev_t st_dev; /* ID of device containing file */ + ino_t st_ino; /* inode number */ + mode_t st_mode; /* protection */ + nlink_t st_nlink; /* number of hard links */ + uid_t st_uid; /* user ID of owner */ + gid_t st_gid; /* group ID of owner */ + dev_t st_rdev; /* device ID (if special file) */ + off_t st_size; /* total size, in bytes */ + blksize_t st_blksize; /* blocksize for file system I/O */ + blkcnt_t st_blocks; /* number of 512B blocks allocated */ + time_t st_atime; /* time of last access */ + time_t st_mtime; /* time of last modification */ + time_t st_ctime; /* time of last status change */ +}; + +#define WEXITSTATUS(status) (((status) & 0xff00) >> 8) +#define WIFEXITED(status) (((status) & 0x7f) == 0) + + +/* Below comes the architecture-specific code. For each architecture, we have + * the syscall declarations and the _start code definition. This is the only + * global part. On all architectures the kernel puts everything in the stack + * before jumping to _start just above us, without any return address (_start + * is not a function but an entry pint). So at the stack pointer we find argc. + * Then argv[] begins, and ends at the first NULL. Then we have envp which + * starts and ends with a NULL as well. So envp=argv+argc+1. + */ + +#if defined(__x86_64__) +/* Syscalls for x86_64 : + * - registers are 64-bit + * - syscall number is passed in rax + * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively + * - the system call is performed by calling the syscall instruction + * - syscall return comes in rax + * - rcx and r8..r11 may be clobbered, others are preserved. + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * - the syscall number is always specified last in order to allow to force + * some registers before (gcc refuses a %-register at the last position). + */ + +#define my_syscall0(num) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret) \ + : "0"(_num) \ + : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + register long _arg1 asm("rdi") = (long)(arg1); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret) \ + : "r"(_arg1), \ + "0"(_num) \ + : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + register long _arg1 asm("rdi") = (long)(arg1); \ + register long _arg2 asm("rsi") = (long)(arg2); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), \ + "0"(_num) \ + : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + register long _arg1 asm("rdi") = (long)(arg1); \ + register long _arg2 asm("rsi") = (long)(arg2); \ + register long _arg3 asm("rdx") = (long)(arg3); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "0"(_num) \ + : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + register long _arg1 asm("rdi") = (long)(arg1); \ + register long _arg2 asm("rsi") = (long)(arg2); \ + register long _arg3 asm("rdx") = (long)(arg3); \ + register long _arg4 asm("r10") = (long)(arg4); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret), "=r"(_arg4) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "0"(_num) \ + : "rcx", "r8", "r9", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + register long _arg1 asm("rdi") = (long)(arg1); \ + register long _arg2 asm("rsi") = (long)(arg2); \ + register long _arg3 asm("rdx") = (long)(arg3); \ + register long _arg4 asm("r10") = (long)(arg4); \ + register long _arg5 asm("r8") = (long)(arg5); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "0"(_num) \ + : "rcx", "r9", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + register long _arg1 asm("rdi") = (long)(arg1); \ + register long _arg2 asm("rsi") = (long)(arg2); \ + register long _arg3 asm("rdx") = (long)(arg3); \ + register long _arg4 asm("r10") = (long)(arg4); \ + register long _arg5 asm("r8") = (long)(arg5); \ + register long _arg6 asm("r9") = (long)(arg6); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6), "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +/* startup code */ +asm(".section .text\n" + ".global _start\n" + "_start:\n" + "pop %rdi\n" // argc (first arg, %rdi) + "mov %rsp, %rsi\n" // argv[] (second arg, %rsi) + "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx) + "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned when + "sub $8, %rsp\n" // entering the callee + "call main\n" // main() returns the status code, we'll exit with it. + "movzb %al, %rdi\n" // retrieve exit code from 8 lower bits + "mov $60, %rax\n" // NR_exit == 60 + "syscall\n" // really exit + "hlt\n" // ensure it does not return + ""); + +/* fcntl / open */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_CREAT 0x40 +#define O_EXCL 0x80 +#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x10000 + +/* The struct returned by the stat() syscall, equivalent to stat64(). The + * syscall returns 116 bytes and stops in the middle of __unused. + */ +struct sys_stat_struct { + unsigned long st_dev; + unsigned long st_ino; + unsigned long st_nlink; + unsigned int st_mode; + unsigned int st_uid; + + unsigned int st_gid; + unsigned int __pad0; + unsigned long st_rdev; + long st_size; + long st_blksize; + + long st_blocks; + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; + long __unused[3]; +}; + +#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) +/* Syscalls for i386 : + * - mostly similar to x86_64 + * - registers are 32-bit + * - syscall number is passed in eax + * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively + * - all registers are preserved (except eax of course) + * - the system call is performed by calling int $0x80 + * - syscall return comes in eax + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * - the syscall number is always specified last in order to allow to force + * some registers before (gcc refuses a %-register at the last position). + * + * Also, i386 supports the old_select syscall if newselect is not available + */ +#define __ARCH_WANT_SYS_OLD_SELECT + +#define my_syscall0(num) \ +({ \ + long _ret; \ + register long _num asm("eax") = (num); \ + \ + asm volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + long _ret; \ + register long _num asm("eax") = (num); \ + register long _arg1 asm("ebx") = (long)(arg1); \ + \ + asm volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + long _ret; \ + register long _num asm("eax") = (num); \ + register long _arg1 asm("ebx") = (long)(arg1); \ + register long _arg2 asm("ecx") = (long)(arg2); \ + \ + asm volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + long _ret; \ + register long _num asm("eax") = (num); \ + register long _arg1 asm("ebx") = (long)(arg1); \ + register long _arg2 asm("ecx") = (long)(arg2); \ + register long _arg3 asm("edx") = (long)(arg3); \ + \ + asm volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + long _ret; \ + register long _num asm("eax") = (num); \ + register long _arg1 asm("ebx") = (long)(arg1); \ + register long _arg2 asm("ecx") = (long)(arg2); \ + register long _arg3 asm("edx") = (long)(arg3); \ + register long _arg4 asm("esi") = (long)(arg4); \ + \ + asm volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + long _ret; \ + register long _num asm("eax") = (num); \ + register long _arg1 asm("ebx") = (long)(arg1); \ + register long _arg2 asm("ecx") = (long)(arg2); \ + register long _arg3 asm("edx") = (long)(arg3); \ + register long _arg4 asm("esi") = (long)(arg4); \ + register long _arg5 asm("edi") = (long)(arg5); \ + \ + asm volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +/* startup code */ +asm(".section .text\n" + ".global _start\n" + "_start:\n" + "pop %eax\n" // argc (first arg, %eax) + "mov %esp, %ebx\n" // argv[] (second arg, %ebx) + "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx) + "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned when + "push %ecx\n" // push all registers on the stack so that we + "push %ebx\n" // support both regparm and plain stack modes + "push %eax\n" + "call main\n" // main() returns the status code in %eax + "movzbl %al, %ebx\n" // retrieve exit code from lower 8 bits + "movl $1, %eax\n" // NR_exit == 1 + "int $0x80\n" // exit now + "hlt\n" // ensure it does not + ""); + +/* fcntl / open */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_CREAT 0x40 +#define O_EXCL 0x80 +#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x10000 + +/* The struct returned by the stat() syscall, 32-bit only, the syscall returns + * exactly 56 bytes (stops before the unused array). + */ +struct sys_stat_struct { + unsigned long st_dev; + unsigned long st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + + unsigned long st_rdev; + unsigned long st_size; + unsigned long st_blksize; + unsigned long st_blocks; + + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + + unsigned long st_ctime; + unsigned long st_ctime_nsec; + unsigned long __unused[2]; +}; + +#elif defined(__ARM_EABI__) +/* Syscalls for ARM in ARM or Thumb modes : + * - registers are 32-bit + * - stack is 8-byte aligned + * ( http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka4127.html) + * - syscall number is passed in r7 + * - arguments are in r0, r1, r2, r3, r4, r5 + * - the system call is performed by calling svc #0 + * - syscall return comes in r0. + * - only lr is clobbered. + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * - the syscall number is always specified last in order to allow to force + * some registers before (gcc refuses a %-register at the last position). + * + * Also, ARM supports the old_select syscall if newselect is not available + */ +#define __ARCH_WANT_SYS_OLD_SELECT + +#define my_syscall0(num) \ +({ \ + register long _num asm("r7") = (num); \ + register long _arg1 asm("r0"); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num asm("r7") = (num); \ + register long _arg1 asm("r0") = (long)(arg1); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num asm("r7") = (num); \ + register long _arg1 asm("r0") = (long)(arg1); \ + register long _arg2 asm("r1") = (long)(arg2); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num asm("r7") = (num); \ + register long _arg1 asm("r0") = (long)(arg1); \ + register long _arg2 asm("r1") = (long)(arg2); \ + register long _arg3 asm("r2") = (long)(arg3); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num asm("r7") = (num); \ + register long _arg1 asm("r0") = (long)(arg1); \ + register long _arg2 asm("r1") = (long)(arg2); \ + register long _arg3 asm("r2") = (long)(arg3); \ + register long _arg4 asm("r3") = (long)(arg4); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num asm("r7") = (num); \ + register long _arg1 asm("r0") = (long)(arg1); \ + register long _arg2 asm("r1") = (long)(arg2); \ + register long _arg3 asm("r2") = (long)(arg3); \ + register long _arg4 asm("r3") = (long)(arg4); \ + register long _arg5 asm("r4") = (long)(arg5); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r" (_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +/* startup code */ +asm(".section .text\n" + ".global _start\n" + "_start:\n" +#if defined(__THUMBEB__) || defined(__THUMBEL__) + /* We enter here in 32-bit mode but if some previous functions were in + * 16-bit mode, the assembler cannot know, so we need to tell it we're in + * 32-bit now, then switch to 16-bit (is there a better way to do it than + * adding 1 by hand ?) and tell the asm we're now in 16-bit mode so that + * it generates correct instructions. Note that we do not support thumb1. + */ + ".code 32\n" + "add r0, pc, #1\n" + "bx r0\n" + ".code 16\n" +#endif + "pop {%r0}\n" // argc was in the stack + "mov %r1, %sp\n" // argv = sp + "add %r2, %r1, %r0, lsl #2\n" // envp = argv + 4*argc ... + "add %r2, %r2, $4\n" // ... + 4 + "and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the + "mov %sp, %r3\n" // callee, an bl doesn't push (lr=pc) + "bl main\n" // main() returns the status code, we'll exit with it. + "and %r0, %r0, $0xff\n" // limit exit code to 8 bits + "movs r7, $1\n" // NR_exit == 1 + "svc $0x00\n" + ""); + +/* fcntl / open */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_CREAT 0x40 +#define O_EXCL 0x80 +#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x4000 + +/* The struct returned by the stat() syscall, 32-bit only, the syscall returns + * exactly 56 bytes (stops before the unused array). In big endian, the format + * differs as devices are returned as short only. + */ +struct sys_stat_struct { +#if defined(__ARMEB__) + unsigned short st_dev; + unsigned short __pad1; +#else + unsigned long st_dev; +#endif + unsigned long st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; +#if defined(__ARMEB__) + unsigned short st_rdev; + unsigned short __pad2; +#else + unsigned long st_rdev; +#endif + unsigned long st_size; + unsigned long st_blksize; + unsigned long st_blocks; + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; + unsigned long __unused[2]; +}; + +#elif defined(__aarch64__) +/* Syscalls for AARCH64 : + * - registers are 64-bit + * - stack is 16-byte aligned + * - syscall number is passed in x8 + * - arguments are in x0, x1, x2, x3, x4, x5 + * - the system call is performed by calling svc 0 + * - syscall return comes in x0. + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * + * On aarch64, select() is not implemented so we have to use pselect6(). + */ +#define __ARCH_WANT_SYS_PSELECT6 + +#define my_syscall0(num) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0"); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0") = (long)(arg1); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0") = (long)(arg1); \ + register long _arg2 asm("x1") = (long)(arg2); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0") = (long)(arg1); \ + register long _arg2 asm("x1") = (long)(arg2); \ + register long _arg3 asm("x2") = (long)(arg3); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0") = (long)(arg1); \ + register long _arg2 asm("x1") = (long)(arg2); \ + register long _arg3 asm("x2") = (long)(arg3); \ + register long _arg4 asm("x3") = (long)(arg4); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0") = (long)(arg1); \ + register long _arg2 asm("x1") = (long)(arg2); \ + register long _arg3 asm("x2") = (long)(arg3); \ + register long _arg4 asm("x3") = (long)(arg4); \ + register long _arg5 asm("x4") = (long)(arg5); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r" (_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0") = (long)(arg1); \ + register long _arg2 asm("x1") = (long)(arg2); \ + register long _arg3 asm("x2") = (long)(arg3); \ + register long _arg4 asm("x3") = (long)(arg4); \ + register long _arg5 asm("x4") = (long)(arg5); \ + register long _arg6 asm("x5") = (long)(arg6); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r" (_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +/* startup code */ +asm(".section .text\n" + ".global _start\n" + "_start:\n" + "ldr x0, [sp]\n" // argc (x0) was in the stack + "add x1, sp, 8\n" // argv (x1) = sp + "lsl x2, x0, 3\n" // envp (x2) = 8*argc ... + "add x2, x2, 8\n" // + 8 (skip null) + "add x2, x2, x1\n" // + argv + "and sp, x1, -16\n" // sp must be 16-byte aligned in the callee + "bl main\n" // main() returns the status code, we'll exit with it. + "and x0, x0, 0xff\n" // limit exit code to 8 bits + "mov x8, 93\n" // NR_exit == 93 + "svc #0\n" + ""); + +/* fcntl / open */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_CREAT 0x40 +#define O_EXCL 0x80 +#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x4000 + +/* The struct returned by the newfstatat() syscall. Differs slightly from the + * x86_64's stat one by field ordering, so be careful. + */ +struct sys_stat_struct { + unsigned long st_dev; + unsigned long st_ino; + unsigned int st_mode; + unsigned int st_nlink; + unsigned int st_uid; + unsigned int st_gid; + + unsigned long st_rdev; + unsigned long __pad1; + long st_size; + int st_blksize; + int __pad2; + + long st_blocks; + long st_atime; + unsigned long st_atime_nsec; + long st_mtime; + + unsigned long st_mtime_nsec; + long st_ctime; + unsigned long st_ctime_nsec; + unsigned int __unused[2]; +}; + +#elif defined(__mips__) && defined(_ABIO32) +/* Syscalls for MIPS ABI O32 : + * - WARNING! there's always a delayed slot! + * - WARNING again, the syntax is different, registers take a '$' and numbers + * do not. + * - registers are 32-bit + * - stack is 8-byte aligned + * - syscall number is passed in v0 (starts at 0xfa0). + * - arguments are in a0, a1, a2, a3, then the stack. The caller needs to + * leave some room in the stack for the callee to save a0..a3 if needed. + * - Many registers are clobbered, in fact only a0..a2 and s0..s8 are + * preserved. See: https://www.linux-mips.org/wiki/Syscall as well as + * scall32-o32.S in the kernel sources. + * - the system call is performed by calling "syscall" + * - syscall return comes in v0, and register a3 needs to be checked to know + * if an error occured, in which case errno is in v0. + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + */ + +#define my_syscall0(num) \ +({ \ + register long _num asm("v0") = (num); \ + register long _arg4 asm("a3"); \ + \ + asm volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r"(_num), "=r"(_arg4) \ + : "r"(_num) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num asm("v0") = (num); \ + register long _arg1 asm("a0") = (long)(arg1); \ + register long _arg4 asm("a3"); \ + \ + asm volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r"(_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num asm("v0") = (num); \ + register long _arg1 asm("a0") = (long)(arg1); \ + register long _arg2 asm("a1") = (long)(arg2); \ + register long _arg4 asm("a3"); \ + \ + asm volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r"(_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num asm("v0") = (num); \ + register long _arg1 asm("a0") = (long)(arg1); \ + register long _arg2 asm("a1") = (long)(arg2); \ + register long _arg3 asm("a2") = (long)(arg3); \ + register long _arg4 asm("a3"); \ + \ + asm volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r"(_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num asm("v0") = (num); \ + register long _arg1 asm("a0") = (long)(arg1); \ + register long _arg2 asm("a1") = (long)(arg2); \ + register long _arg3 asm("a2") = (long)(arg3); \ + register long _arg4 asm("a3") = (long)(arg4); \ + \ + asm volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num asm("v0") = (num); \ + register long _arg1 asm("a0") = (long)(arg1); \ + register long _arg2 asm("a1") = (long)(arg2); \ + register long _arg3 asm("a2") = (long)(arg3); \ + register long _arg4 asm("a3") = (long)(arg4); \ + register long _arg5 = (long)(arg5); \ + \ + asm volatile ( \ + "addiu $sp, $sp, -32\n" \ + "sw %7, 16($sp)\n" \ + "syscall\n " \ + "addiu $sp, $sp, 32\n" \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + \ + ); \ + _arg4 ? -_num : _num; \ +}) + +/* startup code, note that it's called __start on MIPS */ +asm(".section .text\n" + ".set nomips16\n" + ".global __start\n" + ".set noreorder\n" + ".option pic0\n" + ".ent __start\n" + "__start:\n" + "lw $a0,($sp)\n" // argc was in the stack + "addiu $a1, $sp, 4\n" // argv = sp + 4 + "sll $a2, $a0, 2\n" // a2 = argc * 4 + "add $a2, $a2, $a1\n" // envp = argv + 4*argc ... + "addiu $a2, $a2, 4\n" // ... + 4 + "li $t0, -8\n" + "and $sp, $sp, $t0\n" // sp must be 8-byte aligned + "addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there! + "jal main\n" // main() returns the status code, we'll exit with it. + "nop\n" // delayed slot + "and $a0, $v0, 0xff\n" // limit exit code to 8 bits + "li $v0, 4001\n" // NR_exit == 4001 + "syscall\n" + ".end __start\n" + ""); + +/* fcntl / open */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_APPEND 0x0008 +#define O_NONBLOCK 0x0080 +#define O_CREAT 0x0100 +#define O_TRUNC 0x0200 +#define O_EXCL 0x0400 +#define O_NOCTTY 0x0800 +#define O_DIRECTORY 0x10000 + +/* The struct returned by the stat() syscall. 88 bytes are returned by the + * syscall. + */ +struct sys_stat_struct { + unsigned int st_dev; + long st_pad1[3]; + unsigned long st_ino; + unsigned int st_mode; + unsigned int st_nlink; + unsigned int st_uid; + unsigned int st_gid; + unsigned int st_rdev; + long st_pad2[2]; + long st_size; + long st_pad3; + long st_atime; + long st_atime_nsec; + long st_mtime; + long st_mtime_nsec; + long st_ctime; + long st_ctime_nsec; + long st_blksize; + long st_blocks; + long st_pad4[14]; +}; + +#endif + + +/* Below are the C functions used to declare the raw syscalls. They try to be + * architecture-agnostic, and return either a success or -errno. Declaring them + * static will lead to them being inlined in most cases, but it's still possible + * to reference them by a pointer if needed. + */ +static __attribute__((unused)) +void *sys_brk(void *addr) +{ + return (void *)my_syscall1(__NR_brk, addr); +} + +static __attribute__((noreturn,unused)) +void sys_exit(int status) +{ + my_syscall1(__NR_exit, status & 255); + while(1); // shut the "noreturn" warnings. +} + +static __attribute__((unused)) +int sys_chdir(const char *path) +{ + return my_syscall1(__NR_chdir, path); +} + +static __attribute__((unused)) +int sys_chmod(const char *path, mode_t mode) +{ +#ifdef __NR_fchmodat + return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0); +#else + return my_syscall2(__NR_chmod, path, mode); +#endif +} + +static __attribute__((unused)) +int sys_chown(const char *path, uid_t owner, gid_t group) +{ +#ifdef __NR_fchownat + return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0); +#else + return my_syscall3(__NR_chown, path, owner, group); +#endif +} + +static __attribute__((unused)) +int sys_chroot(const char *path) +{ + return my_syscall1(__NR_chroot, path); +} + +static __attribute__((unused)) +int sys_close(int fd) +{ + return my_syscall1(__NR_close, fd); +} + +static __attribute__((unused)) +int sys_dup(int fd) +{ + return my_syscall1(__NR_dup, fd); +} + +static __attribute__((unused)) +int sys_dup2(int old, int new) +{ + return my_syscall2(__NR_dup2, old, new); +} + +static __attribute__((unused)) +int sys_execve(const char *filename, char *const argv[], char *const envp[]) +{ + return my_syscall3(__NR_execve, filename, argv, envp); +} + +static __attribute__((unused)) +pid_t sys_fork(void) +{ + return my_syscall0(__NR_fork); +} + +static __attribute__((unused)) +int sys_fsync(int fd) +{ + return my_syscall1(__NR_fsync, fd); +} + +static __attribute__((unused)) +int sys_getdents64(int fd, struct linux_dirent64 *dirp, int count) +{ + return my_syscall3(__NR_getdents64, fd, dirp, count); +} + +static __attribute__((unused)) +pid_t sys_getpgrp(void) +{ + return my_syscall0(__NR_getpgrp); +} + +static __attribute__((unused)) +pid_t sys_getpid(void) +{ + return my_syscall0(__NR_getpid); +} + +static __attribute__((unused)) +int sys_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + return my_syscall2(__NR_gettimeofday, tv, tz); +} + +static __attribute__((unused)) +int sys_ioctl(int fd, unsigned long req, void *value) +{ + return my_syscall3(__NR_ioctl, fd, req, value); +} + +static __attribute__((unused)) +int sys_kill(pid_t pid, int signal) +{ + return my_syscall2(__NR_kill, pid, signal); +} + +static __attribute__((unused)) +int sys_link(const char *old, const char *new) +{ +#ifdef __NR_linkat + return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0); +#else + return my_syscall2(__NR_link, old, new); +#endif +} + +static __attribute__((unused)) +off_t sys_lseek(int fd, off_t offset, int whence) +{ + return my_syscall3(__NR_lseek, fd, offset, whence); +} + +static __attribute__((unused)) +int sys_mkdir(const char *path, mode_t mode) +{ +#ifdef __NR_mkdirat + return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode); +#else + return my_syscall2(__NR_mkdir, path, mode); +#endif +} + +static __attribute__((unused)) +long sys_mknod(const char *path, mode_t mode, dev_t dev) +{ +#ifdef __NR_mknodat + return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev); +#else + return my_syscall3(__NR_mknod, path, mode, dev); +#endif +} + +static __attribute__((unused)) +int sys_mount(const char *src, const char *tgt, const char *fst, + unsigned long flags, const void *data) +{ + return my_syscall5(__NR_mount, src, tgt, fst, flags, data); +} + +static __attribute__((unused)) +int sys_open(const char *path, int flags, mode_t mode) +{ +#ifdef __NR_openat + return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode); +#else + return my_syscall3(__NR_open, path, flags, mode); +#endif +} + +static __attribute__((unused)) +int sys_pivot_root(const char *new, const char *old) +{ + return my_syscall2(__NR_pivot_root, new, old); +} + +static __attribute__((unused)) +int sys_poll(struct pollfd *fds, int nfds, int timeout) +{ + return my_syscall3(__NR_poll, fds, nfds, timeout); +} + +static __attribute__((unused)) +ssize_t sys_read(int fd, void *buf, size_t count) +{ + return my_syscall3(__NR_read, fd, buf, count); +} + +static __attribute__((unused)) +ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg) +{ + return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg); +} + +static __attribute__((unused)) +int sys_sched_yield(void) +{ + return my_syscall0(__NR_sched_yield); +} + +static __attribute__((unused)) +int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) +{ +#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect) + struct sel_arg_struct { + unsigned long n; + fd_set *r, *w, *e; + struct timeval *t; + } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout }; + return my_syscall1(__NR_select, &arg); +#elif defined(__ARCH_WANT_SYS_PSELECT6) && defined(__NR_pselect6) + struct timespec t; + + if (timeout) { + t.tv_sec = timeout->tv_sec; + t.tv_nsec = timeout->tv_usec * 1000; + } + return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); +#else +#ifndef __NR__newselect +#define __NR__newselect __NR_select +#endif + return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout); +#endif +} + +static __attribute__((unused)) +int sys_setpgid(pid_t pid, pid_t pgid) +{ + return my_syscall2(__NR_setpgid, pid, pgid); +} + +static __attribute__((unused)) +pid_t sys_setsid(void) +{ + return my_syscall0(__NR_setsid); +} + +static __attribute__((unused)) +int sys_stat(const char *path, struct stat *buf) +{ + struct sys_stat_struct stat; + long ret; + +#ifdef __NR_newfstatat + /* only solution for arm64 */ + ret = my_syscall4(__NR_newfstatat, AT_FDCWD, path, &stat, 0); +#else + ret = my_syscall2(__NR_stat, path, &stat); +#endif + buf->st_dev = stat.st_dev; + buf->st_ino = stat.st_ino; + buf->st_mode = stat.st_mode; + buf->st_nlink = stat.st_nlink; + buf->st_uid = stat.st_uid; + buf->st_gid = stat.st_gid; + buf->st_rdev = stat.st_rdev; + buf->st_size = stat.st_size; + buf->st_blksize = stat.st_blksize; + buf->st_blocks = stat.st_blocks; + buf->st_atime = stat.st_atime; + buf->st_mtime = stat.st_mtime; + buf->st_ctime = stat.st_ctime; + return ret; +} + + +static __attribute__((unused)) +int sys_symlink(const char *old, const char *new) +{ +#ifdef __NR_symlinkat + return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new); +#else + return my_syscall2(__NR_symlink, old, new); +#endif +} + +static __attribute__((unused)) +mode_t sys_umask(mode_t mode) +{ + return my_syscall1(__NR_umask, mode); +} + +static __attribute__((unused)) +int sys_umount2(const char *path, int flags) +{ + return my_syscall2(__NR_umount2, path, flags); +} + +static __attribute__((unused)) +int sys_unlink(const char *path) +{ +#ifdef __NR_unlinkat + return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0); +#else + return my_syscall1(__NR_unlink, path); +#endif +} + +static __attribute__((unused)) +pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage) +{ + return my_syscall4(__NR_wait4, pid, status, options, rusage); +} + +static __attribute__((unused)) +pid_t sys_waitpid(pid_t pid, int *status, int options) +{ + return sys_wait4(pid, status, options, 0); +} + +static __attribute__((unused)) +pid_t sys_wait(int *status) +{ + return sys_waitpid(-1, status, 0); +} + +static __attribute__((unused)) +ssize_t sys_write(int fd, const void *buf, size_t count) +{ + return my_syscall3(__NR_write, fd, buf, count); +} + + +/* Below are the libc-compatible syscalls which return x or -1 and set errno. + * They rely on the functions above. Similarly they're marked static so that it + * is possible to assign pointers to them if needed. + */ + +static __attribute__((unused)) +int brk(void *addr) +{ + void *ret = sys_brk(addr); + + if (!ret) { + SET_ERRNO(ENOMEM); + return -1; + } + return 0; +} + +static __attribute__((noreturn,unused)) +void exit(int status) +{ + sys_exit(status); +} + +static __attribute__((unused)) +int chdir(const char *path) +{ + int ret = sys_chdir(path); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int chmod(const char *path, mode_t mode) +{ + int ret = sys_chmod(path, mode); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int chown(const char *path, uid_t owner, gid_t group) +{ + int ret = sys_chown(path, owner, group); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int chroot(const char *path) +{ + int ret = sys_chroot(path); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int close(int fd) +{ + int ret = sys_close(fd); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int dup2(int old, int new) +{ + int ret = sys_dup2(old, new); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int execve(const char *filename, char *const argv[], char *const envp[]) +{ + int ret = sys_execve(filename, argv, envp); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t fork(void) +{ + pid_t ret = sys_fork(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int fsync(int fd) +{ + int ret = sys_fsync(fd); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int getdents64(int fd, struct linux_dirent64 *dirp, int count) +{ + int ret = sys_getdents64(fd, dirp, count); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t getpgrp(void) +{ + pid_t ret = sys_getpgrp(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t getpid(void) +{ + pid_t ret = sys_getpid(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int gettimeofday(struct timeval *tv, struct timezone *tz) +{ + int ret = sys_gettimeofday(tv, tz); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int ioctl(int fd, unsigned long req, void *value) +{ + int ret = sys_ioctl(fd, req, value); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int kill(pid_t pid, int signal) +{ + int ret = sys_kill(pid, signal); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int link(const char *old, const char *new) +{ + int ret = sys_link(old, new); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +off_t lseek(int fd, off_t offset, int whence) +{ + off_t ret = sys_lseek(fd, offset, whence); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int mkdir(const char *path, mode_t mode) +{ + int ret = sys_mkdir(path, mode); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int mknod(const char *path, mode_t mode, dev_t dev) +{ + int ret = sys_mknod(path, mode, dev); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int mount(const char *src, const char *tgt, + const char *fst, unsigned long flags, + const void *data) +{ + int ret = sys_mount(src, tgt, fst, flags, data); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int open(const char *path, int flags, mode_t mode) +{ + int ret = sys_open(path, flags, mode); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int pivot_root(const char *new, const char *old) +{ + int ret = sys_pivot_root(new, old); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int poll(struct pollfd *fds, int nfds, int timeout) +{ + int ret = sys_poll(fds, nfds, timeout); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +ssize_t read(int fd, void *buf, size_t count) +{ + ssize_t ret = sys_read(fd, buf, count); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int reboot(int cmd) +{ + int ret = sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +void *sbrk(intptr_t inc) +{ + void *ret; + + /* first call to find current end */ + if ((ret = sys_brk(0)) && (sys_brk(ret + inc) == ret + inc)) + return ret + inc; + + SET_ERRNO(ENOMEM); + return (void *)-1; +} + +static __attribute__((unused)) +int sched_yield(void) +{ + int ret = sys_sched_yield(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) +{ + int ret = sys_select(nfds, rfds, wfds, efds, timeout); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int setpgid(pid_t pid, pid_t pgid) +{ + int ret = sys_setpgid(pid, pgid); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t setsid(void) +{ + pid_t ret = sys_setsid(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +unsigned int sleep(unsigned int seconds) +{ + struct timeval my_timeval = { seconds, 0 }; + + if (sys_select(0, 0, 0, 0, &my_timeval) < 0) + return my_timeval.tv_sec + !!my_timeval.tv_usec; + else + return 0; +} + +static __attribute__((unused)) +int stat(const char *path, struct stat *buf) +{ + int ret = sys_stat(path, buf); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int symlink(const char *old, const char *new) +{ + int ret = sys_symlink(old, new); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int tcsetpgrp(int fd, pid_t pid) +{ + return ioctl(fd, TIOCSPGRP, &pid); +} + +static __attribute__((unused)) +mode_t umask(mode_t mode) +{ + return sys_umask(mode); +} + +static __attribute__((unused)) +int umount2(const char *path, int flags) +{ + int ret = sys_umount2(path, flags); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int unlink(const char *path) +{ + int ret = sys_unlink(path); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage) +{ + pid_t ret = sys_wait4(pid, status, options, rusage); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t waitpid(pid_t pid, int *status, int options) +{ + pid_t ret = sys_waitpid(pid, status, options); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t wait(int *status) +{ + pid_t ret = sys_wait(status); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +ssize_t write(int fd, const void *buf, size_t count) +{ + ssize_t ret = sys_write(fd, buf, count); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +/* some size-optimized reimplementations of a few common str* and mem* + * functions. They're marked static, except memcpy() and raise() which are used + * by libgcc on ARM, so they are marked weak instead in order not to cause an + * error when building a program made of multiple files (not recommended). + */ + +static __attribute__((unused)) +void *memmove(void *dst, const void *src, size_t len) +{ + ssize_t pos = (dst <= src) ? -1 : (long)len; + void *ret = dst; + + while (len--) { + pos += (dst <= src) ? 1 : -1; + ((char *)dst)[pos] = ((char *)src)[pos]; + } + return ret; +} + +static __attribute__((unused)) +void *memset(void *dst, int b, size_t len) +{ + char *p = dst; + + while (len--) + *(p++) = b; + return dst; +} + +static __attribute__((unused)) +int memcmp(const void *s1, const void *s2, size_t n) +{ + size_t ofs = 0; + char c1 = 0; + + while (ofs < n && !(c1 = ((char *)s1)[ofs] - ((char *)s2)[ofs])) { + ofs++; + } + return c1; +} + +static __attribute__((unused)) +char *strcpy(char *dst, const char *src) +{ + char *ret = dst; + + while ((*dst++ = *src++)); + return ret; +} + +static __attribute__((unused)) +char *strchr(const char *s, int c) +{ + while (*s) { + if (*s == (char)c) + return (char *)s; + s++; + } + return NULL; +} + +static __attribute__((unused)) +char *strrchr(const char *s, int c) +{ + const char *ret = NULL; + + while (*s) { + if (*s == (char)c) + ret = s; + s++; + } + return (char *)ret; +} + +static __attribute__((unused)) +size_t nolibc_strlen(const char *str) +{ + size_t len; + + for (len = 0; str[len]; len++); + return len; +} + +#define strlen(str) ({ \ + __builtin_constant_p((str)) ? \ + __builtin_strlen((str)) : \ + nolibc_strlen((str)); \ +}) + +static __attribute__((unused)) +int isdigit(int c) +{ + return (unsigned int)(c - '0') <= 9; +} + +static __attribute__((unused)) +long atol(const char *s) +{ + unsigned long ret = 0; + unsigned long d; + int neg = 0; + + if (*s == '-') { + neg = 1; + s++; + } + + while (1) { + d = (*s++) - '0'; + if (d > 9) + break; + ret *= 10; + ret += d; + } + + return neg ? -ret : ret; +} + +static __attribute__((unused)) +int atoi(const char *s) +{ + return atol(s); +} + +static __attribute__((unused)) +const char *ltoa(long in) +{ + /* large enough for -9223372036854775808 */ + static char buffer[21]; + char *pos = buffer + sizeof(buffer) - 1; + int neg = in < 0; + unsigned long n = neg ? -in : in; + + *pos-- = '\0'; + do { + *pos-- = '0' + n % 10; + n /= 10; + if (pos < buffer) + return pos + 1; + } while (n); + + if (neg) + *pos-- = '-'; + return pos + 1; +} + +__attribute__((weak,unused)) +void *memcpy(void *dst, const void *src, size_t len) +{ + return memmove(dst, src, len); +} + +/* needed by libgcc for divide by zero */ +__attribute__((weak,unused)) +int raise(int signal) +{ + return kill(getpid(), signal); +} + +/* Here come a few helper functions */ + +static __attribute__((unused)) +void FD_ZERO(fd_set *set) +{ + memset(set, 0, sizeof(*set)); +} + +static __attribute__((unused)) +void FD_SET(int fd, fd_set *set) +{ + if (fd < 0 || fd >= FD_SETSIZE) + return; + set->fd32[fd / 32] |= 1 << (fd & 31); +} + +/* WARNING, it only deals with the 4096 first majors and 256 first minors */ +static __attribute__((unused)) +dev_t makedev(unsigned int major, unsigned int minor) +{ + return ((major & 0xfff) << 8) | (minor & 0xff); +} diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST index 6a0b9f69faad..c3c1fb5a9e1f 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST +++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST @@ -3,9 +3,7 @@ TREE02 TREE03 TREE04 TREE05 -TREE06 TREE07 -TREE08 TREE09 SRCU-N SRCU-P diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot index 84a7d51b7481..ce48c7b82673 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot @@ -1 +1,2 @@ rcutorture.torture_type=srcud +rcupdate.rcu_self_test=1 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot index 84a7d51b7481..ce48c7b82673 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot @@ -1 +1,2 @@ rcutorture.torture_type=srcud +rcupdate.rcu_self_test=1 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot index 6c1a292a65fb..b39f1553a478 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot @@ -1,3 +1 @@ rcupdate.rcu_self_test=1 -rcupdate.rcu_self_test_bh=1 -rcutorture.torture_type=rcu_bh diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot index 9f3a4d28e508..ea47da95374b 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot @@ -1,4 +1,4 @@ -rcutorture.torture_type=rcu_bh maxcpus=8 nr_cpus=43 +maxcpus=8 nr_cpus=43 rcutree.gp_preinit_delay=3 rcutree.gp_init_delay=3 rcutree.gp_cleanup_delay=3 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot index e6071bb96c7d..5adc6756792a 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot @@ -1 +1 @@ -rcutorture.torture_type=rcu_bh rcutree.rcu_fanout_leaf=4 nohz_full=1-7 +rcutree.rcu_fanout_leaf=4 nohz_full=1-7 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot index c7fd050dfcd9..c419cac233ee 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot @@ -1,5 +1,4 @@ -rcutorture.torture_type=sched -rcupdate.rcu_self_test_sched=1 rcutree.gp_preinit_delay=3 rcutree.gp_init_delay=3 rcutree.gp_cleanup_delay=3 +rcupdate.rcu_self_test=1 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot index ad18b52a2cad..055f4aa79077 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot @@ -1,6 +1,4 @@ rcupdate.rcu_self_test=1 -rcupdate.rcu_self_test_bh=1 -rcupdate.rcu_self_test_sched=1 rcutree.rcu_fanout_exact=1 rcutree.gp_preinit_delay=3 rcutree.gp_init_delay=3 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot index 1bd8efc4141e..22478fd3a865 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot @@ -1,5 +1,3 @@ -rcutorture.torture_type=sched rcupdate.rcu_self_test=1 -rcupdate.rcu_self_test_sched=1 rcutree.rcu_fanout_exact=1 rcu_nocbs=0-7 diff --git a/tools/testing/selftests/rcutorture/doc/initrd.txt b/tools/testing/selftests/rcutorture/doc/initrd.txt index 833f826d6ec2..933b4fd12327 100644 --- a/tools/testing/selftests/rcutorture/doc/initrd.txt +++ b/tools/testing/selftests/rcutorture/doc/initrd.txt @@ -1,9 +1,12 @@ -This document describes one way to create the initrd directory hierarchy -in order to allow an initrd to be built into your kernel. The trick -here is to steal the initrd file used on your Linux laptop, Ubuntu in -this case. There are probably much better ways of doing this. +The rcutorture scripting tools automatically create the needed initrd +directory using dracut. Failing that, this tool will create an initrd +containing a single statically linked binary named "init" that loops +over a very long sleep() call. In both cases, this creation is done +by tools/testing/selftests/rcutorture/bin/mkinitrd.sh. -That said, here are the commands: +However, if you are attempting to run rcutorture on a system that does +not have dracut installed, and if you don't like the notion of static +linking, you might wish to press an existing initrd into service: ------------------------------------------------------------------------ cd tools/testing/selftests/rcutorture @@ -11,22 +14,7 @@ zcat /initrd.img > /tmp/initrd.img.zcat mkdir initrd cd initrd cpio -id < /tmp/initrd.img.zcat ------------------------------------------------------------------------- - -Another way to create an initramfs image is using "dracut"[1], which is -available on many distros, however the initramfs dracut generates is a cpio -archive with another cpio archive in it, so an extra step is needed to create -the initrd directory hierarchy. - -Here are the commands to create a initrd directory for rcutorture using -dracut: - ------------------------------------------------------------------------- -dracut --no-hostonly --no-hostonly-cmdline --module "base bash shutdown" /tmp/initramfs.img -cd tools/testing/selftests/rcutorture -mkdir initrd -cd initrd -/usr/lib/dracut/skipcpio /tmp/initramfs.img | zcat | cpio -id < /tmp/initramfs.img +# Manually verify that initrd contains needed binaries and libraries. ------------------------------------------------------------------------ Interestingly enough, if you are running rcutorture, you don't really @@ -39,75 +27,12 @@ with 0755 mode. ------------------------------------------------------------------------ #!/bin/sh -[ -d /dev ] || mkdir -m 0755 /dev -[ -d /root ] || mkdir -m 0700 /root -[ -d /sys ] || mkdir /sys -[ -d /proc ] || mkdir /proc -[ -d /tmp ] || mkdir /tmp -mkdir -p /var/lock -mount -t sysfs -o nodev,noexec,nosuid sysfs /sys -mount -t proc -o nodev,noexec,nosuid proc /proc -# Some things don't work properly without /etc/mtab. -ln -sf /proc/mounts /etc/mtab - -# Note that this only becomes /dev on the real filesystem if udev's scripts -# are used; which they will be, but it's worth pointing out -if ! mount -t devtmpfs -o mode=0755 udev /dev; then - echo "W: devtmpfs not available, falling back to tmpfs for /dev" - mount -t tmpfs -o mode=0755 udev /dev - [ -e /dev/console ] || mknod --mode=600 /dev/console c 5 1 - [ -e /dev/kmsg ] || mknod --mode=644 /dev/kmsg c 1 11 - [ -e /dev/null ] || mknod --mode=666 /dev/null c 1 3 -fi - -mkdir /dev/pts -mount -t devpts -o noexec,nosuid,gid=5,mode=0620 devpts /dev/pts || true -mount -t tmpfs -o "nosuid,size=20%,mode=0755" tmpfs /run -mkdir /run/initramfs -# compatibility symlink for the pre-oneiric locations -ln -s /run/initramfs /dev/.initramfs - -# Export relevant variables -export ROOT= -export ROOTDELAY= -export ROOTFLAGS= -export ROOTFSTYPE= -export IP= -export BOOT= -export BOOTIF= -export UBIMTD= -export break= -export init=/sbin/init -export quiet=n -export readonly=y -export rootmnt=/root -export debug= -export panic= -export blacklist= -export resume= -export resume_offset= -export recovery= - -for i in /sys/devices/system/cpu/cpu*/online -do - case $i in - '/sys/devices/system/cpu/cpu0/online') - ;; - '/sys/devices/system/cpu/cpu*/online') - ;; - *) - echo 1 > $i - ;; - esac -done - while : do sleep 10 done ------------------------------------------------------------------------ -References: -[1]: https://dracut.wiki.kernel.org/index.php/Main_Page -[2]: http://blog.elastocloud.org/2015/06/rapid-linux-kernel-devtest-with-qemu.html -[3]: https://www.centos.org/forums/viewtopic.php?t=51621 +This approach also allows most of the binaries and libraries in the +initrd filesystem to be dispensed with, which can save significant +space in rcutorture's "res" directory. diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h index 891ad13e95b2..d27285f8ee82 100644 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h +++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h @@ -131,8 +131,8 @@ struct hlist_node { * weird ABI and we need to ask it explicitly. * * The alignment is required to guarantee that bits 0 and 1 of @next will be - * clear under normal conditions -- as long as we use call_rcu(), - * call_rcu_bh(), call_rcu_sched(), or call_srcu() to queue callback. + * clear under normal conditions -- as long as we use call_rcu() or + * call_srcu() to queue callback. * * This guarantee is important for few reasons: * - future call_rcu_lazy() will make use of lower bits in the pointer; diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c index e20b017e7073..b2065536d407 100644 --- a/tools/testing/selftests/rtc/rtctest.c +++ b/tools/testing/selftests/rtc/rtctest.c @@ -145,15 +145,12 @@ TEST_F(rtc, alarm_alm_set) { rc = select(self->fd + 1, &readfds, NULL, NULL, &tv); ASSERT_NE(-1, rc); - EXPECT_NE(0, rc); + ASSERT_NE(0, rc); /* Disable alarm interrupts */ rc = ioctl(self->fd, RTC_AIE_OFF, 0); ASSERT_NE(-1, rc); - if (rc == 0) - return; - rc = read(self->fd, &data, sizeof(unsigned long)); ASSERT_NE(-1, rc); TH_LOG("data: %lx", data); @@ -202,7 +199,109 @@ TEST_F(rtc, alarm_wkalm_set) { rc = select(self->fd + 1, &readfds, NULL, NULL, &tv); ASSERT_NE(-1, rc); - EXPECT_NE(0, rc); + ASSERT_NE(0, rc); + + rc = read(self->fd, &data, sizeof(unsigned long)); + ASSERT_NE(-1, rc); + + rc = ioctl(self->fd, RTC_RD_TIME, &tm); + ASSERT_NE(-1, rc); + + new = timegm((struct tm *)&tm); + ASSERT_EQ(new, secs); +} + +TEST_F(rtc, alarm_alm_set_minute) { + struct timeval tv = { .tv_sec = 62 }; + unsigned long data; + struct rtc_time tm; + fd_set readfds; + time_t secs, new; + int rc; + + rc = ioctl(self->fd, RTC_RD_TIME, &tm); + ASSERT_NE(-1, rc); + + secs = timegm((struct tm *)&tm) + 60 - tm.tm_sec; + gmtime_r(&secs, (struct tm *)&tm); + + rc = ioctl(self->fd, RTC_ALM_SET, &tm); + if (rc == -1) { + ASSERT_EQ(EINVAL, errno); + TH_LOG("skip alarms are not supported."); + return; + } + + rc = ioctl(self->fd, RTC_ALM_READ, &tm); + ASSERT_NE(-1, rc); + + TH_LOG("Alarm time now set to %02d:%02d:%02d.", + tm.tm_hour, tm.tm_min, tm.tm_sec); + + /* Enable alarm interrupts */ + rc = ioctl(self->fd, RTC_AIE_ON, 0); + ASSERT_NE(-1, rc); + + FD_ZERO(&readfds); + FD_SET(self->fd, &readfds); + + rc = select(self->fd + 1, &readfds, NULL, NULL, &tv); + ASSERT_NE(-1, rc); + ASSERT_NE(0, rc); + + /* Disable alarm interrupts */ + rc = ioctl(self->fd, RTC_AIE_OFF, 0); + ASSERT_NE(-1, rc); + + rc = read(self->fd, &data, sizeof(unsigned long)); + ASSERT_NE(-1, rc); + TH_LOG("data: %lx", data); + + rc = ioctl(self->fd, RTC_RD_TIME, &tm); + ASSERT_NE(-1, rc); + + new = timegm((struct tm *)&tm); + ASSERT_EQ(new, secs); +} + +TEST_F(rtc, alarm_wkalm_set_minute) { + struct timeval tv = { .tv_sec = 62 }; + struct rtc_wkalrm alarm = { 0 }; + struct rtc_time tm; + unsigned long data; + fd_set readfds; + time_t secs, new; + int rc; + + rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time); + ASSERT_NE(-1, rc); + + secs = timegm((struct tm *)&alarm.time) + 60 - alarm.time.tm_sec; + gmtime_r(&secs, (struct tm *)&alarm.time); + + alarm.enabled = 1; + + rc = ioctl(self->fd, RTC_WKALM_SET, &alarm); + if (rc == -1) { + ASSERT_EQ(EINVAL, errno); + TH_LOG("skip alarms are not supported."); + return; + } + + rc = ioctl(self->fd, RTC_WKALM_RD, &alarm); + ASSERT_NE(-1, rc); + + TH_LOG("Alarm time now set to %02d/%02d/%02d %02d:%02d:%02d.", + alarm.time.tm_mday, alarm.time.tm_mon + 1, + alarm.time.tm_year + 1900, alarm.time.tm_hour, + alarm.time.tm_min, alarm.time.tm_sec); + + FD_ZERO(&readfds); + FD_SET(self->fd, &readfds); + + rc = select(self->fd + 1, &readfds, NULL, NULL, &tv); + ASSERT_NE(-1, rc); + ASSERT_NE(0, rc); rc = read(self->fd, &data, sizeof(unsigned long)); ASSERT_NE(-1, rc); diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile index fce7f4ce0692..1760b3e39730 100644 --- a/tools/testing/selftests/seccomp/Makefile +++ b/tools/testing/selftests/seccomp/Makefile @@ -9,7 +9,7 @@ BINARIES := seccomp_bpf seccomp_benchmark CFLAGS += -Wl,-no-as-needed -Wall seccomp_bpf: seccomp_bpf.c ../kselftest_harness.h - $(CC) $(CFLAGS) $(LDFLAGS) -lpthread $< -o $@ + $(CC) $(CFLAGS) $(LDFLAGS) $< -lpthread -o $@ TEST_PROGS += $(BINARIES) EXTRA_CLEAN := $(BINARIES) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index e1473234968d..7e632b465ab4 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -5,6 +5,7 @@ * Test code for seccomp bpf. */ +#define _GNU_SOURCE #include <sys/types.h> /* @@ -40,10 +41,12 @@ #include <sys/fcntl.h> #include <sys/mman.h> #include <sys/times.h> +#include <sys/socket.h> +#include <sys/ioctl.h> -#define _GNU_SOURCE #include <unistd.h> #include <sys/syscall.h> +#include <poll.h> #include "../kselftest_harness.h" @@ -133,6 +136,10 @@ struct seccomp_data { #define SECCOMP_GET_ACTION_AVAIL 2 #endif +#ifndef SECCOMP_GET_NOTIF_SIZES +#define SECCOMP_GET_NOTIF_SIZES 3 +#endif + #ifndef SECCOMP_FILTER_FLAG_TSYNC #define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) #endif @@ -154,6 +161,44 @@ struct seccomp_metadata { }; #endif +#ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER +#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) + +#define SECCOMP_RET_USER_NOTIF 0x7fc00000U + +#define SECCOMP_IOC_MAGIC '!' +#define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr) +#define SECCOMP_IOR(nr, type) _IOR(SECCOMP_IOC_MAGIC, nr, type) +#define SECCOMP_IOW(nr, type) _IOW(SECCOMP_IOC_MAGIC, nr, type) +#define SECCOMP_IOWR(nr, type) _IOWR(SECCOMP_IOC_MAGIC, nr, type) + +/* Flags for seccomp notification fd ioctl. */ +#define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif) +#define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ + struct seccomp_notif_resp) +#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOR(2, __u64) + +struct seccomp_notif { + __u64 id; + __u32 pid; + __u32 flags; + struct seccomp_data data; +}; + +struct seccomp_notif_resp { + __u64 id; + __s64 val; + __s32 error; + __u32 flags; +}; + +struct seccomp_notif_sizes { + __u16 seccomp_notif; + __u16 seccomp_notif_resp; + __u16 seccomp_data; +}; +#endif + #ifndef seccomp int seccomp(unsigned int op, unsigned int flags, void *args) { @@ -1563,7 +1608,16 @@ TEST_F(TRACE_poke, getpid_runs_normally) #ifdef SYSCALL_NUM_RET_SHARE_REG # define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action) #else -# define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(val, action) +# define EXPECT_SYSCALL_RETURN(val, action) \ + do { \ + errno = 0; \ + if (val < 0) { \ + EXPECT_EQ(-1, action); \ + EXPECT_EQ(-(val), errno); \ + } else { \ + EXPECT_EQ(val, action); \ + } \ + } while (0) #endif /* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for @@ -1602,7 +1656,7 @@ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) /* Architecture-specific syscall changing routine. */ void change_syscall(struct __test_metadata *_metadata, - pid_t tracee, int syscall) + pid_t tracee, int syscall, int result) { int ret; ARCH_REGS regs; @@ -1661,7 +1715,7 @@ void change_syscall(struct __test_metadata *_metadata, #ifdef SYSCALL_NUM_RET_SHARE_REG TH_LOG("Can't modify syscall return on this architecture"); #else - regs.SYSCALL_RET = EPERM; + regs.SYSCALL_RET = result; #endif #ifdef HAVE_GETREGS @@ -1689,14 +1743,19 @@ void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee, case 0x1002: /* change getpid to getppid. */ EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee)); - change_syscall(_metadata, tracee, __NR_getppid); + change_syscall(_metadata, tracee, __NR_getppid, 0); break; case 0x1003: - /* skip gettid. */ + /* skip gettid with valid return code. */ EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee)); - change_syscall(_metadata, tracee, -1); + change_syscall(_metadata, tracee, -1, 45000); break; case 0x1004: + /* skip openat with error. */ + EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee)); + change_syscall(_metadata, tracee, -1, -ESRCH); + break; + case 0x1005: /* do nothing (allow getppid) */ EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee)); break; @@ -1729,9 +1788,11 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, nr = get_syscall(_metadata, tracee); if (nr == __NR_getpid) - change_syscall(_metadata, tracee, __NR_getppid); + change_syscall(_metadata, tracee, __NR_getppid, 0); + if (nr == __NR_gettid) + change_syscall(_metadata, tracee, -1, 45000); if (nr == __NR_openat) - change_syscall(_metadata, tracee, -1); + change_syscall(_metadata, tracee, -1, -ESRCH); } FIXTURE_DATA(TRACE_syscall) { @@ -1748,8 +1809,10 @@ FIXTURE_SETUP(TRACE_syscall) BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002), BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; @@ -1797,15 +1860,26 @@ TEST_F(TRACE_syscall, ptrace_syscall_redirected) EXPECT_NE(self->mypid, syscall(__NR_getpid)); } -TEST_F(TRACE_syscall, ptrace_syscall_dropped) +TEST_F(TRACE_syscall, ptrace_syscall_errno) +{ + /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ + teardown_trace_fixture(_metadata, self->tracer); + self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, + true); + + /* Tracer should skip the open syscall, resulting in ESRCH. */ + EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)); +} + +TEST_F(TRACE_syscall, ptrace_syscall_faked) { /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ teardown_trace_fixture(_metadata, self->tracer); self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, true); - /* Tracer should skip the open syscall, resulting in EPERM. */ - EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_openat)); + /* Tracer should skip the gettid syscall, resulting fake pid. */ + EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); } TEST_F(TRACE_syscall, syscall_allowed) @@ -1838,7 +1912,21 @@ TEST_F(TRACE_syscall, syscall_redirected) EXPECT_NE(self->mypid, syscall(__NR_getpid)); } -TEST_F(TRACE_syscall, syscall_dropped) +TEST_F(TRACE_syscall, syscall_errno) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + /* openat has been skipped and an errno return. */ + EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)); +} + +TEST_F(TRACE_syscall, syscall_faked) { long ret; @@ -1849,8 +1937,7 @@ TEST_F(TRACE_syscall, syscall_dropped) ASSERT_EQ(0, ret); /* gettid has been skipped and an altered return value stored. */ - EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_gettid)); - EXPECT_NE(self->mytid, syscall(__NR_gettid)); + EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); } TEST_F(TRACE_syscall, skip_after_RET_TRACE) @@ -2077,7 +2164,8 @@ TEST(detect_seccomp_filter_flags) { unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, SECCOMP_FILTER_FLAG_LOG, - SECCOMP_FILTER_FLAG_SPEC_ALLOW }; + SECCOMP_FILTER_FLAG_SPEC_ALLOW, + SECCOMP_FILTER_FLAG_NEW_LISTENER }; unsigned int flag, all_flags; int i; long ret; @@ -2731,9 +2819,14 @@ TEST(syscall_restart) ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); ASSERT_EQ(true, WIFSTOPPED(status)); ASSERT_EQ(SIGSTOP, WSTOPSIG(status)); - /* Verify signal delivery came from parent now. */ ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); - EXPECT_EQ(getpid(), info.si_pid); + /* + * There is no siginfo on SIGSTOP any more, so we can't verify + * signal delivery came from parent now (getpid() == info.si_pid). + * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com + * At least verify the SIGSTOP via PTRACE_GETSIGINFO. + */ + EXPECT_EQ(SIGSTOP, info.si_signo); /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */ ASSERT_EQ(0, kill(child_pid, SIGCONT)); @@ -2933,6 +3026,403 @@ skip: ASSERT_EQ(0, kill(pid, SIGKILL)); } +static int user_trap_syscall(int nr, unsigned int flags) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD+BPF_W+BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF), + BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), + }; + + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog); +} + +#define USER_NOTIF_MAGIC 116983961184613L +TEST(user_notification_basic) +{ + pid_t pid; + long ret; + int status, listener; + struct seccomp_notif req = {}; + struct seccomp_notif_resp resp = {}; + struct pollfd pollfd; + + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + pid = fork(); + ASSERT_GE(pid, 0); + + /* Check that we get -ENOSYS with no listener attached */ + if (pid == 0) { + if (user_trap_syscall(__NR_getpid, 0) < 0) + exit(1); + ret = syscall(__NR_getpid); + exit(ret >= 0 || errno != ENOSYS); + } + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + + /* Add some no-op filters so for grins. */ + EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); + EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); + EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); + EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); + + /* Check that the basic notification machinery works */ + listener = user_trap_syscall(__NR_getpid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + /* Installing a second listener in the chain should EBUSY */ + EXPECT_EQ(user_trap_syscall(__NR_getpid, + SECCOMP_FILTER_FLAG_NEW_LISTENER), + -1); + EXPECT_EQ(errno, EBUSY); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + ret = syscall(__NR_getpid); + exit(ret != USER_NOTIF_MAGIC); + } + + pollfd.fd = listener; + pollfd.events = POLLIN | POLLOUT; + + EXPECT_GT(poll(&pollfd, 1, -1), 0); + EXPECT_EQ(pollfd.revents, POLLIN); + + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + + pollfd.fd = listener; + pollfd.events = POLLIN | POLLOUT; + + EXPECT_GT(poll(&pollfd, 1, -1), 0); + EXPECT_EQ(pollfd.revents, POLLOUT); + + EXPECT_EQ(req.data.nr, __NR_getpid); + + resp.id = req.id; + resp.error = 0; + resp.val = USER_NOTIF_MAGIC; + + /* check that we make sure flags == 0 */ + resp.flags = 1; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); + EXPECT_EQ(errno, EINVAL); + + resp.flags = 0; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + +TEST(user_notification_kill_in_middle) +{ + pid_t pid; + long ret; + int listener; + struct seccomp_notif req = {}; + struct seccomp_notif_resp resp = {}; + + listener = user_trap_syscall(__NR_getpid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + /* + * Check that nothing bad happens when we kill the task in the middle + * of a syscall. + */ + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + ret = syscall(__NR_getpid); + exit(ret != USER_NOTIF_MAGIC); + } + + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0); + + EXPECT_EQ(kill(pid, SIGKILL), 0); + EXPECT_EQ(waitpid(pid, NULL, 0), pid); + + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1); + + resp.id = req.id; + ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp); + EXPECT_EQ(ret, -1); + EXPECT_EQ(errno, ENOENT); +} + +static int handled = -1; + +static void signal_handler(int signal) +{ + if (write(handled, "c", 1) != 1) + perror("write from signal"); +} + +TEST(user_notification_signal) +{ + pid_t pid; + long ret; + int status, listener, sk_pair[2]; + struct seccomp_notif req = {}; + struct seccomp_notif_resp resp = {}; + char c; + + ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); + + listener = user_trap_syscall(__NR_gettid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + close(sk_pair[0]); + handled = sk_pair[1]; + if (signal(SIGUSR1, signal_handler) == SIG_ERR) { + perror("signal"); + exit(1); + } + /* + * ERESTARTSYS behavior is a bit hard to test, because we need + * to rely on a signal that has not yet been handled. Let's at + * least check that the error code gets propagated through, and + * hope that it doesn't break when there is actually a signal :) + */ + ret = syscall(__NR_gettid); + exit(!(ret == -1 && errno == 512)); + } + + close(sk_pair[1]); + + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + + EXPECT_EQ(kill(pid, SIGUSR1), 0); + + /* + * Make sure the signal really is delivered, which means we're not + * stuck in the user notification code any more and the notification + * should be dead. + */ + EXPECT_EQ(read(sk_pair[0], &c, 1), 1); + + resp.id = req.id; + resp.error = -EPERM; + resp.val = 0; + + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); + EXPECT_EQ(errno, ENOENT); + + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + + resp.id = req.id; + resp.error = -512; /* -ERESTARTSYS */ + resp.val = 0; + + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + +TEST(user_notification_closed_listener) +{ + pid_t pid; + long ret; + int status, listener; + + listener = user_trap_syscall(__NR_getpid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + /* + * Check that we get an ENOSYS when the listener is closed. + */ + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) { + close(listener); + ret = syscall(__NR_getpid); + exit(ret != -1 && errno != ENOSYS); + } + + close(listener); + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + +/* + * Check that a pid in a child namespace still shows up as valid in ours. + */ +TEST(user_notification_child_pid_ns) +{ + pid_t pid; + int status, listener; + struct seccomp_notif req = {}; + struct seccomp_notif_resp resp = {}; + + ASSERT_EQ(unshare(CLONE_NEWPID), 0); + + listener = user_trap_syscall(__NR_getpid, SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) + exit(syscall(__NR_getpid) != USER_NOTIF_MAGIC); + + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + EXPECT_EQ(req.pid, pid); + + resp.id = req.id; + resp.error = 0; + resp.val = USER_NOTIF_MAGIC; + + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + close(listener); +} + +/* + * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e. + * invalid. + */ +TEST(user_notification_sibling_pid_ns) +{ + pid_t pid, pid2; + int status, listener; + struct seccomp_notif req = {}; + struct seccomp_notif_resp resp = {}; + + listener = user_trap_syscall(__NR_getpid, SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + ASSERT_EQ(unshare(CLONE_NEWPID), 0); + + pid2 = fork(); + ASSERT_GE(pid2, 0); + + if (pid2 == 0) + exit(syscall(__NR_getpid) != USER_NOTIF_MAGIC); + + EXPECT_EQ(waitpid(pid2, &status, 0), pid2); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + exit(WEXITSTATUS(status)); + } + + /* Create the sibling ns, and sibling in it. */ + EXPECT_EQ(unshare(CLONE_NEWPID), 0); + EXPECT_EQ(errno, 0); + + pid2 = fork(); + EXPECT_GE(pid2, 0); + + if (pid2 == 0) { + ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + /* + * The pid should be 0, i.e. the task is in some namespace that + * we can't "see". + */ + ASSERT_EQ(req.pid, 0); + + resp.id = req.id; + resp.error = 0; + resp.val = USER_NOTIF_MAGIC; + + ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + exit(0); + } + + close(listener); + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + + EXPECT_EQ(waitpid(pid2, &status, 0), pid2); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + +TEST(user_notification_fault_recv) +{ + pid_t pid; + int status, listener; + struct seccomp_notif req = {}; + struct seccomp_notif_resp resp = {}; + + listener = user_trap_syscall(__NR_getpid, SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) + exit(syscall(__NR_getpid) != USER_NOTIF_MAGIC); + + /* Do a bad recv() */ + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1); + EXPECT_EQ(errno, EFAULT); + + /* We should still be able to receive this notification, though. */ + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + EXPECT_EQ(req.pid, pid); + + resp.id = req.id; + resp.error = 0; + resp.val = USER_NOTIF_MAGIC; + + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + +TEST(seccomp_get_notif_sizes) +{ + struct seccomp_notif_sizes sizes; + + ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0); + EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif)); + EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp)); +} + /* * TODO: * - add microbenchmarks diff --git a/tools/testing/selftests/tc-testing/.gitignore b/tools/testing/selftests/tc-testing/.gitignore index 7a60b85e148f..c5cc160948b3 100644 --- a/tools/testing/selftests/tc-testing/.gitignore +++ b/tools/testing/selftests/tc-testing/.gitignore @@ -1,2 +1,5 @@ __pycache__/ *.pyc +plugins/ +*.xml +*.tap diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README index 49a6f8c3fdae..f9281e8aa313 100644 --- a/tools/testing/selftests/tc-testing/README +++ b/tools/testing/selftests/tc-testing/README @@ -232,6 +232,8 @@ directory: and the other is a test whether the command leaked memory or not. (This one is a preliminary version, it may not work quite right yet, but the overall template is there and it should only need tweaks.) + - buildebpfPlugin.py: + builds all programs in $EBPFDIR. ACKNOWLEDGEMENTS diff --git a/tools/testing/selftests/tc-testing/TdcPlugin.py b/tools/testing/selftests/tc-testing/TdcPlugin.py index 3ee9a6dacb52..1d9e279331eb 100644 --- a/tools/testing/selftests/tc-testing/TdcPlugin.py +++ b/tools/testing/selftests/tc-testing/TdcPlugin.py @@ -18,11 +18,12 @@ class TdcPlugin: if self.args.verbose > 1: print(' -- {}.post_suite'.format(self.sub_class)) - def pre_case(self, test_ordinal, testid): + def pre_case(self, test_ordinal, testid, test_name): '''run commands before test_runner does one test''' if self.args.verbose > 1: print(' -- {}.pre_case'.format(self.sub_class)) self.args.testid = testid + self.args.test_name = test_name self.args.test_ordinal = test_ordinal def post_case(self): diff --git a/tools/testing/selftests/tc-testing/TdcResults.py b/tools/testing/selftests/tc-testing/TdcResults.py new file mode 100644 index 000000000000..1e4d95fdf8d0 --- /dev/null +++ b/tools/testing/selftests/tc-testing/TdcResults.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 + +from enum import Enum + +class ResultState(Enum): + noresult = -1 + skip = 0 + success = 1 + fail = 2 + +class TestResult: + def __init__(self, test_id="", test_name=""): + self.test_id = test_id + self.test_name = test_name + self.result = ResultState.noresult + self.failmsg = "" + self.errormsg = "" + self.steps = [] + + def set_result(self, result): + if (isinstance(result, ResultState)): + self.result = result + return True + else: + raise TypeError('Unknown result type, must be type ResultState') + + def get_result(self): + return self.result + + def set_errormsg(self, errormsg): + self.errormsg = errormsg + return True + + def append_errormsg(self, errormsg): + self.errormsg = '{}\n{}'.format(self.errormsg, errormsg) + + def get_errormsg(self): + return self.errormsg + + def set_failmsg(self, failmsg): + self.failmsg = failmsg + return True + + def append_failmsg(self, failmsg): + self.failmsg = '{}\n{}'.format(self.failmsg, failmsg) + + def get_failmsg(self): + return self.failmsg + + def add_steps(self, newstep): + if type(newstep) == list: + self.steps.extend(newstep) + elif type(newstep) == str: + self.steps.append(step) + else: + raise TypeError('TdcResults.add_steps() requires a list or str') + + def get_executed_steps(self): + return self.steps + +class TestSuiteReport(): + _testsuite = [] + + def add_resultdata(self, result_data): + if isinstance(result_data, TestResult): + self._testsuite.append(result_data) + return True + + def count_tests(self): + return len(self._testsuite) + + def count_failures(self): + return sum(1 for t in self._testsuite if t.result == ResultState.fail) + + def count_skips(self): + return sum(1 for t in self._testsuite if t.result == ResultState.skip) + + def find_result(self, test_id): + return next((tr for tr in self._testsuite if tr.test_id == test_id), None) + + def update_result(self, result_data): + orig = self.find_result(result_data.test_id) + if orig != None: + idx = self._testsuite.index(orig) + self._testsuite[idx] = result_data + else: + self.add_resultdata(result_data) + + def format_tap(self): + ftap = "" + ftap += '1..{}\n'.format(self.count_tests()) + index = 1 + for t in self._testsuite: + if t.result == ResultState.fail: + ftap += 'not ' + ftap += 'ok {} {} - {}'.format(str(index), t.test_id, t.test_name) + if t.result == ResultState.skip or t.result == ResultState.noresult: + ftap += ' # skipped - {}\n'.format(t.errormsg) + elif t.result == ResultState.fail: + if len(t.steps) > 0: + ftap += '\tCommands executed in this test case:' + for step in t.steps: + ftap += '\n\t\t{}'.format(step) + ftap += '\n\t{}'.format(t.failmsg) + ftap += '\n' + index += 1 + return ftap + + def format_xunit(self): + from xml.sax.saxutils import escape + xunit = "<testsuites>\n" + xunit += '\t<testsuite tests=\"{}\" skips=\"{}\">\n'.format(self.count_tests(), self.count_skips()) + for t in self._testsuite: + xunit += '\t\t<testcase classname=\"{}\" '.format(escape(t.test_id)) + xunit += 'name=\"{}\">\n'.format(escape(t.test_name)) + if t.failmsg: + xunit += '\t\t\t<failure>\n' + if len(t.steps) > 0: + xunit += 'Commands executed in this test case:\n' + for step in t.steps: + xunit += '\t{}\n'.format(escape(step)) + xunit += 'FAILURE: {}\n'.format(escape(t.failmsg)) + xunit += '\t\t\t</failure>\n' + if t.errormsg: + xunit += '\t\t\t<error>\n{}\n'.format(escape(t.errormsg)) + xunit += '\t\t\t</error>\n' + if t.result == ResultState.skip: + xunit += '\t\t\t<skipped/>\n' + xunit += '\t\t</testcase>\n' + xunit += '\t</testsuite>\n' + xunit += '</testsuites>\n' + return xunit diff --git a/tools/testing/selftests/tc-testing/bpf/Makefile b/tools/testing/selftests/tc-testing/bpf/Makefile new file mode 100644 index 000000000000..be5a5e542804 --- /dev/null +++ b/tools/testing/selftests/tc-testing/bpf/Makefile @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: GPL-2.0 + +APIDIR := ../../../../include/uapi +TEST_GEN_FILES = action.o + +top_srcdir = ../../../../.. +KSFT_KHDR_INSTALL := 1 +include ../../lib.mk + +CLANG ?= clang +LLC ?= llc +PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) + +ifeq ($(PROBE),) + CPU ?= probe +else + CPU ?= generic +endif + +CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \ + | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') + +CLANG_FLAGS = -I. -I$(APIDIR) \ + $(CLANG_SYS_INCLUDES) \ + -Wno-compare-distinct-pointer-types + +$(OUTPUT)/%.o: %.c + $(CLANG) $(CLANG_FLAGS) \ + -O2 -target bpf -emit-llvm -c $< -o - | \ + $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ diff --git a/tools/testing/selftests/tc-testing/bpf/action.c b/tools/testing/selftests/tc-testing/bpf/action.c new file mode 100644 index 000000000000..c32b99b80e19 --- /dev/null +++ b/tools/testing/selftests/tc-testing/bpf/action.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright (c) 2018 Davide Caratti, Red Hat inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#include <linux/bpf.h> +#include <linux/pkt_cls.h> + +__attribute__((section("action-ok"),used)) int action_ok(struct __sk_buff *s) +{ + return TC_ACT_OK; +} + +__attribute__((section("action-ko"),used)) int action_ko(struct __sk_buff *s) +{ + s->data = 0x0; + return TC_ACT_OK; +} + +char _license[] __attribute__((section("license"),used)) = "GPL"; diff --git a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py new file mode 100644 index 000000000000..9f0ba10c44b4 --- /dev/null +++ b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py @@ -0,0 +1,66 @@ +''' +build ebpf program +''' + +import os +import signal +from string import Template +import subprocess +import time +from TdcPlugin import TdcPlugin +from tdc_config import * + +class SubPlugin(TdcPlugin): + def __init__(self): + self.sub_class = 'buildebpf/SubPlugin' + self.tap = '' + super().__init__() + + def pre_suite(self, testcount, testidlist): + super().pre_suite(testcount, testidlist) + + if self.args.buildebpf: + self._ebpf_makeall() + + def post_suite(self, index): + super().post_suite(index) + + self._ebpf_makeclean() + + def add_args(self, parser): + super().add_args(parser) + + self.argparser_group = self.argparser.add_argument_group( + 'buildebpf', + 'options for buildebpfPlugin') + self.argparser_group.add_argument( + '-B', '--buildebpf', action='store_true', + help='build eBPF programs') + + return self.argparser + + def _ebpf_makeall(self): + if self.args.buildebpf: + self._make('all') + + def _ebpf_makeclean(self): + if self.args.buildebpf: + self._make('clean') + + def _make(self, target): + command = 'make -C {} {}'.format(self.args.NAMES['EBPFDIR'], target) + proc = subprocess.Popen(command, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=ENVIR) + (rawout, serr) = proc.communicate() + + if proc.returncode != 0 and len(serr) > 0: + foutput = serr.decode("utf-8") + else: + foutput = rawout.decode("utf-8") + + proc.stdout.close() + proc.stderr.close() + return proc, foutput diff --git a/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py index 477a7bd7d7fb..e00c798de0bb 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py @@ -11,6 +11,7 @@ from string import Template import subprocess import time from TdcPlugin import TdcPlugin +from TdcResults import * from tdc_config import * @@ -21,6 +22,7 @@ class SubPlugin(TdcPlugin): def __init__(self): self.sub_class = 'valgrind/SubPlugin' self.tap = '' + self._tsr = TestSuiteReport() super().__init__() def pre_suite(self, testcount, testidlist): @@ -34,10 +36,14 @@ class SubPlugin(TdcPlugin): def post_suite(self, index): '''run commands after test_runner goes into a test loop''' super().post_suite(index) - self._add_to_tap('\n|---\n') if self.args.verbose > 1: print('{}.post_suite'.format(self.sub_class)) - print('{}'.format(self.tap)) + #print('{}'.format(self.tap)) + for xx in range(index - 1, self.testcount): + res = TestResult('{}-mem'.format(self.testidlist[xx]), 'Test skipped') + res.set_result(ResultState.skip) + res.set_errormsg('Skipped because of prior setup/teardown failure') + self._add_results(res) if self.args.verbose < 4: subprocess.check_output('rm -f vgnd-*.log', shell=True) @@ -128,8 +134,17 @@ class SubPlugin(TdcPlugin): nle_num = int(nle_mo.group(1)) mem_results = '' + res = TestResult('{}-mem'.format(self.args.testid), + '{} memory leak check'.format(self.args.test_name)) if (def_num > 0) or (ind_num > 0) or (pos_num > 0) or (nle_num > 0): mem_results += 'not ' + res.set_result(ResultState.fail) + res.set_failmsg('Memory leak detected') + res.append_failmsg(content) + else: + res.set_result(ResultState.success) + + self._add_results(res) mem_results += 'ok {} - {}-mem # {}\n'.format( self.args.test_ordinal, self.args.testid, 'memory leak check') @@ -138,5 +153,8 @@ class SubPlugin(TdcPlugin): print('{}'.format(content)) self._add_to_tap(content) + def _add_results(self, res): + self._tsr.add_resultdata(res) + def _add_to_tap(self, more_tap_output): self.tap += more_tap_output diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json index 6f289a49e5ec..5970cee6d05f 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json @@ -55,7 +55,6 @@ "bpf" ], "setup": [ - "printf '#include <linux/bpf.h>\nchar l[] __attribute__((section(\"license\"),used))=\"GPL\"; __attribute__((section(\"action\"),used)) int m(struct __sk_buff *s) { return 2; }' | clang -O2 -x c -c - -target bpf -o _b.o", [ "$TC action flush action bpf", 0, @@ -63,14 +62,13 @@ 255 ] ], - "cmdUnderTest": "$TC action add action bpf object-file _b.o index 667", + "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ok index 667", "expExitCode": "0", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c( jited)? default-action pipe.*index 667 ref", + "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9]* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref", "matchCount": "1", "teardown": [ - "$TC action flush action bpf", - "rm -f _b.o" + "$TC action flush action bpf" ] }, { @@ -81,7 +79,6 @@ "bpf" ], "setup": [ - "printf '#include <linux/bpf.h>\nchar l[] __attribute__((section(\"license\"),used))=\"GPL\"; __attribute__((section(\"action\"),used)) int m(struct __sk_buff *s) { s->data = 0x0; return 2; }' | clang -O2 -x c -c - -target bpf -o _c.o", [ "$TC action flush action bpf", 0, @@ -89,10 +86,10 @@ 255 ] ], - "cmdUnderTest": "$TC action add action bpf object-file _c.o index 667", + "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ko index 667", "expExitCode": "255", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf _c.o:\\[action\\] id [0-9].*index 667 ref", + "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ko\\] id [0-9].*index 667 ref", "matchCount": "0", "teardown": [ [ @@ -100,8 +97,7 @@ 0, 1, 255 - ], - "rm -f _c.o" + ] ] }, { diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json index 68c91023cdb9..89189a03ce3d 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json @@ -536,5 +536,29 @@ "matchPattern": "^[ \t]+index [0-9]+ ref", "matchCount": "0", "teardown": [] + }, + { + "id": "8e47", + "name": "Add gact action with random determ goto chain control action", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pass random determ goto chain 1 2 index 90", + "expExitCode": "255", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action pass random type determ goto chain 1 val 2.*index 90 ref", + "matchCount": "0", + "teardown": [ + "$TC actions flush action gact" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json index 637ea0219617..0da3545cabdb 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json @@ -17,7 +17,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 2", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 2", - "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 2", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow mark.*index 2", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -41,7 +41,7 @@ "cmdUnderTest": "$TC actions add action ife encode use mark 10 pipe index 2", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 2", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use mark.*index 2", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use mark.*index 2", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -65,7 +65,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow mark continue index 2", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 2", - "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*allow mark.*index 2", + "matchPattern": "action order [0-9]*: ife encode action continue.*type 0[xX]ED3E.*allow mark.*index 2", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -89,7 +89,7 @@ "cmdUnderTest": "$TC actions add action ife encode use mark 789 drop index 2", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 2", - "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*use mark 789.*index 2", + "matchPattern": "action order [0-9]*: ife encode action drop.*type 0[xX]ED3E.*use mark 789.*index 2", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -113,7 +113,7 @@ "cmdUnderTest": "$TC actions add action ife encode use mark 656768 reclassify index 2", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 2", - "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use mark 656768.*index 2", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use mark 656768.*index 2", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -137,7 +137,7 @@ "cmdUnderTest": "$TC actions add action ife encode use mark 65 jump 1 index 2", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 2", - "matchPattern": "action order [0-9]*: ife encode action jump 1.*type 0xED3E.*use mark 65.*index 2", + "matchPattern": "action order [0-9]*: ife encode action jump 1.*type 0[xX]ED3E.*use mark 65.*index 2", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -161,7 +161,7 @@ "cmdUnderTest": "$TC actions add action ife encode use mark 4294967295 reclassify index 90", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 90", - "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use mark 4294967295.*index 90", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use mark 4294967295.*index 90", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -185,7 +185,7 @@ "cmdUnderTest": "$TC actions add action ife encode use mark 4294967295999 pipe index 90", "expExitCode": "255", "verifyCmd": "$TC actions get action ife index 90", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use mark 4294967295999.*index 90", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use mark 4294967295999.*index 90", "matchCount": "0", "teardown": [] }, @@ -207,7 +207,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow prio pass index 9", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 9", - "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow prio.*index 9", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow prio.*index 9", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -231,7 +231,7 @@ "cmdUnderTest": "$TC actions add action ife encode use prio 7 pipe index 9", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 9", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use prio 7.*index 9", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use prio 7.*index 9", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -255,7 +255,7 @@ "cmdUnderTest": "$TC actions add action ife encode use prio 3 continue index 9", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 9", - "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use prio 3.*index 9", + "matchPattern": "action order [0-9]*: ife encode action continue.*type 0[xX]ED3E.*use prio 3.*index 9", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -279,7 +279,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow prio drop index 9", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 9", - "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*allow prio.*index 9", + "matchPattern": "action order [0-9]*: ife encode action drop.*type 0[xX]ED3E.*allow prio.*index 9", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -303,7 +303,7 @@ "cmdUnderTest": "$TC actions add action ife encode use prio 998877 reclassify index 9", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 9", - "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 998877.*index 9", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use prio 998877.*index 9", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -327,7 +327,7 @@ "cmdUnderTest": "$TC actions add action ife encode use prio 998877 jump 10 index 9", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 9", - "matchPattern": "action order [0-9]*: ife encode action jump 10.*type 0xED3E.*use prio 998877.*index 9", + "matchPattern": "action order [0-9]*: ife encode action jump 10.*type 0[xX]ED3E.*use prio 998877.*index 9", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -351,7 +351,7 @@ "cmdUnderTest": "$TC actions add action ife encode use prio 4294967295 reclassify index 99", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 99", - "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 4294967295.*index 99", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use prio 4294967295.*index 99", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -375,7 +375,7 @@ "cmdUnderTest": "$TC actions add action ife encode use prio 4294967298 pipe index 99", "expExitCode": "255", "verifyCmd": "$TC actions get action ife index 99", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use prio 4294967298.*index 99", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use prio 4294967298.*index 99", "matchCount": "0", "teardown": [] }, @@ -397,7 +397,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow tcindex pass index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow tcindex.*index 1", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow tcindex.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -421,7 +421,7 @@ "cmdUnderTest": "$TC actions add action ife encode use tcindex 111 pipe index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use tcindex 111.*index 1", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use tcindex 111.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -445,7 +445,7 @@ "cmdUnderTest": "$TC actions add action ife encode use tcindex 1 continue index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use tcindex 1.*index 1", + "matchPattern": "action order [0-9]*: ife encode action continue.*type 0[xX]ED3E.*use tcindex 1.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -469,7 +469,7 @@ "cmdUnderTest": "$TC actions add action ife encode use tcindex 1 continue index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action continue.*type 0xED3E.*use tcindex 1.*index 1", + "matchPattern": "action order [0-9]*: ife encode action continue.*type 0[xX]ED3E.*use tcindex 1.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -493,7 +493,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow tcindex drop index 77", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 77", - "matchPattern": "action order [0-9]*: ife encode action drop.*type 0xED3E.*allow tcindex.*index 77", + "matchPattern": "action order [0-9]*: ife encode action drop.*type 0[xX]ED3E.*allow tcindex.*index 77", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -517,7 +517,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow tcindex reclassify index 77", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 77", - "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*allow tcindex.*index 77", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*allow tcindex.*index 77", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -541,7 +541,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow tcindex jump 999 index 77", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 77", - "matchPattern": "action order [0-9]*: ife encode action jump 999.*type 0xED3E.*allow tcindex.*index 77", + "matchPattern": "action order [0-9]*: ife encode action jump 999.*type 0[xX]ED3E.*allow tcindex.*index 77", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -565,7 +565,7 @@ "cmdUnderTest": "$TC actions add action ife encode use tcindex 65535 pass index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*use tcindex 65535.*index 1", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*use tcindex 65535.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -589,7 +589,7 @@ "cmdUnderTest": "$TC actions add action ife encode use tcindex 65539 pipe index 1", "expExitCode": "255", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*use tcindex 65539.*index 1", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use tcindex 65539.*index 1", "matchCount": "0", "teardown": [] }, @@ -611,7 +611,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow mark src 00:11:22:33:44:55 pipe index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow mark src 00:11:22:33:44:55.*index 1", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*allow mark src 00:11:22:33:44:55.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -635,7 +635,7 @@ "cmdUnderTest": "$TC actions add action ife encode use prio 9876 dst 00:11:22:33:44:55 reclassify index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xED3E.*use prio 9876 dst 00:11:22:33:44:55.*index 1", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use prio 9876 dst 00:11:22:33:44:55.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -659,7 +659,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow tcindex src 00:aa:bb:cc:dd:ee dst 00:11:22:33:44:55 pass index 11", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 11", - "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow tcindex dst 00:11:22:33:44:55 src 00:aa:bb:cc:dd:ee .*index 11", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow tcindex dst 00:11:22:33:44:55 src 00:aa:bb:cc:dd:ee .*index 11", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -683,7 +683,7 @@ "cmdUnderTest": "$TC actions add action ife encode use mark 7 type 0xfefe pass index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xFEFE.*use mark 7.*index 1", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]FEFE.*use mark 7.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -707,7 +707,7 @@ "cmdUnderTest": "$TC actions add action ife encode use prio 444 type 0xabba pipe index 21", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 21", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xABBA.*use prio 444.*index 21", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ABBA.*use prio 444.*index 21", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -731,7 +731,7 @@ "cmdUnderTest": "$TC actions add action ife encode use tcindex 5000 type 0xabcd reclassify index 21", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 21", - "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0xABCD.*use tcindex 5000.*index 21", + "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ABCD.*use tcindex 5000.*index 21", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -739,7 +739,7 @@ }, { "id": "fac3", - "name": "Create valid ife encode action with index at 32-bit maximnum", + "name": "Create valid ife encode action with index at 32-bit maximum", "category": [ "actions", "ife" @@ -755,7 +755,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 4294967295", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 4294967295", - "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 4294967295", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow mark.*index 4294967295", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -779,7 +779,7 @@ "cmdUnderTest": "$TC actions add action ife decode pass index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife decode action pass.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchPattern": "action order [0-9]*: ife decode action pass.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -803,7 +803,7 @@ "cmdUnderTest": "$TC actions add action ife decode pipe index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife decode action pipe.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchPattern": "action order [0-9]*: ife decode action pipe.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -827,7 +827,7 @@ "cmdUnderTest": "$TC actions add action ife decode continue index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife decode action continue.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchPattern": "action order [0-9]*: ife decode action continue.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -851,7 +851,7 @@ "cmdUnderTest": "$TC actions add action ife decode drop index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife decode action drop.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchPattern": "action order [0-9]*: ife decode action drop.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -875,7 +875,7 @@ "cmdUnderTest": "$TC actions add action ife decode reclassify index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife decode action reclassify.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchPattern": "action order [0-9]*: ife decode action reclassify.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -899,7 +899,7 @@ "cmdUnderTest": "$TC actions add action ife decode jump 10 index 1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 1", - "matchPattern": "action order [0-9]*: ife decode action jump 10.*type 0x0.*allow mark allow tcindex allow prio.*index 1", + "matchPattern": "action order [0-9]*: ife decode action jump 10.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -923,7 +923,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 4294967295999", "expExitCode": "255", "verifyCmd": "$TC actions get action ife index 4294967295999", - "matchPattern": "action order [0-9]*: ife encode action pass.*type 0xED3E.*allow mark.*index 4294967295999", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow mark.*index 4294967295999", "matchCount": "0", "teardown": [] }, @@ -945,7 +945,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow mark kuka index 4", "expExitCode": "255", "verifyCmd": "$TC actions get action ife index 4", - "matchPattern": "action order [0-9]*: ife encode action kuka.*type 0xED3E.*allow mark.*index 4", + "matchPattern": "action order [0-9]*: ife encode action kuka.*type 0[xX]ED3E.*allow mark.*index 4", "matchCount": "0", "teardown": [] }, @@ -967,7 +967,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow prio pipe index 4 cookie aabbccddeeff112233445566778800a1", "expExitCode": "0", "verifyCmd": "$TC actions get action ife index 4", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow prio.*index 4.*cookie aabbccddeeff112233445566778800a1", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*allow prio.*index 4.*cookie aabbccddeeff112233445566778800a1", "matchCount": "1", "teardown": [ "$TC actions flush action ife" @@ -991,7 +991,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow foo pipe index 4", "expExitCode": "255", "verifyCmd": "$TC actions get action ife index 4", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0xED3E.*allow foo.*index 4", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*allow foo.*index 4", "matchCount": "0", "teardown": [] }, @@ -1013,7 +1013,7 @@ "cmdUnderTest": "$TC actions add action ife encode allow prio type 70000 pipe index 4", "expExitCode": "255", "verifyCmd": "$TC actions get action ife index 4", - "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0x11170.*allow prio.*index 4", + "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]11170.*allow prio.*index 4", "matchCount": "0", "teardown": [] }, diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json index 30f9b54bd666..4086a50a670e 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json @@ -715,5 +715,29 @@ "teardown": [ "$TC actions flush action police" ] + }, + { + "id": "b48b", + "name": "Add police action with exceed goto chain control action", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action police rate 1mbit burst 1k conform-exceed pass / goto chain 42", + "expExitCode": "255", + "verifyCmd": "$TC actions ls action police", + "matchPattern": "action order [0-9]*: police 0x1 rate 1Mbit burst 1Kb mtu 2Kb action pass/goto chain 42", + "matchCount": "0", + "teardown": [ + "$TC actions flush action police" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json index 10b2d894e436..e7e15a7336b6 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json @@ -82,35 +82,6 @@ ] }, { - "id": "ba4e", - "name": "Add tunnel_key set action with missing mandatory id parameter", - "category": [ - "actions", - "tunnel_key" - ], - "setup": [ - [ - "$TC actions flush action tunnel_key", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2", - "expExitCode": "255", - "verifyCmd": "$TC actions list action tunnel_key", - "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 10.10.10.1.*dst_ip 20.20.20.2", - "matchCount": "0", - "teardown": [ - [ - "$TC actions flush action tunnel_key", - 0, - 1, - 255 - ] - ] - }, - { "id": "a5e0", "name": "Add tunnel_key set action with invalid src_ip parameter", "category": [ @@ -634,7 +605,7 @@ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 10.10.10.2 id 7 index 4 cookie aa11bb22cc33dd44ee55ff66aa11b1b2", "expExitCode": "0", "verifyCmd": "$TC actions get action tunnel_key index 4", - "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 7.*dst_port 0.*csum pipe.*index 4 ref.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2", + "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 7.*csum pipe.*index 4 ref.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2", "matchCount": "1", "teardown": [ "$TC actions flush action tunnel_key" diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index 87a04a8a5945..e6e4ce80a726 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -23,6 +23,7 @@ from tdc_config import * from tdc_helper import * import TdcPlugin +from TdcResults import * class PluginMgrTestFail(Exception): @@ -60,10 +61,10 @@ class PluginMgr: for pgn_inst in reversed(self.plugin_instances): pgn_inst.post_suite(index) - def call_pre_case(self, test_ordinal, testid): + def call_pre_case(self, test_ordinal, testid, test_name): for pgn_inst in self.plugin_instances: try: - pgn_inst.pre_case(test_ordinal, testid) + pgn_inst.pre_case(test_ordinal, testid, test_name) except Exception as ee: print('exception {} in call to pre_case for {} plugin'. format(ee, pgn_inst.__class__)) @@ -102,7 +103,6 @@ class PluginMgr: self.argparser = argparse.ArgumentParser( description='Linux TC unit tests') - def replace_keywords(cmd): """ For a given executable command, substitute any known @@ -131,12 +131,16 @@ def exec_cmd(args, pm, stage, command): stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=ENVIR) - (rawout, serr) = proc.communicate() - if proc.returncode != 0 and len(serr) > 0: - foutput = serr.decode("utf-8") - else: - foutput = rawout.decode("utf-8") + try: + (rawout, serr) = proc.communicate(timeout=NAMES['TIMEOUT']) + if proc.returncode != 0 and len(serr) > 0: + foutput = serr.decode("utf-8", errors="ignore") + else: + foutput = rawout.decode("utf-8", errors="ignore") + except subprocess.TimeoutExpired: + foutput = "Command \"{}\" timed out\n".format(command) + proc.returncode = 255 proc.stdout.close() proc.stderr.close() @@ -169,6 +173,8 @@ def prepare_env(args, pm, stage, prefix, cmdlist, output = None): file=sys.stderr) print("\n{} *** Error message: \"{}\"".format(prefix, foutput), file=sys.stderr) + print("returncode {}; expected {}".format(proc.returncode, + exit_codes)) print("\n{} *** Aborting test run.".format(prefix), file=sys.stderr) print("\n\n{} *** stdout ***".format(proc.stdout), file=sys.stderr) print("\n\n{} *** stderr ***".format(proc.stderr), file=sys.stderr) @@ -181,6 +187,7 @@ def run_one_test(pm, args, index, tidx): result = True tresult = "" tap = "" + res = TestResult(tidx['id'], tidx['name']) if args.verbose > 0: print("\t====================\n=====> ", end="") print("Test " + tidx["id"] + ": " + tidx["name"]) @@ -188,19 +195,26 @@ def run_one_test(pm, args, index, tidx): # populate NAMES with TESTID for this test NAMES['TESTID'] = tidx['id'] - pm.call_pre_case(index, tidx['id']) + pm.call_pre_case(index, tidx['id'], tidx['name']) prepare_env(args, pm, 'setup', "-----> prepare stage", tidx["setup"]) if (args.verbose > 0): print('-----> execute stage') pm.call_pre_execute() (p, procout) = exec_cmd(args, pm, 'execute', tidx["cmdUnderTest"]) - exit_code = p.returncode + if p: + exit_code = p.returncode + else: + exit_code = None + pm.call_post_execute() - if (exit_code != int(tidx["expExitCode"])): - result = False - print("exit:", exit_code, int(tidx["expExitCode"])) + if (exit_code is None or exit_code != int(tidx["expExitCode"])): + print("exit: {!r}".format(exit_code)) + print("exit: {}".format(int(tidx["expExitCode"]))) + #print("exit: {!r} {}".format(exit_code, int(tidx["expExitCode"]))) + res.set_result(ResultState.fail) + res.set_failmsg('Command exited with {}, expected {}\n{}'.format(exit_code, tidx["expExitCode"], procout)) print(procout) else: if args.verbose > 0: @@ -211,20 +225,15 @@ def run_one_test(pm, args, index, tidx): if procout: match_index = re.findall(match_pattern, procout) if len(match_index) != int(tidx["matchCount"]): - result = False + res.set_result(ResultState.fail) + res.set_failmsg('Could not match regex pattern. Verify command output:\n{}'.format(procout)) + else: + res.set_result(ResultState.success) elif int(tidx["matchCount"]) != 0: - result = False - - if not result: - tresult += 'not ' - tresult += 'ok {} - {} # {}\n'.format(str(index), tidx['id'], tidx['name']) - tap += tresult - - if result == False: - if procout: - tap += procout + res.set_result(ResultState.fail) + res.set_failmsg('No output generated by verify command.') else: - tap += 'No output!\n' + res.set_result(ResultState.success) prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'], procout) pm.call_post_case() @@ -233,7 +242,7 @@ def run_one_test(pm, args, index, tidx): # remove TESTID from NAMES del(NAMES['TESTID']) - return tap + return res def test_runner(pm, args, filtered_tests): """ @@ -253,25 +262,15 @@ def test_runner(pm, args, filtered_tests): emergency_exit = False emergency_exit_message = '' - if args.notap: - if args.verbose: - tap = 'notap requested: omitting test plan\n' - else: - tap = str(index) + ".." + str(tcount) + "\n" + tsr = TestSuiteReport() + try: pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist]) except Exception as ee: ex_type, ex, ex_tb = sys.exc_info() print('Exception {} {} (caught in pre_suite).'. format(ex_type, ex)) - # when the extra print statements are uncommented, - # the traceback does not appear between them - # (it appears way earlier in the tdc.py output) - # so don't bother ... - # print('--------------------(') - # print('traceback') traceback.print_tb(ex_tb) - # print('--------------------)') emergency_exit_message = 'EMERGENCY EXIT, call_pre_suite failed with exception {} {}\n'.format(ex_type, ex) emergency_exit = True stage = 'pre-SUITE' @@ -287,15 +286,26 @@ def test_runner(pm, args, filtered_tests): if args.verbose > 1: print('Not executing test {} {} because DEV2 not defined'. format(tidx['id'], tidx['name'])) + res = TestResult(tidx['id'], tidx['name']) + res.set_result(ResultState.skip) + res.set_errormsg('Not executed because DEV2 is not defined') + tsr.add_resultdata(res) continue try: badtest = tidx # in case it goes bad - tap += run_one_test(pm, args, index, tidx) + res = run_one_test(pm, args, index, tidx) + tsr.add_resultdata(res) except PluginMgrTestFail as pmtf: ex_type, ex, ex_tb = sys.exc_info() stage = pmtf.stage message = pmtf.message output = pmtf.output + res = TestResult(tidx['id'], tidx['name']) + res.set_result(ResultState.skip) + res.set_errormsg(pmtf.message) + res.set_failmsg(pmtf.output) + tsr.add_resultdata(res) + index += 1 print(message) print('Exception {} {} (caught in test_runner, running test {} {} {} stage {})'. format(ex_type, ex, index, tidx['id'], tidx['name'], stage)) @@ -314,16 +324,16 @@ def test_runner(pm, args, filtered_tests): # if we failed in setup or teardown, # fill in the remaining tests with ok-skipped count = index - if not args.notap: - tap += 'about to flush the tap output if tests need to be skipped\n' - if tcount + 1 != index: - for tidx in testlist[index - 1:]: - msg = 'skipped - previous {} failed'.format(stage) - tap += 'ok {} - {} # {} {} {}\n'.format( - count, tidx['id'], msg, index, badtest.get('id', '--Unknown--')) - count += 1 - tap += 'done flushing skipped test tap output\n' + if tcount + 1 != count: + for tidx in testlist[count - 1:]: + res = TestResult(tidx['id'], tidx['name']) + res.set_result(ResultState.skip) + msg = 'skipped - previous {} failed {} {}'.format(stage, + index, badtest.get('id', '--Unknown--')) + res.set_errormsg(msg) + tsr.add_resultdata(res) + count += 1 if args.pause: print('Want to pause\nPress enter to continue ...') @@ -332,7 +342,7 @@ def test_runner(pm, args, filtered_tests): pm.call_post_suite(index) - return tap + return tsr def has_blank_ids(idlist): """ @@ -373,6 +383,10 @@ def set_args(parser): Set the command line arguments for tdc. """ parser.add_argument( + '--outfile', type=str, + help='Path to the file in which results should be saved. ' + + 'Default target is the current directory.') + parser.add_argument( '-p', '--path', type=str, help='The full path to the tc executable to use') sg = parser.add_argument_group( @@ -408,8 +422,9 @@ def set_args(parser): '-v', '--verbose', action='count', default=0, help='Show the commands that are being run') parser.add_argument( - '-N', '--notap', action='store_true', - help='Suppress tap results for command under test') + '--format', default='tap', const='tap', nargs='?', + choices=['none', 'xunit', 'tap'], + help='Specify the format for test results. (Default: TAP)') parser.add_argument('-d', '--device', help='Execute the test case in flower category') parser.add_argument( @@ -430,6 +445,8 @@ def check_default_settings(args, remaining, pm): NAMES['TC'] = args.path if args.device != None: NAMES['DEV2'] = args.device + if 'TIMEOUT' not in NAMES: + NAMES['TIMEOUT'] = None if not os.path.isfile(NAMES['TC']): print("The specified tc path " + NAMES['TC'] + " does not exist.") exit(1) @@ -624,12 +641,30 @@ def set_operation_mode(pm, args): if len(alltests): catresults = test_runner(pm, args, alltests) + if args.format == 'none': + print('Test results output suppression requested\n') + else: + print('\nAll test results: \n') + if args.format == 'xunit': + suffix = 'xml' + res = catresults.format_xunit() + elif args.format == 'tap': + suffix = 'tap' + res = catresults.format_tap() + print(res) + print('\n\n') + if not args.outfile: + fname = 'test-results.{}'.format(suffix) + else: + fname = args.outfile + with open(fname, 'w') as fh: + fh.write(res) + fh.close() + if os.getenv('SUDO_UID') is not None: + os.chown(fname, uid=int(os.getenv('SUDO_UID')), + gid=int(os.getenv('SUDO_GID'))) else: - catresults = 'No tests found\n' - if args.notap: - print('Tap output suppression requested\n') - else: - print('All test results: \n\n{}'.format(catresults)) + print('No tests found\n') def main(): """ diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py index a023d0d62b25..6d91e48c2625 100644 --- a/tools/testing/selftests/tc-testing/tdc_config.py +++ b/tools/testing/selftests/tc-testing/tdc_config.py @@ -15,8 +15,12 @@ NAMES = { 'DEV1': 'v0p1', 'DEV2': '', 'BATCH_FILE': './batch.txt', + # Length of time in seconds to wait before terminating a command + 'TIMEOUT': 12, # Name of the namespace to use - 'NS': 'tcut' + 'NS': 'tcut', + # Directory containing eBPF test programs + 'EBPFDIR': './bpf' } diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile index c02683cfb6c9..7656c7ce79d9 100644 --- a/tools/testing/selftests/timers/Makefile +++ b/tools/testing/selftests/timers/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -O3 -Wl,-no-as-needed -Wall -LDFLAGS += -lrt -lpthread -lm +LDLIBS += -lrt -lpthread -lm # these are all "safe" tests that don't modify # system time or require escalated privileges diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index af5ff83f6d7f..31b3c98b6d34 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -13,3 +13,4 @@ mlock-random-test virtual_address_range gup_benchmark va_128TBswitch +map_fixed_noreplace diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index e94b7b14bcb2..e13eb6cc8901 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -12,6 +12,7 @@ TEST_GEN_FILES += gup_benchmark TEST_GEN_FILES += hugepage-mmap TEST_GEN_FILES += hugepage-shm TEST_GEN_FILES += map_hugetlb +TEST_GEN_FILES += map_fixed_noreplace TEST_GEN_FILES += map_populate TEST_GEN_FILES += mlock-random-test TEST_GEN_FILES += mlock2-tests @@ -24,6 +25,7 @@ TEST_GEN_FILES += virtual_address_range TEST_PROGS := run_vmtests +KSFT_KHDR_INSTALL := 1 include ../lib.mk $(OUTPUT)/userfaultfd: LDLIBS += -lpthread diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c index 36df55132036..c0534e298b51 100644 --- a/tools/testing/selftests/vm/gup_benchmark.c +++ b/tools/testing/selftests/vm/gup_benchmark.c @@ -15,23 +15,29 @@ #define PAGE_SIZE sysconf(_SC_PAGESIZE) #define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark) +#define GUP_LONGTERM_BENCHMARK _IOWR('g', 2, struct gup_benchmark) +#define GUP_BENCHMARK _IOWR('g', 3, struct gup_benchmark) struct gup_benchmark { - __u64 delta_usec; + __u64 get_delta_usec; + __u64 put_delta_usec; __u64 addr; __u64 size; __u32 nr_pages_per_call; __u32 flags; + __u64 expansion[10]; /* For future use */ }; int main(int argc, char **argv) { struct gup_benchmark gup; unsigned long size = 128 * MB; - int i, fd, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0; + int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0; + int cmd = GUP_FAST_BENCHMARK, flags = MAP_PRIVATE; + char *file = "/dev/zero"; char *p; - while ((opt = getopt(argc, argv, "m:r:n:tT")) != -1) { + while ((opt = getopt(argc, argv, "m:r:n:f:tTLUSH")) != -1) { switch (opt) { case 'm': size = atoi(optarg) * MB; @@ -48,13 +54,36 @@ int main(int argc, char **argv) case 'T': thp = 0; break; + case 'L': + cmd = GUP_LONGTERM_BENCHMARK; + break; + case 'U': + cmd = GUP_BENCHMARK; + break; case 'w': write = 1; + break; + case 'f': + file = optarg; + break; + case 'S': + flags &= ~MAP_PRIVATE; + flags |= MAP_SHARED; + break; + case 'H': + flags |= MAP_HUGETLB; + break; default: return -1; } } + filed = open(file, O_RDWR|O_CREAT); + if (filed < 0) { + perror("open"); + exit(filed); + } + gup.nr_pages_per_call = nr_pages; gup.flags = write; @@ -62,8 +91,7 @@ int main(int argc, char **argv) if (fd == -1) perror("open"), exit(1); - p = mmap(NULL, size, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0); if (p == MAP_FAILED) perror("mmap"), exit(1); gup.addr = (unsigned long)p; @@ -78,10 +106,11 @@ int main(int argc, char **argv) for (i = 0; i < repeats; i++) { gup.size = size; - if (ioctl(fd, GUP_FAST_BENCHMARK, &gup)) + if (ioctl(fd, cmd, &gup)) perror("ioctl"), exit(1); - printf("Time: %lld us", gup.delta_usec); + printf("Time: get:%lld put:%lld us", gup.get_delta_usec, + gup.put_delta_usec); if (gup.size != size) printf(", truncated (size: %lld)", gup.size); printf("\n"); diff --git a/tools/testing/selftests/vm/map_fixed_noreplace.c b/tools/testing/selftests/vm/map_fixed_noreplace.c new file mode 100644 index 000000000000..d91bde511268 --- /dev/null +++ b/tools/testing/selftests/vm/map_fixed_noreplace.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Test that MAP_FIXED_NOREPLACE works. + * + * Copyright 2018, Jann Horn <jannh@google.com> + * Copyright 2018, Michael Ellerman, IBM Corporation. + */ + +#include <sys/mman.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#ifndef MAP_FIXED_NOREPLACE +#define MAP_FIXED_NOREPLACE 0x100000 +#endif + +#define BASE_ADDRESS (256ul * 1024 * 1024) + + +static void dump_maps(void) +{ + char cmd[32]; + + snprintf(cmd, sizeof(cmd), "cat /proc/%d/maps", getpid()); + system(cmd); +} + +int main(void) +{ + unsigned long flags, addr, size, page_size; + char *p; + + page_size = sysconf(_SC_PAGE_SIZE); + + flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE; + + // Check we can map all the areas we need below + errno = 0; + addr = BASE_ADDRESS; + size = 5 * page_size; + p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); + + printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + + if (p == MAP_FAILED) { + dump_maps(); + printf("Error: couldn't map the space we need for the test\n"); + return 1; + } + + errno = 0; + if (munmap((void *)addr, 5 * page_size) != 0) { + dump_maps(); + printf("Error: munmap failed!?\n"); + return 1; + } + printf("unmap() successful\n"); + + errno = 0; + addr = BASE_ADDRESS + page_size; + size = 3 * page_size; + p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); + printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + + if (p == MAP_FAILED) { + dump_maps(); + printf("Error: first mmap() failed unexpectedly\n"); + return 1; + } + + /* + * Exact same mapping again: + * base | free | new + * +1 | mapped | new + * +2 | mapped | new + * +3 | mapped | new + * +4 | free | new + */ + errno = 0; + addr = BASE_ADDRESS; + size = 5 * page_size; + p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); + printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + + if (p != MAP_FAILED) { + dump_maps(); + printf("Error:1: mmap() succeeded when it shouldn't have\n"); + return 1; + } + + /* + * Second mapping contained within first: + * + * base | free | + * +1 | mapped | + * +2 | mapped | new + * +3 | mapped | + * +4 | free | + */ + errno = 0; + addr = BASE_ADDRESS + (2 * page_size); + size = page_size; + p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); + printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + + if (p != MAP_FAILED) { + dump_maps(); + printf("Error:2: mmap() succeeded when it shouldn't have\n"); + return 1; + } + + /* + * Overlap end of existing mapping: + * base | free | + * +1 | mapped | + * +2 | mapped | + * +3 | mapped | new + * +4 | free | new + */ + errno = 0; + addr = BASE_ADDRESS + (3 * page_size); + size = 2 * page_size; + p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); + printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + + if (p != MAP_FAILED) { + dump_maps(); + printf("Error:3: mmap() succeeded when it shouldn't have\n"); + return 1; + } + + /* + * Overlap start of existing mapping: + * base | free | new + * +1 | mapped | new + * +2 | mapped | + * +3 | mapped | + * +4 | free | + */ + errno = 0; + addr = BASE_ADDRESS; + size = 2 * page_size; + p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); + printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + + if (p != MAP_FAILED) { + dump_maps(); + printf("Error:4: mmap() succeeded when it shouldn't have\n"); + return 1; + } + + /* + * Adjacent to start of existing mapping: + * base | free | new + * +1 | mapped | + * +2 | mapped | + * +3 | mapped | + * +4 | free | + */ + errno = 0; + addr = BASE_ADDRESS; + size = page_size; + p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); + printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + + if (p == MAP_FAILED) { + dump_maps(); + printf("Error:5: mmap() failed when it shouldn't have\n"); + return 1; + } + + /* + * Adjacent to end of existing mapping: + * base | free | + * +1 | mapped | + * +2 | mapped | + * +3 | mapped | + * +4 | free | new + */ + errno = 0; + addr = BASE_ADDRESS + (4 * page_size); + size = page_size; + p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); + printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + + if (p == MAP_FAILED) { + dump_maps(); + printf("Error:6: mmap() failed when it shouldn't have\n"); + return 1; + } + + addr = BASE_ADDRESS; + size = 5 * page_size; + if (munmap((void *)addr, size) != 0) { + dump_maps(); + printf("Error: munmap failed!?\n"); + return 1; + } + printf("unmap() successful\n"); + + printf("OK\n"); + return 0; +} diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 7b8171e3128a..5d1db824f73a 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -34,18 +34,6 @@ * per-CPU threads 1 by triggering userfaults inside * pthread_mutex_lock will also verify the atomicity of the memory * transfer (UFFDIO_COPY). - * - * The program takes two parameters: the amounts of physical memory in - * megabytes (MiB) of the area and the number of bounces to execute. - * - * # 100MiB 99999 bounces - * ./userfaultfd 100 99999 - * - * # 1GiB 99 bounces - * ./userfaultfd 1000 99 - * - * # 10MiB-~6GiB 999 bounces, continue forever unless an error triggers - * while ./userfaultfd $[RANDOM % 6000 + 10] 999; do true; done */ #define _GNU_SOURCE @@ -115,6 +103,30 @@ pthread_attr_t attr; ~(unsigned long)(sizeof(unsigned long long) \ - 1))) +const char *examples = + "# Run anonymous memory test on 100MiB region with 99999 bounces:\n" + "./userfaultfd anon 100 99999\n\n" + "# Run share memory test on 1GiB region with 99 bounces:\n" + "./userfaultfd shmem 1000 99\n\n" + "# Run hugetlb memory test on 256MiB region with 50 bounces (using /dev/hugepages/hugefile):\n" + "./userfaultfd hugetlb 256 50 /dev/hugepages/hugefile\n\n" + "# Run the same hugetlb test but using shmem:\n" + "./userfaultfd hugetlb_shared 256 50 /dev/hugepages/hugefile\n\n" + "# 10MiB-~6GiB 999 bounces anonymous test, " + "continue forever unless an error triggers\n" + "while ./userfaultfd anon $[RANDOM % 6000 + 10] 999; do true; done\n\n"; + +static void usage(void) +{ + fprintf(stderr, "\nUsage: ./userfaultfd <test type> <MiB> <bounces> " + "[hugetlbfs_file]\n\n"); + fprintf(stderr, "Supported <test type>: anon, hugetlb, " + "hugetlb_shared, shmem\n\n"); + fprintf(stderr, "Examples:\n\n"); + fprintf(stderr, examples); + exit(1); +} + static int anon_release_pages(char *rel_area) { int ret = 0; @@ -439,6 +451,43 @@ static int copy_page(int ufd, unsigned long offset) return __copy_page(ufd, offset, false); } +static int uffd_read_msg(int ufd, struct uffd_msg *msg) +{ + int ret = read(uffd, msg, sizeof(*msg)); + + if (ret != sizeof(*msg)) { + if (ret < 0) { + if (errno == EAGAIN) + return 1; + else + perror("blocking read error"), exit(1); + } else { + fprintf(stderr, "short read\n"), exit(1); + } + } + + return 0; +} + +/* Return 1 if page fault handled by us; otherwise 0 */ +static int uffd_handle_page_fault(struct uffd_msg *msg) +{ + unsigned long offset; + + if (msg->event != UFFD_EVENT_PAGEFAULT) + fprintf(stderr, "unexpected msg event %u\n", + msg->event), exit(1); + + if (bounces & BOUNCE_VERIFY && + msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) + fprintf(stderr, "unexpected write fault\n"), exit(1); + + offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; + offset &= ~(page_size-1); + + return copy_page(uffd, offset); +} + static void *uffd_poll_thread(void *arg) { unsigned long cpu = (unsigned long) arg; @@ -446,7 +495,6 @@ static void *uffd_poll_thread(void *arg) struct uffd_msg msg; struct uffdio_register uffd_reg; int ret; - unsigned long offset; char tmp_chr; unsigned long userfaults = 0; @@ -470,25 +518,15 @@ static void *uffd_poll_thread(void *arg) if (!(pollfd[0].revents & POLLIN)) fprintf(stderr, "pollfd[0].revents %d\n", pollfd[0].revents), exit(1); - ret = read(uffd, &msg, sizeof(msg)); - if (ret < 0) { - if (errno == EAGAIN) - continue; - perror("nonblocking read error"), exit(1); - } + if (uffd_read_msg(uffd, &msg)) + continue; switch (msg.event) { default: fprintf(stderr, "unexpected msg event %u\n", msg.event), exit(1); break; case UFFD_EVENT_PAGEFAULT: - if (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) - fprintf(stderr, "unexpected write fault\n"), exit(1); - offset = (char *)(unsigned long)msg.arg.pagefault.address - - area_dst; - offset &= ~(page_size-1); - if (copy_page(uffd, offset)) - userfaults++; + userfaults += uffd_handle_page_fault(&msg); break; case UFFD_EVENT_FORK: close(uffd); @@ -516,8 +554,6 @@ static void *uffd_read_thread(void *arg) { unsigned long *this_cpu_userfaults; struct uffd_msg msg; - unsigned long offset; - int ret; this_cpu_userfaults = (unsigned long *) arg; *this_cpu_userfaults = 0; @@ -526,24 +562,9 @@ static void *uffd_read_thread(void *arg) /* from here cancellation is ok */ for (;;) { - ret = read(uffd, &msg, sizeof(msg)); - if (ret != sizeof(msg)) { - if (ret < 0) - perror("blocking read error"), exit(1); - else - fprintf(stderr, "short read\n"), exit(1); - } - if (msg.event != UFFD_EVENT_PAGEFAULT) - fprintf(stderr, "unexpected msg event %u\n", - msg.event), exit(1); - if (bounces & BOUNCE_VERIFY && - msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) - fprintf(stderr, "unexpected write fault\n"), exit(1); - offset = (char *)(unsigned long)msg.arg.pagefault.address - - area_dst; - offset &= ~(page_size-1); - if (copy_page(uffd, offset)) - (*this_cpu_userfaults)++; + if (uffd_read_msg(uffd, &msg)) + continue; + (*this_cpu_userfaults) += uffd_handle_page_fault(&msg); } return (void *)NULL; } @@ -605,6 +626,12 @@ static int stress(unsigned long *userfaults) if (uffd_test_ops->release_pages(area_src)) return 1; + + finished = 1; + for (cpu = 0; cpu < nr_cpus; cpu++) + if (pthread_join(locking_threads[cpu], NULL)) + return 1; + for (cpu = 0; cpu < nr_cpus; cpu++) { char c; if (bounces & BOUNCE_POLL) { @@ -622,11 +649,6 @@ static int stress(unsigned long *userfaults) } } - finished = 1; - for (cpu = 0; cpu < nr_cpus; cpu++) - if (pthread_join(locking_threads[cpu], NULL)) - return 1; - return 0; } @@ -1272,8 +1294,7 @@ static void sigalrm(int sig) int main(int argc, char **argv) { if (argc < 4) - fprintf(stderr, "Usage: <test type> <MiB> <bounces> [hugetlbfs_file]\n"), - exit(1); + usage(); if (signal(SIGALRM, sigalrm) == SIG_ERR) fprintf(stderr, "failed to arm SIGALRM"), exit(1); @@ -1286,20 +1307,19 @@ int main(int argc, char **argv) nr_cpus; if (!nr_pages_per_cpu) { fprintf(stderr, "invalid MiB\n"); - fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1); + usage(); } bounces = atoi(argv[3]); if (bounces <= 0) { fprintf(stderr, "invalid bounces\n"); - fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1); + usage(); } nr_pages = nr_pages_per_cpu * nr_cpus; if (test_type == TEST_HUGETLB) { if (argc < 5) - fprintf(stderr, "Usage: hugetlb <MiB> <bounces> <hugetlbfs_file>\n"), - exit(1); + usage(); huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755); if (huge_fd < 0) { fprintf(stderr, "Open of %s failed", argv[3]); diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c index 6e290874b70e..c2333c78cf04 100644 --- a/tools/testing/selftests/watchdog/watchdog-test.c +++ b/tools/testing/selftests/watchdog/watchdog-test.c @@ -19,7 +19,7 @@ int fd; const char v = 'V'; -static const char sopts[] = "bdehp:t:"; +static const char sopts[] = "bdehp:t:Tn:NL"; static const struct option lopts[] = { {"bootstatus", no_argument, NULL, 'b'}, {"disable", no_argument, NULL, 'd'}, @@ -27,6 +27,10 @@ static const struct option lopts[] = { {"help", no_argument, NULL, 'h'}, {"pingrate", required_argument, NULL, 'p'}, {"timeout", required_argument, NULL, 't'}, + {"gettimeout", no_argument, NULL, 'T'}, + {"pretimeout", required_argument, NULL, 'n'}, + {"getpretimeout", no_argument, NULL, 'N'}, + {"gettimeleft", no_argument, NULL, 'L'}, {NULL, no_argument, NULL, 0x0} }; @@ -71,9 +75,14 @@ static void usage(char *progname) printf(" -h, --help Print the help message\n"); printf(" -p, --pingrate=P Set ping rate to P seconds (default %d)\n", DEFAULT_PING_RATE); printf(" -t, --timeout=T Set timeout to T seconds\n"); + printf(" -T, --gettimeout Get the timeout\n"); + printf(" -n, --pretimeout=T Set the pretimeout to T seconds\n"); + printf(" -N, --getpretimeout Get the pretimeout\n"); + printf(" -L, --gettimeleft Get the time left until timer expires\n"); printf("\n"); printf("Parameters are parsed left-to-right in real-time.\n"); printf("Example: %s -d -t 10 -p 5 -e\n", progname); + printf("Example: %s -t 12 -T -n 7 -N\n", progname); } int main(int argc, char *argv[]) @@ -89,7 +98,13 @@ int main(int argc, char *argv[]) fd = open("/dev/watchdog", O_WRONLY); if (fd == -1) { - printf("Watchdog device not enabled.\n"); + if (errno == ENOENT) + printf("Watchdog device not enabled.\n"); + else if (errno == EACCES) + printf("Run watchdog as root.\n"); + else + printf("Watchdog device open failed %s\n", + strerror(errno)); exit(-1); } @@ -103,23 +118,27 @@ int main(int argc, char *argv[]) printf("Last boot is caused by: %s.\n", (flags != 0) ? "Watchdog" : "Power-On-Reset"); else - printf("WDIOC_GETBOOTSTATUS errno '%s'\n", strerror(errno)); + printf("WDIOC_GETBOOTSTATUS error '%s'\n", strerror(errno)); break; case 'd': flags = WDIOS_DISABLECARD; ret = ioctl(fd, WDIOC_SETOPTIONS, &flags); if (!ret) printf("Watchdog card disabled.\n"); - else - printf("WDIOS_DISABLECARD errno '%s'\n", strerror(errno)); + else { + printf("WDIOS_DISABLECARD error '%s'\n", strerror(errno)); + oneshot = 1; + } break; case 'e': flags = WDIOS_ENABLECARD; ret = ioctl(fd, WDIOC_SETOPTIONS, &flags); if (!ret) printf("Watchdog card enabled.\n"); - else - printf("WDIOS_ENABLECARD errno '%s'\n", strerror(errno)); + else { + printf("WDIOS_ENABLECARD error '%s'\n", strerror(errno)); + oneshot = 1; + } break; case 'p': ping_rate = strtoul(optarg, NULL, 0); @@ -132,9 +151,46 @@ int main(int argc, char *argv[]) ret = ioctl(fd, WDIOC_SETTIMEOUT, &flags); if (!ret) printf("Watchdog timeout set to %u seconds.\n", flags); + else { + printf("WDIOC_SETTIMEOUT error '%s'\n", strerror(errno)); + oneshot = 1; + } + break; + case 'T': + oneshot = 1; + ret = ioctl(fd, WDIOC_GETTIMEOUT, &flags); + if (!ret) + printf("WDIOC_GETTIMEOUT returns %u seconds.\n", flags); else - printf("WDIOC_SETTIMEOUT errno '%s'\n", strerror(errno)); + printf("WDIOC_GETTIMEOUT error '%s'\n", strerror(errno)); break; + case 'n': + flags = strtoul(optarg, NULL, 0); + ret = ioctl(fd, WDIOC_SETPRETIMEOUT, &flags); + if (!ret) + printf("Watchdog pretimeout set to %u seconds.\n", flags); + else { + printf("WDIOC_SETPRETIMEOUT error '%s'\n", strerror(errno)); + oneshot = 1; + } + break; + case 'N': + oneshot = 1; + ret = ioctl(fd, WDIOC_GETPRETIMEOUT, &flags); + if (!ret) + printf("WDIOC_GETPRETIMEOUT returns %u seconds.\n", flags); + else + printf("WDIOC_GETPRETIMEOUT error '%s'\n", strerror(errno)); + break; + case 'L': + oneshot = 1; + ret = ioctl(fd, WDIOC_GETTIMELEFT, &flags); + if (!ret) + printf("WDIOC_GETTIMELEFT returns %u seconds.\n", flags); + else + printf("WDIOC_GETTIMELEFT error '%s'\n", strerror(errno)); + break; + default: usage(argv[0]); goto end; diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c index 50f7e9272481..bf1bb15b6fbe 100644 --- a/tools/testing/selftests/x86/mpx-mini-test.c +++ b/tools/testing/selftests/x86/mpx-mini-test.c @@ -1503,7 +1503,7 @@ exit: exit(20); } if (successes != total_nr_tests) { - eprintf("ERROR: succeded fewer than number of tries (%d != %d)\n", + eprintf("ERROR: succeeded fewer than number of tries (%d != %d)\n", successes, total_nr_tests); exit(21); } diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c index 460b4bdf4c1e..5d546dcdbc80 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/x86/protection_keys.c @@ -1133,6 +1133,21 @@ void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) pkey_assert(err); } +void become_child(void) +{ + pid_t forkret; + + forkret = fork(); + pkey_assert(forkret >= 0); + dprintf3("[%d] fork() ret: %d\n", getpid(), forkret); + + if (!forkret) { + /* in the child */ + return; + } + exit(0); +} + /* Assumes that all pkeys other than 'pkey' are unallocated */ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) { @@ -1141,7 +1156,7 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) int nr_allocated_pkeys = 0; int i; - for (i = 0; i < NR_PKEYS*2; i++) { + for (i = 0; i < NR_PKEYS*3; i++) { int new_pkey; dprintf1("%s() alloc loop: %d\n", __func__, i); new_pkey = alloc_pkey(); @@ -1152,21 +1167,27 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) if ((new_pkey == -1) && (errno == ENOSPC)) { dprintf2("%s() failed to allocate pkey after %d tries\n", __func__, nr_allocated_pkeys); - break; + } else { + /* + * Ensure the number of successes never + * exceeds the number of keys supported + * in the hardware. + */ + pkey_assert(nr_allocated_pkeys < NR_PKEYS); + allocated_pkeys[nr_allocated_pkeys++] = new_pkey; } - pkey_assert(nr_allocated_pkeys < NR_PKEYS); - allocated_pkeys[nr_allocated_pkeys++] = new_pkey; + + /* + * Make sure that allocation state is properly + * preserved across fork(). + */ + if (i == NR_PKEYS*2) + become_child(); } dprintf3("%s()::%d\n", __func__, __LINE__); /* - * ensure it did not reach the end of the loop without - * failure: - */ - pkey_assert(i < NR_PKEYS*2); - - /* * There are 16 pkeys supported in hardware. Three are * allocated by the time we get here: * 1. The default key (0) diff --git a/tools/testing/selftests/x86/unwind_vdso.c b/tools/testing/selftests/x86/unwind_vdso.c index 00a26a82fa98..97311333700e 100644 --- a/tools/testing/selftests/x86/unwind_vdso.c +++ b/tools/testing/selftests/x86/unwind_vdso.c @@ -44,7 +44,6 @@ int main() #include <stdbool.h> #include <sys/ptrace.h> #include <sys/user.h> -#include <sys/ucontext.h> #include <link.h> #include <sys/auxv.h> #include <dlfcn.h> |