aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/crypto
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/crypto')
-rw-r--r--drivers/crypto/Kconfig40
-rw-r--r--drivers/crypto/Makefile3
-rw-r--r--drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c2
-rw-r--r--drivers/crypto/atmel-i2c.c30
-rw-r--r--drivers/crypto/atmel-i2c.h8
-rw-r--r--drivers/crypto/atmel-sha204a.c68
-rw-r--r--drivers/crypto/bcm/spu2.c2
-rw-r--r--drivers/crypto/caam/ctrl.c19
-rw-r--r--drivers/crypto/ccp/Kconfig2
-rw-r--r--drivers/crypto/ccp/platform-access.c11
-rw-r--r--drivers/crypto/ccp/psp-dev.c11
-rw-r--r--drivers/crypto/ccp/sev-dev.c1150
-rw-r--r--drivers/crypto/ccp/sev-dev.h5
-rw-r--r--drivers/crypto/ccp/sp-platform.c14
-rw-r--r--drivers/crypto/hisilicon/debugfs.c123
-rw-r--r--drivers/crypto/hisilicon/hpre/hpre_main.c25
-rw-r--r--drivers/crypto/hisilicon/qm.c192
-rw-r--r--drivers/crypto/hisilicon/sec2/sec_crypto.c37
-rw-r--r--drivers/crypto/hisilicon/sec2/sec_main.c39
-rw-r--r--drivers/crypto/hisilicon/sgl.c5
-rw-r--r--drivers/crypto/hisilicon/zip/zip_crypto.c1
-rw-r--r--drivers/crypto/hisilicon/zip/zip_main.c26
-rw-r--r--drivers/crypto/intel/iaa/iaa_crypto.h41
-rw-r--r--drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c1
-rw-r--r--drivers/crypto/intel/iaa/iaa_crypto_main.c137
-rw-r--r--drivers/crypto/intel/iaa/iaa_crypto_stats.c215
-rw-r--r--drivers/crypto/intel/iaa/iaa_crypto_stats.h8
-rw-r--r--drivers/crypto/intel/qat/Kconfig14
-rw-r--r--drivers/crypto/intel/qat/qat_420xx/Makefile2
-rw-r--r--drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c69
-rw-r--r--drivers/crypto/intel/qat/qat_4xxx/Makefile2
-rw-r--r--drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c71
-rw-r--r--drivers/crypto/intel/qat/qat_4xxx/adf_drv.c2
-rw-r--r--drivers/crypto/intel/qat/qat_c3xxx/Makefile2
-rw-r--r--drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c1
-rw-r--r--drivers/crypto/intel/qat/qat_c3xxxvf/Makefile2
-rw-r--r--drivers/crypto/intel/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c1
-rw-r--r--drivers/crypto/intel/qat/qat_c62x/Makefile2
-rw-r--r--drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c1
-rw-r--r--drivers/crypto/intel/qat/qat_c62xvf/Makefile2
-rw-r--r--drivers/crypto/intel/qat/qat_c62xvf/adf_c62xvf_hw_data.c1
-rw-r--r--drivers/crypto/intel/qat/qat_common/Makefile8
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_accel_devices.h91
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_aer.c123
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h1
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_clock.c3
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_cnv_dbgfs.c1
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_common_drv.h20
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_dev_mgr.c4
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.c101
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.h86
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.c97
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h76
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.c231
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.h188
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c439
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h128
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen4_pfvf.c8
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen4_ras.c6
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen4_tl.c1
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.c1010
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.h10
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_heartbeat.c20
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_heartbeat.h21
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.c53
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_heartbeat_inject.c76
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_hw_arbiter.c25
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_init.c12
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_isr.c11
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.c318
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.h89
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_pfvf_msg.h7
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.c64
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.h21
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c16
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_pfvf_utils.h11
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_pfvf_vf_proto.c6
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_rl.c32
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_rl.h2
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_sriov.c45
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_sysfs.c37
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_telemetry.c21
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_telemetry.h1
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_transport.c4
-rw-r--r--drivers/crypto/intel/qat/qat_common/adf_vf_isr.c2
-rw-r--r--drivers/crypto/intel/qat/qat_common/qat_asym_algs.c66
-rw-r--r--drivers/crypto/intel/qat/qat_common/qat_bl.c6
-rw-r--r--drivers/crypto/intel/qat/qat_common/qat_bl.h11
-rw-r--r--drivers/crypto/intel/qat/qat_common/qat_comp_algs.c9
-rw-r--r--drivers/crypto/intel/qat/qat_common/qat_crypto.c4
-rw-r--r--drivers/crypto/intel/qat/qat_common/qat_mig_dev.c130
-rw-r--r--drivers/crypto/intel/qat/qat_dh895xcc/Makefile2
-rw-r--r--drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c1
-rw-r--r--drivers/crypto/intel/qat/qat_dh895xccvf/Makefile2
-rw-r--r--drivers/crypto/intel/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c1
-rw-r--r--drivers/crypto/marvell/octeontx2/cn10k_cpt.c4
-rw-r--r--drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c9
-rw-r--r--drivers/crypto/mxs-dcp.c104
-rw-r--r--drivers/crypto/n2_core.c2
-rw-r--r--drivers/crypto/nx/nx-842.c6
-rw-r--r--drivers/crypto/nx/nx-842.h10
-rw-r--r--drivers/crypto/rockchip/rk3288_crypto.c5
-rw-r--r--drivers/crypto/sahara.c16
-rw-r--r--drivers/crypto/starfive/Kconfig4
-rw-r--r--drivers/crypto/starfive/jh7110-aes.c597
-rw-r--r--drivers/crypto/starfive/jh7110-cryp.c43
-rw-r--r--drivers/crypto/starfive/jh7110-cryp.h10
-rw-r--r--drivers/crypto/starfive/jh7110-hash.c275
-rw-r--r--drivers/crypto/starfive/jh7110-rsa.c14
-rw-r--r--drivers/crypto/stm32/stm32-hash.c570
-rw-r--r--drivers/crypto/tegra/Makefile9
-rw-r--r--drivers/crypto/tegra/tegra-se-aes.c1933
-rw-r--r--drivers/crypto/tegra/tegra-se-hash.c1060
-rw-r--r--drivers/crypto/tegra/tegra-se-key.c156
-rw-r--r--drivers/crypto/tegra/tegra-se-main.c437
-rw-r--r--drivers/crypto/tegra/tegra-se.h560
-rw-r--r--drivers/crypto/virtio/virtio_crypto_akcipher_algs.c12
-rw-r--r--drivers/crypto/virtio/virtio_crypto_core.c3
-rw-r--r--drivers/crypto/vmx/.gitignore3
-rw-r--r--drivers/crypto/vmx/Kconfig14
-rw-r--r--drivers/crypto/vmx/Makefile23
-rw-r--r--drivers/crypto/vmx/aes.c134
-rw-r--r--drivers/crypto/vmx/aes_cbc.c133
-rw-r--r--drivers/crypto/vmx/aes_ctr.c149
-rw-r--r--drivers/crypto/vmx/aes_xts.c162
-rw-r--r--drivers/crypto/vmx/aesp8-ppc.h30
-rw-r--r--drivers/crypto/vmx/aesp8-ppc.pl3889
-rw-r--r--drivers/crypto/vmx/ghash.c185
-rw-r--r--drivers/crypto/vmx/ghashp8-ppc.pl243
-rw-r--r--drivers/crypto/vmx/ppc-xlate.pl231
-rw-r--r--drivers/crypto/vmx/vmx.c77
-rw-r--r--drivers/crypto/xilinx/zynqmp-aes-gcm.c3
132 files changed, 10284 insertions, 6976 deletions
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 0991f026cb07..94f23c6fc93b 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -67,6 +67,7 @@ config CRYPTO_DEV_GEODE
config ZCRYPT
tristate "Support for s390 cryptographic adapters"
depends on S390
+ depends on AP
select HW_RANDOM
help
Select this option if you want to enable support for
@@ -74,23 +75,6 @@ config ZCRYPT
to 8 in Coprocessor (CEXxC), EP11 Coprocessor (CEXxP)
or Accelerator (CEXxA) mode.
-config ZCRYPT_DEBUG
- bool "Enable debug features for s390 cryptographic adapters"
- default n
- depends on DEBUG_KERNEL
- depends on ZCRYPT
- help
- Say 'Y' here to enable some additional debug features on the
- s390 cryptographic adapters driver.
-
- There will be some more sysfs attributes displayed for ap cards
- and queues and some flags on crypto requests are interpreted as
- debugging messages to force error injection.
-
- Do not enable on production level kernel build.
-
- If unsure, say N.
-
config PKEY
tristate "Kernel API for protected key handling"
depends on S390
@@ -611,13 +595,13 @@ config CRYPTO_DEV_QCOM_RNG
To compile this driver as a module, choose M here. The
module will be called qcom-rng. If unsure, say N.
-config CRYPTO_DEV_VMX
- bool "Support for VMX cryptographic acceleration instructions"
- depends on PPC64 && VSX
- help
- Support for VMX cryptographic acceleration instructions.
-
-source "drivers/crypto/vmx/Kconfig"
+#config CRYPTO_DEV_VMX
+# bool "Support for VMX cryptographic acceleration instructions"
+# depends on PPC64 && VSX
+# help
+# Support for VMX cryptographic acceleration instructions.
+#
+#source "drivers/crypto/vmx/Kconfig"
config CRYPTO_DEV_IMGTEC_HASH
tristate "Imagination Technologies hardware hash accelerator"
@@ -660,6 +644,14 @@ config CRYPTO_DEV_ROCKCHIP_DEBUG
This will create /sys/kernel/debug/rk3288_crypto/stats for displaying
the number of requests per algorithm and other internal stats.
+config CRYPTO_DEV_TEGRA
+ tristate "Enable Tegra Security Engine"
+ depends on TEGRA_HOST1X
+ select CRYPTO_ENGINE
+
+ help
+ Select this to enable Tegra Security Engine which accelerates various
+ AES encryption/decryption and HASH algorithms.
config CRYPTO_DEV_ZYNQMP_AES
tristate "Support for Xilinx ZynqMP AES hw accelerator"
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index d859d6a5f3a4..ad4ccef67d12 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -41,8 +41,9 @@ obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o
obj-$(CONFIG_CRYPTO_DEV_SL3516) += gemini/
obj-y += stm32/
obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
+obj-$(CONFIG_CRYPTO_DEV_TEGRA) += tegra/
obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/
-obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
+#obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/
obj-$(CONFIG_CRYPTO_DEV_SAFEXCEL) += inside-secure/
obj-$(CONFIG_CRYPTO_DEV_ARTPEC6) += axis/
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
index d358334e5981..ee2a28c906ed 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c
@@ -362,7 +362,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq)
digestsize = SHA512_DIGEST_SIZE;
/* the padding could be up to two block. */
- buf = kzalloc(bs * 2, GFP_KERNEL | GFP_DMA);
+ buf = kcalloc(2, bs, GFP_KERNEL | GFP_DMA);
if (!buf) {
err = -ENOMEM;
goto theend;
diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c
index 83a9093eff25..a895e4289efa 100644
--- a/drivers/crypto/atmel-i2c.c
+++ b/drivers/crypto/atmel-i2c.c
@@ -51,7 +51,7 @@ static void atmel_i2c_checksum(struct atmel_i2c_cmd *cmd)
*__crc16 = cpu_to_le16(bitrev16(crc16(0, data, len)));
}
-void atmel_i2c_init_read_cmd(struct atmel_i2c_cmd *cmd)
+void atmel_i2c_init_read_config_cmd(struct atmel_i2c_cmd *cmd)
{
cmd->word_addr = COMMAND;
cmd->opcode = OPCODE_READ;
@@ -68,7 +68,31 @@ void atmel_i2c_init_read_cmd(struct atmel_i2c_cmd *cmd)
cmd->msecs = MAX_EXEC_TIME_READ;
cmd->rxsize = READ_RSP_SIZE;
}
-EXPORT_SYMBOL(atmel_i2c_init_read_cmd);
+EXPORT_SYMBOL(atmel_i2c_init_read_config_cmd);
+
+int atmel_i2c_init_read_otp_cmd(struct atmel_i2c_cmd *cmd, u16 addr)
+{
+ if (addr < 0 || addr > OTP_ZONE_SIZE)
+ return -1;
+
+ cmd->word_addr = COMMAND;
+ cmd->opcode = OPCODE_READ;
+ /*
+ * Read the word from OTP zone that may contain e.g. serial
+ * numbers or similar if persistently pre-initialized and locked
+ */
+ cmd->param1 = OTP_ZONE;
+ cmd->param2 = cpu_to_le16(addr);
+ cmd->count = READ_COUNT;
+
+ atmel_i2c_checksum(cmd);
+
+ cmd->msecs = MAX_EXEC_TIME_READ;
+ cmd->rxsize = READ_RSP_SIZE;
+
+ return 0;
+}
+EXPORT_SYMBOL(atmel_i2c_init_read_otp_cmd);
void atmel_i2c_init_random_cmd(struct atmel_i2c_cmd *cmd)
{
@@ -301,7 +325,7 @@ static int device_sanity_check(struct i2c_client *client)
if (!cmd)
return -ENOMEM;
- atmel_i2c_init_read_cmd(cmd);
+ atmel_i2c_init_read_config_cmd(cmd);
ret = atmel_i2c_send_receive(client, cmd);
if (ret)
diff --git a/drivers/crypto/atmel-i2c.h b/drivers/crypto/atmel-i2c.h
index c0bd429ee2c7..72f04c15682f 100644
--- a/drivers/crypto/atmel-i2c.h
+++ b/drivers/crypto/atmel-i2c.h
@@ -64,6 +64,10 @@ struct atmel_i2c_cmd {
/* Definitions for eeprom organization */
#define CONFIGURATION_ZONE 0
+#define OTP_ZONE 1
+
+/* Definitions for eeprom zone sizes */
+#define OTP_ZONE_SIZE 64
/* Definitions for Indexes common to all commands */
#define RSP_DATA_IDX 1 /* buffer index of data in response */
@@ -124,6 +128,7 @@ struct atmel_ecc_driver_data {
* @wake_token : wake token array of zeros
* @wake_token_sz : size in bytes of the wake_token
* @tfm_count : number of active crypto transformations on i2c client
+ * @hwrng : hold the hardware generated rng
*
* Reads and writes from/to the i2c client are sequential. The first byte
* transmitted to the device is treated as the byte size. Any attempt to send
@@ -177,7 +182,8 @@ void atmel_i2c_flush_queue(void);
int atmel_i2c_send_receive(struct i2c_client *client, struct atmel_i2c_cmd *cmd);
-void atmel_i2c_init_read_cmd(struct atmel_i2c_cmd *cmd);
+void atmel_i2c_init_read_config_cmd(struct atmel_i2c_cmd *cmd);
+int atmel_i2c_init_read_otp_cmd(struct atmel_i2c_cmd *cmd, u16 addr);
void atmel_i2c_init_random_cmd(struct atmel_i2c_cmd *cmd);
void atmel_i2c_init_genkey_cmd(struct atmel_i2c_cmd *cmd, u16 keyid);
int atmel_i2c_init_ecdh_cmd(struct atmel_i2c_cmd *cmd,
diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c
index c77f482d2a97..24ffdf505023 100644
--- a/drivers/crypto/atmel-sha204a.c
+++ b/drivers/crypto/atmel-sha204a.c
@@ -91,6 +91,62 @@ static int atmel_sha204a_rng_read(struct hwrng *rng, void *data, size_t max,
return max;
}
+static int atmel_sha204a_otp_read(struct i2c_client *client, u16 addr, u8 *otp)
+{
+ struct atmel_i2c_cmd cmd;
+ int ret = -1;
+
+ if (atmel_i2c_init_read_otp_cmd(&cmd, addr) < 0) {
+ dev_err(&client->dev, "failed, invalid otp address %04X\n",
+ addr);
+ return ret;
+ }
+
+ ret = atmel_i2c_send_receive(client, &cmd);
+
+ if (cmd.data[0] == 0xff) {
+ dev_err(&client->dev, "failed, device not ready\n");
+ return -ret;
+ }
+
+ memcpy(otp, cmd.data+1, 4);
+
+ return ret;
+}
+
+static ssize_t otp_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ u16 addr;
+ u8 otp[OTP_ZONE_SIZE];
+ char *str = buf;
+ struct i2c_client *client = to_i2c_client(dev);
+ int i;
+
+ for (addr = 0; addr < OTP_ZONE_SIZE/4; addr++) {
+ if (atmel_sha204a_otp_read(client, addr, otp + addr * 4) < 0) {
+ dev_err(dev, "failed to read otp zone\n");
+ break;
+ }
+ }
+
+ for (i = 0; i < addr*2; i++)
+ str += sprintf(str, "%02X", otp[i]);
+ str += sprintf(str, "\n");
+ return str - buf;
+}
+static DEVICE_ATTR_RO(otp);
+
+static struct attribute *atmel_sha204a_attrs[] = {
+ &dev_attr_otp.attr,
+ NULL
+};
+
+static const struct attribute_group atmel_sha204a_groups = {
+ .name = "atsha204a",
+ .attrs = atmel_sha204a_attrs,
+};
+
static int atmel_sha204a_probe(struct i2c_client *client)
{
struct atmel_i2c_client_priv *i2c_priv;
@@ -111,6 +167,16 @@ static int atmel_sha204a_probe(struct i2c_client *client)
if (ret)
dev_warn(&client->dev, "failed to register RNG (%d)\n", ret);
+ /* otp read out */
+ if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
+ return -ENODEV;
+
+ ret = sysfs_create_group(&client->dev.kobj, &atmel_sha204a_groups);
+ if (ret) {
+ dev_err(&client->dev, "failed to register sysfs entry\n");
+ return ret;
+ }
+
return ret;
}
@@ -123,6 +189,8 @@ static void atmel_sha204a_remove(struct i2c_client *client)
return;
}
+ sysfs_remove_group(&client->dev.kobj, &atmel_sha204a_groups);
+
kfree((void *)i2c_priv->hwrng.priv);
}
diff --git a/drivers/crypto/bcm/spu2.c b/drivers/crypto/bcm/spu2.c
index 07989bb8c220..3fdc64b5a65e 100644
--- a/drivers/crypto/bcm/spu2.c
+++ b/drivers/crypto/bcm/spu2.c
@@ -495,7 +495,7 @@ static void spu2_dump_omd(u8 *omd, u16 hash_key_len, u16 ciph_key_len,
if (hash_iv_len) {
packet_log(" Hash IV Length %u bytes\n", hash_iv_len);
packet_dump(" hash IV: ", ptr, hash_iv_len);
- ptr += ciph_key_len;
+ ptr += hash_iv_len;
}
if (ciph_iv_len) {
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index bdf367f3f679..bd418dea586d 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -512,6 +512,7 @@ static const struct of_device_id caam_match[] = {
MODULE_DEVICE_TABLE(of, caam_match);
struct caam_imx_data {
+ bool page0_access;
const struct clk_bulk_data *clks;
int num_clks;
};
@@ -524,6 +525,7 @@ static const struct clk_bulk_data caam_imx6_clks[] = {
};
static const struct caam_imx_data caam_imx6_data = {
+ .page0_access = true,
.clks = caam_imx6_clks,
.num_clks = ARRAY_SIZE(caam_imx6_clks),
};
@@ -534,6 +536,7 @@ static const struct clk_bulk_data caam_imx7_clks[] = {
};
static const struct caam_imx_data caam_imx7_data = {
+ .page0_access = true,
.clks = caam_imx7_clks,
.num_clks = ARRAY_SIZE(caam_imx7_clks),
};
@@ -545,6 +548,7 @@ static const struct clk_bulk_data caam_imx6ul_clks[] = {
};
static const struct caam_imx_data caam_imx6ul_data = {
+ .page0_access = true,
.clks = caam_imx6ul_clks,
.num_clks = ARRAY_SIZE(caam_imx6ul_clks),
};
@@ -554,15 +558,19 @@ static const struct clk_bulk_data caam_vf610_clks[] = {
};
static const struct caam_imx_data caam_vf610_data = {
+ .page0_access = true,
.clks = caam_vf610_clks,
.num_clks = ARRAY_SIZE(caam_vf610_clks),
};
+static const struct caam_imx_data caam_imx8ulp_data;
+
static const struct soc_device_attribute caam_imx_soc_table[] = {
{ .soc_id = "i.MX6UL", .data = &caam_imx6ul_data },
{ .soc_id = "i.MX6*", .data = &caam_imx6_data },
{ .soc_id = "i.MX7*", .data = &caam_imx7_data },
{ .soc_id = "i.MX8M*", .data = &caam_imx7_data },
+ { .soc_id = "i.MX8ULP", .data = &caam_imx8ulp_data },
{ .soc_id = "VF*", .data = &caam_vf610_data },
{ .family = "Freescale i.MX" },
{ /* sentinel */ }
@@ -860,6 +868,7 @@ static int caam_probe(struct platform_device *pdev)
int pg_size;
int BLOCK_OFFSET = 0;
bool reg_access = true;
+ const struct caam_imx_data *imx_soc_data;
ctrlpriv = devm_kzalloc(&pdev->dev, sizeof(*ctrlpriv), GFP_KERNEL);
if (!ctrlpriv)
@@ -894,12 +903,20 @@ static int caam_probe(struct platform_device *pdev)
return -EINVAL;
}
+ imx_soc_data = imx_soc_match->data;
+ reg_access = reg_access && imx_soc_data->page0_access;
+ /*
+ * CAAM clocks cannot be controlled from kernel.
+ */
+ if (!imx_soc_data->num_clks)
+ goto iomap_ctrl;
+
ret = init_clocks(dev, imx_soc_match->data);
if (ret)
return ret;
}
-
+iomap_ctrl:
/* Get configuration properties from device tree */
/* First, get register page */
ctrl = devm_of_iomap(dev, nprop, 0, NULL);
diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig
index 32268e239bf1..f394e45e11ab 100644
--- a/drivers/crypto/ccp/Kconfig
+++ b/drivers/crypto/ccp/Kconfig
@@ -38,7 +38,7 @@ config CRYPTO_DEV_CCP_CRYPTO
config CRYPTO_DEV_SP_PSP
bool "Platform Security Processor (PSP) device"
default y
- depends on CRYPTO_DEV_CCP_DD && X86_64
+ depends on CRYPTO_DEV_CCP_DD && X86_64 && AMD_IOMMU
help
Provide support for the AMD Platform Security Processor (PSP).
The PSP is a dedicated processor that provides support for key
diff --git a/drivers/crypto/ccp/platform-access.c b/drivers/crypto/ccp/platform-access.c
index 94367bc49e35..1b8ed3389733 100644
--- a/drivers/crypto/ccp/platform-access.c
+++ b/drivers/crypto/ccp/platform-access.c
@@ -118,9 +118,16 @@ int psp_send_platform_access_msg(enum psp_platform_access_msg msg,
goto unlock;
}
- /* Store the status in request header for caller to investigate */
+ /*
+ * Read status from PSP. If status is non-zero, it indicates an error
+ * occurred during "processing" of the command.
+ * If status is zero, it indicates the command was "processed"
+ * successfully, but the result of the command is in the payload.
+ * Return both cases to the caller as -EIO to investigate.
+ */
cmd_reg = ioread32(cmd);
- req->header.status = FIELD_GET(PSP_CMDRESP_STS, cmd_reg);
+ if (FIELD_GET(PSP_CMDRESP_STS, cmd_reg))
+ req->header.status = FIELD_GET(PSP_CMDRESP_STS, cmd_reg);
if (req->header.status) {
ret = -EIO;
goto unlock;
diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
index 124a2e0c8999..56bf832c2947 100644
--- a/drivers/crypto/ccp/psp-dev.c
+++ b/drivers/crypto/ccp/psp-dev.c
@@ -156,11 +156,14 @@ static unsigned int psp_get_capability(struct psp_device *psp)
}
psp->capability = val;
- /* Detect if TSME and SME are both enabled */
+ /* Detect TSME and/or SME status */
if (PSP_CAPABILITY(psp, PSP_SECURITY_REPORTING) &&
- psp->capability & (PSP_SECURITY_TSME_STATUS << PSP_CAPABILITY_PSP_SECURITY_OFFSET) &&
- cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
- dev_notice(psp->dev, "psp: Both TSME and SME are active, SME is unnecessary when TSME is active.\n");
+ psp->capability & (PSP_SECURITY_TSME_STATUS << PSP_CAPABILITY_PSP_SECURITY_OFFSET)) {
+ if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
+ dev_notice(psp->dev, "psp: Both TSME and SME are active, SME is unnecessary when TSME is active.\n");
+ else
+ dev_notice(psp->dev, "psp: TSME enabled\n");
+ }
return 0;
}
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index b04bc1d3d627..2102377f727b 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -21,14 +21,18 @@
#include <linux/hw_random.h>
#include <linux/ccp.h>
#include <linux/firmware.h>
+#include <linux/panic_notifier.h>
#include <linux/gfp.h>
#include <linux/cpufeature.h>
#include <linux/fs.h>
#include <linux/fs_struct.h>
#include <linux/psp.h>
+#include <linux/amd-iommu.h>
#include <asm/smp.h>
#include <asm/cacheflush.h>
+#include <asm/e820/types.h>
+#include <asm/sev.h>
#include "psp-dev.h"
#include "sev-dev.h"
@@ -37,6 +41,19 @@
#define SEV_FW_FILE "amd/sev.fw"
#define SEV_FW_NAME_SIZE 64
+/* Minimum firmware version required for the SEV-SNP support */
+#define SNP_MIN_API_MAJOR 1
+#define SNP_MIN_API_MINOR 51
+
+/*
+ * Maximum number of firmware-writable buffers that might be specified
+ * in the parameters of a legacy SEV command buffer.
+ */
+#define CMD_BUF_FW_WRITABLE_MAX 2
+
+/* Leave room in the descriptor array for an end-of-list indicator. */
+#define CMD_BUF_DESC_MAX (CMD_BUF_FW_WRITABLE_MAX + 1)
+
static DEFINE_MUTEX(sev_cmd_mutex);
static struct sev_misc_dev *misc_dev;
@@ -68,9 +85,14 @@ static int psp_timeout;
* The TMR is a 1MB area that must be 1MB aligned. Use the page allocator
* to allocate the memory, which will return aligned memory for the specified
* allocation order.
+ *
+ * When SEV-SNP is enabled the TMR needs to be 2MB aligned and 2MB sized.
*/
-#define SEV_ES_TMR_SIZE (1024 * 1024)
+#define SEV_TMR_SIZE (1024 * 1024)
+#define SNP_TMR_SIZE (2 * 1024 * 1024)
+
static void *sev_es_tmr;
+static size_t sev_es_tmr_size = SEV_TMR_SIZE;
/* INIT_EX NV Storage:
* The NV Storage is a 32Kb area and must be 4Kb page aligned. Use the page
@@ -80,6 +102,13 @@ static void *sev_es_tmr;
#define NV_LENGTH (32 * 1024)
static void *sev_init_ex_buffer;
+/*
+ * SEV_DATA_RANGE_LIST:
+ * Array containing range of pages that firmware transitions to HV-fixed
+ * page state.
+ */
+static struct sev_data_range_list *snp_range_list;
+
static inline bool sev_version_greater_or_equal(u8 maj, u8 min)
{
struct sev_device *sev = psp_master->sev_data;
@@ -115,6 +144,25 @@ static int sev_wait_cmd_ioc(struct sev_device *sev,
{
int ret;
+ /*
+ * If invoked during panic handling, local interrupts are disabled,
+ * so the PSP command completion interrupt can't be used. Poll for
+ * PSP command completion instead.
+ */
+ if (irqs_disabled()) {
+ unsigned long timeout_usecs = (timeout * USEC_PER_SEC) / 10;
+
+ /* Poll for SEV command completion: */
+ while (timeout_usecs--) {
+ *reg = ioread32(sev->io_regs + sev->vdata->cmdresp_reg);
+ if (*reg & PSP_CMDRESP_RESP)
+ return 0;
+
+ udelay(10);
+ }
+ return -ETIMEDOUT;
+ }
+
ret = wait_event_timeout(sev->int_queue,
sev->int_rcvd, timeout * HZ);
if (!ret)
@@ -130,6 +178,8 @@ static int sev_cmd_buffer_len(int cmd)
switch (cmd) {
case SEV_CMD_INIT: return sizeof(struct sev_data_init);
case SEV_CMD_INIT_EX: return sizeof(struct sev_data_init_ex);
+ case SEV_CMD_SNP_SHUTDOWN_EX: return sizeof(struct sev_data_snp_shutdown_ex);
+ case SEV_CMD_SNP_INIT_EX: return sizeof(struct sev_data_snp_init_ex);
case SEV_CMD_PLATFORM_STATUS: return sizeof(struct sev_user_data_status);
case SEV_CMD_PEK_CSR: return sizeof(struct sev_data_pek_csr);
case SEV_CMD_PEK_CERT_IMPORT: return sizeof(struct sev_data_pek_cert_import);
@@ -158,23 +208,27 @@ static int sev_cmd_buffer_len(int cmd)
case SEV_CMD_GET_ID: return sizeof(struct sev_data_get_id);
case SEV_CMD_ATTESTATION_REPORT: return sizeof(struct sev_data_attestation_report);
case SEV_CMD_SEND_CANCEL: return sizeof(struct sev_data_send_cancel);
+ case SEV_CMD_SNP_GCTX_CREATE: return sizeof(struct sev_data_snp_addr);
+ case SEV_CMD_SNP_LAUNCH_START: return sizeof(struct sev_data_snp_launch_start);
+ case SEV_CMD_SNP_LAUNCH_UPDATE: return sizeof(struct sev_data_snp_launch_update);
+ case SEV_CMD_SNP_ACTIVATE: return sizeof(struct sev_data_snp_activate);
+ case SEV_CMD_SNP_DECOMMISSION: return sizeof(struct sev_data_snp_addr);
+ case SEV_CMD_SNP_PAGE_RECLAIM: return sizeof(struct sev_data_snp_page_reclaim);
+ case SEV_CMD_SNP_GUEST_STATUS: return sizeof(struct sev_data_snp_guest_status);
+ case SEV_CMD_SNP_LAUNCH_FINISH: return sizeof(struct sev_data_snp_launch_finish);
+ case SEV_CMD_SNP_DBG_DECRYPT: return sizeof(struct sev_data_snp_dbg);
+ case SEV_CMD_SNP_DBG_ENCRYPT: return sizeof(struct sev_data_snp_dbg);
+ case SEV_CMD_SNP_PAGE_UNSMASH: return sizeof(struct sev_data_snp_page_unsmash);
+ case SEV_CMD_SNP_PLATFORM_STATUS: return sizeof(struct sev_data_snp_addr);
+ case SEV_CMD_SNP_GUEST_REQUEST: return sizeof(struct sev_data_snp_guest_request);
+ case SEV_CMD_SNP_CONFIG: return sizeof(struct sev_user_data_snp_config);
+ case SEV_CMD_SNP_COMMIT: return sizeof(struct sev_data_snp_commit);
default: return 0;
}
return 0;
}
-static void *sev_fw_alloc(unsigned long len)
-{
- struct page *page;
-
- page = alloc_pages(GFP_KERNEL, get_order(len));
- if (!page)
- return NULL;
-
- return page_address(page);
-}
-
static struct file *open_file_as_root(const char *filename, int flags, umode_t mode)
{
struct file *fp;
@@ -305,13 +359,485 @@ static int sev_write_init_ex_file_if_required(int cmd_id)
return sev_write_init_ex_file();
}
+/*
+ * snp_reclaim_pages() needs __sev_do_cmd_locked(), and __sev_do_cmd_locked()
+ * needs snp_reclaim_pages(), so a forward declaration is needed.
+ */
+static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret);
+
+static int snp_reclaim_pages(unsigned long paddr, unsigned int npages, bool locked)
+{
+ int ret, err, i;
+
+ paddr = __sme_clr(ALIGN_DOWN(paddr, PAGE_SIZE));
+
+ for (i = 0; i < npages; i++, paddr += PAGE_SIZE) {
+ struct sev_data_snp_page_reclaim data = {0};
+
+ data.paddr = paddr;
+
+ if (locked)
+ ret = __sev_do_cmd_locked(SEV_CMD_SNP_PAGE_RECLAIM, &data, &err);
+ else
+ ret = sev_do_cmd(SEV_CMD_SNP_PAGE_RECLAIM, &data, &err);
+
+ if (ret)
+ goto cleanup;
+
+ ret = rmp_make_shared(__phys_to_pfn(paddr), PG_LEVEL_4K);
+ if (ret)
+ goto cleanup;
+ }
+
+ return 0;
+
+cleanup:
+ /*
+ * If there was a failure reclaiming the page then it is no longer safe
+ * to release it back to the system; leak it instead.
+ */
+ snp_leak_pages(__phys_to_pfn(paddr), npages - i);
+ return ret;
+}
+
+static int rmp_mark_pages_firmware(unsigned long paddr, unsigned int npages, bool locked)
+{
+ unsigned long pfn = __sme_clr(paddr) >> PAGE_SHIFT;
+ int rc, i;
+
+ for (i = 0; i < npages; i++, pfn++) {
+ rc = rmp_make_private(pfn, 0, PG_LEVEL_4K, 0, true);
+ if (rc)
+ goto cleanup;
+ }
+
+ return 0;
+
+cleanup:
+ /*
+ * Try unrolling the firmware state changes by
+ * reclaiming the pages which were already changed to the
+ * firmware state.
+ */
+ snp_reclaim_pages(paddr, i, locked);
+
+ return rc;
+}
+
+static struct page *__snp_alloc_firmware_pages(gfp_t gfp_mask, int order)
+{
+ unsigned long npages = 1ul << order, paddr;
+ struct sev_device *sev;
+ struct page *page;
+
+ if (!psp_master || !psp_master->sev_data)
+ return NULL;
+
+ page = alloc_pages(gfp_mask, order);
+ if (!page)
+ return NULL;
+
+ /* If SEV-SNP is initialized then add the page in RMP table. */
+ sev = psp_master->sev_data;
+ if (!sev->snp_initialized)
+ return page;
+
+ paddr = __pa((unsigned long)page_address(page));
+ if (rmp_mark_pages_firmware(paddr, npages, false))
+ return NULL;
+
+ return page;
+}
+
+void *snp_alloc_firmware_page(gfp_t gfp_mask)
+{
+ struct page *page;
+
+ page = __snp_alloc_firmware_pages(gfp_mask, 0);
+
+ return page ? page_address(page) : NULL;
+}
+EXPORT_SYMBOL_GPL(snp_alloc_firmware_page);
+
+static void __snp_free_firmware_pages(struct page *page, int order, bool locked)
+{
+ struct sev_device *sev = psp_master->sev_data;
+ unsigned long paddr, npages = 1ul << order;
+
+ if (!page)
+ return;
+
+ paddr = __pa((unsigned long)page_address(page));
+ if (sev->snp_initialized &&
+ snp_reclaim_pages(paddr, npages, locked))
+ return;
+
+ __free_pages(page, order);
+}
+
+void snp_free_firmware_page(void *addr)
+{
+ if (!addr)
+ return;
+
+ __snp_free_firmware_pages(virt_to_page(addr), 0, false);
+}
+EXPORT_SYMBOL_GPL(snp_free_firmware_page);
+
+static void *sev_fw_alloc(unsigned long len)
+{
+ struct page *page;
+
+ page = __snp_alloc_firmware_pages(GFP_KERNEL, get_order(len));
+ if (!page)
+ return NULL;
+
+ return page_address(page);
+}
+
+/**
+ * struct cmd_buf_desc - descriptors for managing legacy SEV command address
+ * parameters corresponding to buffers that may be written to by firmware.
+ *
+ * @paddr_ptr: pointer to the address parameter in the command buffer which may
+ * need to be saved/restored depending on whether a bounce buffer
+ * is used. In the case of a bounce buffer, the command buffer
+ * needs to be updated with the address of the new bounce buffer
+ * snp_map_cmd_buf_desc() has allocated specifically for it. Must
+ * be NULL if this descriptor is only an end-of-list indicator.
+ *
+ * @paddr_orig: storage for the original address parameter, which can be used to
+ * restore the original value in @paddr_ptr in cases where it is
+ * replaced with the address of a bounce buffer.
+ *
+ * @len: length of buffer located at the address originally stored at @paddr_ptr
+ *
+ * @guest_owned: true if the address corresponds to guest-owned pages, in which
+ * case bounce buffers are not needed.
+ */
+struct cmd_buf_desc {
+ u64 *paddr_ptr;
+ u64 paddr_orig;
+ u32 len;
+ bool guest_owned;
+};
+
+/*
+ * If a legacy SEV command parameter is a memory address, those pages in
+ * turn need to be transitioned to/from firmware-owned before/after
+ * executing the firmware command.
+ *
+ * Additionally, in cases where those pages are not guest-owned, a bounce
+ * buffer is needed in place of the original memory address parameter.
+ *
+ * A set of descriptors are used to keep track of this handling, and
+ * initialized here based on the specific commands being executed.
+ */
+static void snp_populate_cmd_buf_desc_list(int cmd, void *cmd_buf,
+ struct cmd_buf_desc *desc_list)
+{
+ switch (cmd) {
+ case SEV_CMD_PDH_CERT_EXPORT: {
+ struct sev_data_pdh_cert_export *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->pdh_cert_address;
+ desc_list[0].len = data->pdh_cert_len;
+ desc_list[1].paddr_ptr = &data->cert_chain_address;
+ desc_list[1].len = data->cert_chain_len;
+ break;
+ }
+ case SEV_CMD_GET_ID: {
+ struct sev_data_get_id *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->address;
+ desc_list[0].len = data->len;
+ break;
+ }
+ case SEV_CMD_PEK_CSR: {
+ struct sev_data_pek_csr *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->address;
+ desc_list[0].len = data->len;
+ break;
+ }
+ case SEV_CMD_LAUNCH_UPDATE_DATA: {
+ struct sev_data_launch_update_data *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->address;
+ desc_list[0].len = data->len;
+ desc_list[0].guest_owned = true;
+ break;
+ }
+ case SEV_CMD_LAUNCH_UPDATE_VMSA: {
+ struct sev_data_launch_update_vmsa *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->address;
+ desc_list[0].len = data->len;
+ desc_list[0].guest_owned = true;
+ break;
+ }
+ case SEV_CMD_LAUNCH_MEASURE: {
+ struct sev_data_launch_measure *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->address;
+ desc_list[0].len = data->len;
+ break;
+ }
+ case SEV_CMD_LAUNCH_UPDATE_SECRET: {
+ struct sev_data_launch_secret *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->guest_address;
+ desc_list[0].len = data->guest_len;
+ desc_list[0].guest_owned = true;
+ break;
+ }
+ case SEV_CMD_DBG_DECRYPT: {
+ struct sev_data_dbg *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->dst_addr;
+ desc_list[0].len = data->len;
+ desc_list[0].guest_owned = true;
+ break;
+ }
+ case SEV_CMD_DBG_ENCRYPT: {
+ struct sev_data_dbg *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->dst_addr;
+ desc_list[0].len = data->len;
+ desc_list[0].guest_owned = true;
+ break;
+ }
+ case SEV_CMD_ATTESTATION_REPORT: {
+ struct sev_data_attestation_report *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->address;
+ desc_list[0].len = data->len;
+ break;
+ }
+ case SEV_CMD_SEND_START: {
+ struct sev_data_send_start *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->session_address;
+ desc_list[0].len = data->session_len;
+ break;
+ }
+ case SEV_CMD_SEND_UPDATE_DATA: {
+ struct sev_data_send_update_data *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->hdr_address;
+ desc_list[0].len = data->hdr_len;
+ desc_list[1].paddr_ptr = &data->trans_address;
+ desc_list[1].len = data->trans_len;
+ break;
+ }
+ case SEV_CMD_SEND_UPDATE_VMSA: {
+ struct sev_data_send_update_vmsa *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->hdr_address;
+ desc_list[0].len = data->hdr_len;
+ desc_list[1].paddr_ptr = &data->trans_address;
+ desc_list[1].len = data->trans_len;
+ break;
+ }
+ case SEV_CMD_RECEIVE_UPDATE_DATA: {
+ struct sev_data_receive_update_data *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->guest_address;
+ desc_list[0].len = data->guest_len;
+ desc_list[0].guest_owned = true;
+ break;
+ }
+ case SEV_CMD_RECEIVE_UPDATE_VMSA: {
+ struct sev_data_receive_update_vmsa *data = cmd_buf;
+
+ desc_list[0].paddr_ptr = &data->guest_address;
+ desc_list[0].len = data->guest_len;
+ desc_list[0].guest_owned = true;
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+static int snp_map_cmd_buf_desc(struct cmd_buf_desc *desc)
+{
+ unsigned int npages;
+
+ if (!desc->len)
+ return 0;
+
+ /* Allocate a bounce buffer if this isn't a guest owned page. */
+ if (!desc->guest_owned) {
+ struct page *page;
+
+ page = alloc_pages(GFP_KERNEL_ACCOUNT, get_order(desc->len));
+ if (!page) {
+ pr_warn("Failed to allocate bounce buffer for SEV legacy command.\n");
+ return -ENOMEM;
+ }
+
+ desc->paddr_orig = *desc->paddr_ptr;
+ *desc->paddr_ptr = __psp_pa(page_to_virt(page));
+ }
+
+ npages = PAGE_ALIGN(desc->len) >> PAGE_SHIFT;
+
+ /* Transition the buffer to firmware-owned. */
+ if (rmp_mark_pages_firmware(*desc->paddr_ptr, npages, true)) {
+ pr_warn("Error moving pages to firmware-owned state for SEV legacy command.\n");
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int snp_unmap_cmd_buf_desc(struct cmd_buf_desc *desc)
+{
+ unsigned int npages;
+
+ if (!desc->len)
+ return 0;
+
+ npages = PAGE_ALIGN(desc->len) >> PAGE_SHIFT;
+
+ /* Transition the buffers back to hypervisor-owned. */
+ if (snp_reclaim_pages(*desc->paddr_ptr, npages, true)) {
+ pr_warn("Failed to reclaim firmware-owned pages while issuing SEV legacy command.\n");
+ return -EFAULT;
+ }
+
+ /* Copy data from bounce buffer and then free it. */
+ if (!desc->guest_owned) {
+ void *bounce_buf = __va(__sme_clr(*desc->paddr_ptr));
+ void *dst_buf = __va(__sme_clr(desc->paddr_orig));
+
+ memcpy(dst_buf, bounce_buf, desc->len);
+ __free_pages(virt_to_page(bounce_buf), get_order(desc->len));
+
+ /* Restore the original address in the command buffer. */
+ *desc->paddr_ptr = desc->paddr_orig;
+ }
+
+ return 0;
+}
+
+static int snp_map_cmd_buf_desc_list(int cmd, void *cmd_buf, struct cmd_buf_desc *desc_list)
+{
+ int i;
+
+ snp_populate_cmd_buf_desc_list(cmd, cmd_buf, desc_list);
+
+ for (i = 0; i < CMD_BUF_DESC_MAX; i++) {
+ struct cmd_buf_desc *desc = &desc_list[i];
+
+ if (!desc->paddr_ptr)
+ break;
+
+ if (snp_map_cmd_buf_desc(desc))
+ goto err_unmap;
+ }
+
+ return 0;
+
+err_unmap:
+ for (i--; i >= 0; i--)
+ snp_unmap_cmd_buf_desc(&desc_list[i]);
+
+ return -EFAULT;
+}
+
+static int snp_unmap_cmd_buf_desc_list(struct cmd_buf_desc *desc_list)
+{
+ int i, ret = 0;
+
+ for (i = 0; i < CMD_BUF_DESC_MAX; i++) {
+ struct cmd_buf_desc *desc = &desc_list[i];
+
+ if (!desc->paddr_ptr)
+ break;
+
+ if (snp_unmap_cmd_buf_desc(&desc_list[i]))
+ ret = -EFAULT;
+ }
+
+ return ret;
+}
+
+static bool sev_cmd_buf_writable(int cmd)
+{
+ switch (cmd) {
+ case SEV_CMD_PLATFORM_STATUS:
+ case SEV_CMD_GUEST_STATUS:
+ case SEV_CMD_LAUNCH_START:
+ case SEV_CMD_RECEIVE_START:
+ case SEV_CMD_LAUNCH_MEASURE:
+ case SEV_CMD_SEND_START:
+ case SEV_CMD_SEND_UPDATE_DATA:
+ case SEV_CMD_SEND_UPDATE_VMSA:
+ case SEV_CMD_PEK_CSR:
+ case SEV_CMD_PDH_CERT_EXPORT:
+ case SEV_CMD_GET_ID:
+ case SEV_CMD_ATTESTATION_REPORT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/* After SNP is INIT'ed, the behavior of legacy SEV commands is changed. */
+static bool snp_legacy_handling_needed(int cmd)
+{
+ struct sev_device *sev = psp_master->sev_data;
+
+ return cmd < SEV_CMD_SNP_INIT && sev->snp_initialized;
+}
+
+static int snp_prep_cmd_buf(int cmd, void *cmd_buf, struct cmd_buf_desc *desc_list)
+{
+ if (!snp_legacy_handling_needed(cmd))
+ return 0;
+
+ if (snp_map_cmd_buf_desc_list(cmd, cmd_buf, desc_list))
+ return -EFAULT;
+
+ /*
+ * Before command execution, the command buffer needs to be put into
+ * the firmware-owned state.
+ */
+ if (sev_cmd_buf_writable(cmd)) {
+ if (rmp_mark_pages_firmware(__pa(cmd_buf), 1, true))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int snp_reclaim_cmd_buf(int cmd, void *cmd_buf)
+{
+ if (!snp_legacy_handling_needed(cmd))
+ return 0;
+
+ /*
+ * After command completion, the command buffer needs to be put back
+ * into the hypervisor-owned state.
+ */
+ if (sev_cmd_buf_writable(cmd))
+ if (snp_reclaim_pages(__pa(cmd_buf), 1, true))
+ return -EFAULT;
+
+ return 0;
+}
+
static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)
{
+ struct cmd_buf_desc desc_list[CMD_BUF_DESC_MAX] = {0};
struct psp_device *psp = psp_master;
struct sev_device *sev;
unsigned int cmdbuff_hi, cmdbuff_lo;
unsigned int phys_lsb, phys_msb;
unsigned int reg, ret = 0;
+ void *cmd_buf;
int buf_len;
if (!psp || !psp->sev_data)
@@ -331,12 +857,47 @@ static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)
* work for some memory, e.g. vmalloc'd addresses, and @data may not be
* physically contiguous.
*/
- if (data)
- memcpy(sev->cmd_buf, data, buf_len);
+ if (data) {
+ /*
+ * Commands are generally issued one at a time and require the
+ * sev_cmd_mutex, but there could be recursive firmware requests
+ * due to SEV_CMD_SNP_PAGE_RECLAIM needing to be issued while
+ * preparing buffers for another command. This is the only known
+ * case of nesting in the current code, so exactly one
+ * additional command buffer is available for that purpose.
+ */
+ if (!sev->cmd_buf_active) {
+ cmd_buf = sev->cmd_buf;
+ sev->cmd_buf_active = true;
+ } else if (!sev->cmd_buf_backup_active) {
+ cmd_buf = sev->cmd_buf_backup;
+ sev->cmd_buf_backup_active = true;
+ } else {
+ dev_err(sev->dev,
+ "SEV: too many firmware commands in progress, no command buffers available.\n");
+ return -EBUSY;
+ }
+
+ memcpy(cmd_buf, data, buf_len);
+
+ /*
+ * The behavior of the SEV-legacy commands is altered when the
+ * SNP firmware is in the INIT state.
+ */
+ ret = snp_prep_cmd_buf(cmd, cmd_buf, desc_list);
+ if (ret) {
+ dev_err(sev->dev,
+ "SEV: failed to prepare buffer for legacy command 0x%x. Error: %d\n",
+ cmd, ret);
+ return ret;
+ }
+ } else {
+ cmd_buf = sev->cmd_buf;
+ }
/* Get the physical address of the command buffer */
- phys_lsb = data ? lower_32_bits(__psp_pa(sev->cmd_buf)) : 0;
- phys_msb = data ? upper_32_bits(__psp_pa(sev->cmd_buf)) : 0;
+ phys_lsb = data ? lower_32_bits(__psp_pa(cmd_buf)) : 0;
+ phys_msb = data ? upper_32_bits(__psp_pa(cmd_buf)) : 0;
dev_dbg(sev->dev, "sev command id %#x buffer 0x%08x%08x timeout %us\n",
cmd, phys_msb, phys_lsb, psp_timeout);
@@ -390,20 +951,41 @@ static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)
ret = sev_write_init_ex_file_if_required(cmd);
}
- print_hex_dump_debug("(out): ", DUMP_PREFIX_OFFSET, 16, 2, data,
- buf_len, false);
-
/*
* Copy potential output from the PSP back to data. Do this even on
* failure in case the caller wants to glean something from the error.
*/
- if (data)
- memcpy(data, sev->cmd_buf, buf_len);
+ if (data) {
+ int ret_reclaim;
+ /*
+ * Restore the page state after the command completes.
+ */
+ ret_reclaim = snp_reclaim_cmd_buf(cmd, cmd_buf);
+ if (ret_reclaim) {
+ dev_err(sev->dev,
+ "SEV: failed to reclaim buffer for legacy command %#x. Error: %d\n",
+ cmd, ret_reclaim);
+ return ret_reclaim;
+ }
+
+ memcpy(data, cmd_buf, buf_len);
+
+ if (sev->cmd_buf_backup_active)
+ sev->cmd_buf_backup_active = false;
+ else
+ sev->cmd_buf_active = false;
+
+ if (snp_unmap_cmd_buf_desc_list(desc_list))
+ return -EFAULT;
+ }
+
+ print_hex_dump_debug("(out): ", DUMP_PREFIX_OFFSET, 16, 2, data,
+ buf_len, false);
return ret;
}
-static int sev_do_cmd(int cmd, void *data, int *psp_ret)
+int sev_do_cmd(int cmd, void *data, int *psp_ret)
{
int rc;
@@ -413,6 +995,7 @@ static int sev_do_cmd(int cmd, void *data, int *psp_ret)
return rc;
}
+EXPORT_SYMBOL_GPL(sev_do_cmd);
static int __sev_init_locked(int *error)
{
@@ -427,7 +1010,7 @@ static int __sev_init_locked(int *error)
data.tmr_address = __pa(sev_es_tmr);
data.flags |= SEV_INIT_FLAGS_SEV_ES;
- data.tmr_len = SEV_ES_TMR_SIZE;
+ data.tmr_len = sev_es_tmr_size;
}
return __sev_do_cmd_locked(SEV_CMD_INIT, &data, error);
@@ -450,7 +1033,7 @@ static int __sev_init_ex_locked(int *error)
data.tmr_address = __pa(sev_es_tmr);
data.flags |= SEV_INIT_FLAGS_SEV_ES;
- data.tmr_len = SEV_ES_TMR_SIZE;
+ data.tmr_len = sev_es_tmr_size;
}
return __sev_do_cmd_locked(SEV_CMD_INIT_EX, &data, error);
@@ -464,26 +1047,218 @@ static inline int __sev_do_init_locked(int *psp_ret)
return __sev_init_locked(psp_ret);
}
-static int __sev_platform_init_locked(int *error)
+static void snp_set_hsave_pa(void *arg)
+{
+ wrmsrl(MSR_VM_HSAVE_PA, 0);
+}
+
+static int snp_filter_reserved_mem_regions(struct resource *rs, void *arg)
+{
+ struct sev_data_range_list *range_list = arg;
+ struct sev_data_range *range = &range_list->ranges[range_list->num_elements];
+ size_t size;
+
+ /*
+ * Ensure the list of HV_FIXED pages that will be passed to firmware
+ * do not exceed the page-sized argument buffer.
+ */
+ if ((range_list->num_elements * sizeof(struct sev_data_range) +
+ sizeof(struct sev_data_range_list)) > PAGE_SIZE)
+ return -E2BIG;
+
+ switch (rs->desc) {
+ case E820_TYPE_RESERVED:
+ case E820_TYPE_PMEM:
+ case E820_TYPE_ACPI:
+ range->base = rs->start & PAGE_MASK;
+ size = PAGE_ALIGN((rs->end + 1) - rs->start);
+ range->page_count = size >> PAGE_SHIFT;
+ range_list->num_elements++;
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int __sev_snp_init_locked(int *error)
{
- int rc = 0, psp_ret = SEV_RET_NO_FW_CALL;
struct psp_device *psp = psp_master;
+ struct sev_data_snp_init_ex data;
struct sev_device *sev;
+ void *arg = &data;
+ int cmd, rc = 0;
- if (!psp || !psp->sev_data)
+ if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return -ENODEV;
sev = psp->sev_data;
- if (sev->state == SEV_STATE_INIT)
+ if (sev->snp_initialized)
return 0;
- if (sev_init_ex_buffer) {
- rc = sev_read_init_ex_file();
- if (rc)
+ if (!sev_version_greater_or_equal(SNP_MIN_API_MAJOR, SNP_MIN_API_MINOR)) {
+ dev_dbg(sev->dev, "SEV-SNP support requires firmware version >= %d:%d\n",
+ SNP_MIN_API_MAJOR, SNP_MIN_API_MINOR);
+ return 0;
+ }
+
+ /* SNP_INIT requires MSR_VM_HSAVE_PA to be cleared on all CPUs. */
+ on_each_cpu(snp_set_hsave_pa, NULL, 1);
+
+ /*
+ * Starting in SNP firmware v1.52, the SNP_INIT_EX command takes a list
+ * of system physical address ranges to convert into HV-fixed page
+ * states during the RMP initialization. For instance, the memory that
+ * UEFI reserves should be included in the that list. This allows system
+ * components that occasionally write to memory (e.g. logging to UEFI
+ * reserved regions) to not fail due to RMP initialization and SNP
+ * enablement.
+ *
+ */
+ if (sev_version_greater_or_equal(SNP_MIN_API_MAJOR, 52)) {
+ /*
+ * Firmware checks that the pages containing the ranges enumerated
+ * in the RANGES structure are either in the default page state or in the
+ * firmware page state.
+ */
+ snp_range_list = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!snp_range_list) {
+ dev_err(sev->dev,
+ "SEV: SNP_INIT_EX range list memory allocation failed\n");
+ return -ENOMEM;
+ }
+
+ /*
+ * Retrieve all reserved memory regions from the e820 memory map
+ * to be setup as HV-fixed pages.
+ */
+ rc = walk_iomem_res_desc(IORES_DESC_NONE, IORESOURCE_MEM, 0, ~0,
+ snp_range_list, snp_filter_reserved_mem_regions);
+ if (rc) {
+ dev_err(sev->dev,
+ "SEV: SNP_INIT_EX walk_iomem_res_desc failed rc = %d\n", rc);
return rc;
+ }
+
+ memset(&data, 0, sizeof(data));
+ data.init_rmp = 1;
+ data.list_paddr_en = 1;
+ data.list_paddr = __psp_pa(snp_range_list);
+ cmd = SEV_CMD_SNP_INIT_EX;
+ } else {
+ cmd = SEV_CMD_SNP_INIT;
+ arg = NULL;
+ }
+
+ /*
+ * The following sequence must be issued before launching the first SNP
+ * guest to ensure all dirty cache lines are flushed, including from
+ * updates to the RMP table itself via the RMPUPDATE instruction:
+ *
+ * - WBINVD on all running CPUs
+ * - SEV_CMD_SNP_INIT[_EX] firmware command
+ * - WBINVD on all running CPUs
+ * - SEV_CMD_SNP_DF_FLUSH firmware command
+ */
+ wbinvd_on_all_cpus();
+
+ rc = __sev_do_cmd_locked(cmd, arg, error);
+ if (rc)
+ return rc;
+
+ /* Prepare for first SNP guest launch after INIT. */
+ wbinvd_on_all_cpus();
+ rc = __sev_do_cmd_locked(SEV_CMD_SNP_DF_FLUSH, NULL, error);
+ if (rc)
+ return rc;
+
+ sev->snp_initialized = true;
+ dev_dbg(sev->dev, "SEV-SNP firmware initialized\n");
+
+ sev_es_tmr_size = SNP_TMR_SIZE;
+
+ return rc;
+}
+
+static void __sev_platform_init_handle_tmr(struct sev_device *sev)
+{
+ if (sev_es_tmr)
+ return;
+
+ /* Obtain the TMR memory area for SEV-ES use */
+ sev_es_tmr = sev_fw_alloc(sev_es_tmr_size);
+ if (sev_es_tmr) {
+ /* Must flush the cache before giving it to the firmware */
+ if (!sev->snp_initialized)
+ clflush_cache_range(sev_es_tmr, sev_es_tmr_size);
+ } else {
+ dev_warn(sev->dev, "SEV: TMR allocation failed, SEV-ES support unavailable\n");
+ }
+}
+
+/*
+ * If an init_ex_path is provided allocate a buffer for the file and
+ * read in the contents. Additionally, if SNP is initialized, convert
+ * the buffer pages to firmware pages.
+ */
+static int __sev_platform_init_handle_init_ex_path(struct sev_device *sev)
+{
+ struct page *page;
+ int rc;
+
+ if (!init_ex_path)
+ return 0;
+
+ if (sev_init_ex_buffer)
+ return 0;
+
+ page = alloc_pages(GFP_KERNEL, get_order(NV_LENGTH));
+ if (!page) {
+ dev_err(sev->dev, "SEV: INIT_EX NV memory allocation failed\n");
+ return -ENOMEM;
+ }
+
+ sev_init_ex_buffer = page_address(page);
+
+ rc = sev_read_init_ex_file();
+ if (rc)
+ return rc;
+
+ /* If SEV-SNP is initialized, transition to firmware page. */
+ if (sev->snp_initialized) {
+ unsigned long npages;
+
+ npages = 1UL << get_order(NV_LENGTH);
+ if (rmp_mark_pages_firmware(__pa(sev_init_ex_buffer), npages, false)) {
+ dev_err(sev->dev, "SEV: INIT_EX NV memory page state change failed.\n");
+ return -ENOMEM;
+ }
}
+ return 0;
+}
+
+static int __sev_platform_init_locked(int *error)
+{
+ int rc, psp_ret = SEV_RET_NO_FW_CALL;
+ struct sev_device *sev;
+
+ if (!psp_master || !psp_master->sev_data)
+ return -ENODEV;
+
+ sev = psp_master->sev_data;
+
+ if (sev->state == SEV_STATE_INIT)
+ return 0;
+
+ __sev_platform_init_handle_tmr(sev);
+
+ rc = __sev_platform_init_handle_init_ex_path(sev);
+ if (rc)
+ return rc;
+
rc = __sev_do_init_locked(&psp_ret);
if (rc && psp_ret == SEV_RET_SECURE_DATA_INVALID) {
/*
@@ -520,12 +1295,46 @@ static int __sev_platform_init_locked(int *error)
return 0;
}
-int sev_platform_init(int *error)
+static int _sev_platform_init_locked(struct sev_platform_init_args *args)
+{
+ struct sev_device *sev;
+ int rc;
+
+ if (!psp_master || !psp_master->sev_data)
+ return -ENODEV;
+
+ sev = psp_master->sev_data;
+
+ if (sev->state == SEV_STATE_INIT)
+ return 0;
+
+ /*
+ * Legacy guests cannot be running while SNP_INIT(_EX) is executing,
+ * so perform SEV-SNP initialization at probe time.
+ */
+ rc = __sev_snp_init_locked(&args->error);
+ if (rc && rc != -ENODEV) {
+ /*
+ * Don't abort the probe if SNP INIT failed,
+ * continue to initialize the legacy SEV firmware.
+ */
+ dev_err(sev->dev, "SEV-SNP: failed to INIT rc %d, error %#x\n",
+ rc, args->error);
+ }
+
+ /* Defer legacy SEV/SEV-ES support if allowed by caller/module. */
+ if (args->probe && !psp_init_on_probe)
+ return 0;
+
+ return __sev_platform_init_locked(&args->error);
+}
+
+int sev_platform_init(struct sev_platform_init_args *args)
{
int rc;
mutex_lock(&sev_cmd_mutex);
- rc = __sev_platform_init_locked(error);
+ rc = _sev_platform_init_locked(args);
mutex_unlock(&sev_cmd_mutex);
return rc;
@@ -556,17 +1365,6 @@ static int __sev_platform_shutdown_locked(int *error)
return ret;
}
-static int sev_platform_shutdown(int *error)
-{
- int rc;
-
- mutex_lock(&sev_cmd_mutex);
- rc = __sev_platform_shutdown_locked(NULL);
- mutex_unlock(&sev_cmd_mutex);
-
- return rc;
-}
-
static int sev_get_platform_state(int *state, int *error)
{
struct sev_user_data_status data;
@@ -842,6 +1640,72 @@ fw_err:
return ret;
}
+static int __sev_snp_shutdown_locked(int *error, bool panic)
+{
+ struct sev_device *sev = psp_master->sev_data;
+ struct sev_data_snp_shutdown_ex data;
+ int ret;
+
+ if (!sev->snp_initialized)
+ return 0;
+
+ memset(&data, 0, sizeof(data));
+ data.len = sizeof(data);
+ data.iommu_snp_shutdown = 1;
+
+ /*
+ * If invoked during panic handling, local interrupts are disabled
+ * and all CPUs are stopped, so wbinvd_on_all_cpus() can't be called.
+ * In that case, a wbinvd() is done on remote CPUs via the NMI
+ * callback, so only a local wbinvd() is needed here.
+ */
+ if (!panic)
+ wbinvd_on_all_cpus();
+ else
+ wbinvd();
+
+ ret = __sev_do_cmd_locked(SEV_CMD_SNP_SHUTDOWN_EX, &data, error);
+ /* SHUTDOWN may require DF_FLUSH */
+ if (*error == SEV_RET_DFFLUSH_REQUIRED) {
+ ret = __sev_do_cmd_locked(SEV_CMD_SNP_DF_FLUSH, NULL, NULL);
+ if (ret) {
+ dev_err(sev->dev, "SEV-SNP DF_FLUSH failed\n");
+ return ret;
+ }
+ /* reissue the shutdown command */
+ ret = __sev_do_cmd_locked(SEV_CMD_SNP_SHUTDOWN_EX, &data,
+ error);
+ }
+ if (ret) {
+ dev_err(sev->dev, "SEV-SNP firmware shutdown failed\n");
+ return ret;
+ }
+
+ /*
+ * SNP_SHUTDOWN_EX with IOMMU_SNP_SHUTDOWN set to 1 disables SNP
+ * enforcement by the IOMMU and also transitions all pages
+ * associated with the IOMMU to the Reclaim state.
+ * Firmware was transitioning the IOMMU pages to Hypervisor state
+ * before version 1.53. But, accounting for the number of assigned
+ * 4kB pages in a 2M page was done incorrectly by not transitioning
+ * to the Reclaim state. This resulted in RMP #PF when later accessing
+ * the 2M page containing those pages during kexec boot. Hence, the
+ * firmware now transitions these pages to Reclaim state and hypervisor
+ * needs to transition these pages to shared state. SNP Firmware
+ * version 1.53 and above are needed for kexec boot.
+ */
+ ret = amd_iommu_snp_disable();
+ if (ret) {
+ dev_err(sev->dev, "SNP IOMMU shutdown failed\n");
+ return ret;
+ }
+
+ sev->snp_initialized = false;
+ dev_dbg(sev->dev, "SEV-SNP firmware shutdown\n");
+
+ return ret;
+}
+
static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp, bool writable)
{
struct sev_device *sev = psp_master->sev_data;
@@ -1084,6 +1948,85 @@ e_free_pdh:
return ret;
}
+static int sev_ioctl_do_snp_platform_status(struct sev_issue_cmd *argp)
+{
+ struct sev_device *sev = psp_master->sev_data;
+ struct sev_data_snp_addr buf;
+ struct page *status_page;
+ void *data;
+ int ret;
+
+ if (!sev->snp_initialized || !argp->data)
+ return -EINVAL;
+
+ status_page = alloc_page(GFP_KERNEL_ACCOUNT);
+ if (!status_page)
+ return -ENOMEM;
+
+ data = page_address(status_page);
+
+ /*
+ * Firmware expects status page to be in firmware-owned state, otherwise
+ * it will report firmware error code INVALID_PAGE_STATE (0x1A).
+ */
+ if (rmp_mark_pages_firmware(__pa(data), 1, true)) {
+ ret = -EFAULT;
+ goto cleanup;
+ }
+
+ buf.address = __psp_pa(data);
+ ret = __sev_do_cmd_locked(SEV_CMD_SNP_PLATFORM_STATUS, &buf, &argp->error);
+
+ /*
+ * Status page will be transitioned to Reclaim state upon success, or
+ * left in Firmware state in failure. Use snp_reclaim_pages() to
+ * transition either case back to Hypervisor-owned state.
+ */
+ if (snp_reclaim_pages(__pa(data), 1, true))
+ return -EFAULT;
+
+ if (ret)
+ goto cleanup;
+
+ if (copy_to_user((void __user *)argp->data, data,
+ sizeof(struct sev_user_data_snp_status)))
+ ret = -EFAULT;
+
+cleanup:
+ __free_pages(status_page, 0);
+ return ret;
+}
+
+static int sev_ioctl_do_snp_commit(struct sev_issue_cmd *argp)
+{
+ struct sev_device *sev = psp_master->sev_data;
+ struct sev_data_snp_commit buf;
+
+ if (!sev->snp_initialized)
+ return -EINVAL;
+
+ buf.len = sizeof(buf);
+
+ return __sev_do_cmd_locked(SEV_CMD_SNP_COMMIT, &buf, &argp->error);
+}
+
+static int sev_ioctl_do_snp_set_config(struct sev_issue_cmd *argp, bool writable)
+{
+ struct sev_device *sev = psp_master->sev_data;
+ struct sev_user_data_snp_config config;
+
+ if (!sev->snp_initialized || !argp->data)
+ return -EINVAL;
+
+ if (!writable)
+ return -EPERM;
+
+ if (copy_from_user(&config, (void __user *)argp->data, sizeof(config)))
+ return -EFAULT;
+
+ return __sev_do_cmd_locked(SEV_CMD_SNP_CONFIG, &config, &argp->error);
+}
+
static long sev_ioctl(struct file *file, unsigned int ioctl, unsigned long arg)
{
void __user *argp = (void __user *)arg;
@@ -1135,6 +2078,15 @@ static long sev_ioctl(struct file *file, unsigned int ioctl, unsigned long arg)
case SEV_GET_ID2:
ret = sev_ioctl_do_get_id2(&input);
break;
+ case SNP_PLATFORM_STATUS:
+ ret = sev_ioctl_do_snp_platform_status(&input);
+ break;
+ case SNP_COMMIT:
+ ret = sev_ioctl_do_snp_commit(&input);
+ break;
+ case SNP_SET_CONFIG:
+ ret = sev_ioctl_do_snp_set_config(&input, writable);
+ break;
default:
ret = -EINVAL;
goto out;
@@ -1245,10 +2197,12 @@ int sev_dev_init(struct psp_device *psp)
if (!sev)
goto e_err;
- sev->cmd_buf = (void *)devm_get_free_pages(dev, GFP_KERNEL, 0);
+ sev->cmd_buf = (void *)devm_get_free_pages(dev, GFP_KERNEL, 1);
if (!sev->cmd_buf)
goto e_sev;
+ sev->cmd_buf_backup = (uint8_t *)sev->cmd_buf + PAGE_SIZE;
+
psp->sev_data = sev;
sev->dev = dev;
@@ -1287,24 +2241,51 @@ e_err:
return ret;
}
-static void sev_firmware_shutdown(struct sev_device *sev)
+static void __sev_firmware_shutdown(struct sev_device *sev, bool panic)
{
- sev_platform_shutdown(NULL);
+ int error;
+
+ __sev_platform_shutdown_locked(NULL);
if (sev_es_tmr) {
- /* The TMR area was encrypted, flush it from the cache */
- wbinvd_on_all_cpus();
+ /*
+ * The TMR area was encrypted, flush it from the cache.
+ *
+ * If invoked during panic handling, local interrupts are
+ * disabled and all CPUs are stopped, so wbinvd_on_all_cpus()
+ * can't be used. In that case, wbinvd() is done on remote CPUs
+ * via the NMI callback, and done for this CPU later during
+ * SNP shutdown, so wbinvd_on_all_cpus() can be skipped.
+ */
+ if (!panic)
+ wbinvd_on_all_cpus();
- free_pages((unsigned long)sev_es_tmr,
- get_order(SEV_ES_TMR_SIZE));
+ __snp_free_firmware_pages(virt_to_page(sev_es_tmr),
+ get_order(sev_es_tmr_size),
+ true);
sev_es_tmr = NULL;
}
if (sev_init_ex_buffer) {
- free_pages((unsigned long)sev_init_ex_buffer,
- get_order(NV_LENGTH));
+ __snp_free_firmware_pages(virt_to_page(sev_init_ex_buffer),
+ get_order(NV_LENGTH),
+ true);
sev_init_ex_buffer = NULL;
}
+
+ if (snp_range_list) {
+ kfree(snp_range_list);
+ snp_range_list = NULL;
+ }
+
+ __sev_snp_shutdown_locked(&error, panic);
+}
+
+static void sev_firmware_shutdown(struct sev_device *sev)
+{
+ mutex_lock(&sev_cmd_mutex);
+ __sev_firmware_shutdown(sev, false);
+ mutex_unlock(&sev_cmd_mutex);
}
void sev_dev_destroy(struct psp_device *psp)
@@ -1322,6 +2303,29 @@ void sev_dev_destroy(struct psp_device *psp)
psp_clear_sev_irq_handler(psp);
}
+static int snp_shutdown_on_panic(struct notifier_block *nb,
+ unsigned long reason, void *arg)
+{
+ struct sev_device *sev = psp_master->sev_data;
+
+ /*
+ * If sev_cmd_mutex is already acquired, then it's likely
+ * another PSP command is in flight and issuing a shutdown
+ * would fail in unexpected ways. Rather than create even
+ * more confusion during a panic, just bail out here.
+ */
+ if (mutex_is_locked(&sev_cmd_mutex))
+ return NOTIFY_DONE;
+
+ __sev_firmware_shutdown(sev, true);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block snp_panic_notifier = {
+ .notifier_call = snp_shutdown_on_panic,
+};
+
int sev_issue_cmd_external_user(struct file *filep, unsigned int cmd,
void *data, int *error)
{
@@ -1335,7 +2339,8 @@ EXPORT_SYMBOL_GPL(sev_issue_cmd_external_user);
void sev_pci_init(void)
{
struct sev_device *sev = psp_master->sev_data;
- int error, rc;
+ struct sev_platform_init_args args = {0};
+ int rc;
if (!sev)
return;
@@ -1348,36 +2353,18 @@ void sev_pci_init(void)
if (sev_update_firmware(sev->dev) == 0)
sev_get_api_version();
- /* If an init_ex_path is provided rely on INIT_EX for PSP initialization
- * instead of INIT.
- */
- if (init_ex_path) {
- sev_init_ex_buffer = sev_fw_alloc(NV_LENGTH);
- if (!sev_init_ex_buffer) {
- dev_err(sev->dev,
- "SEV: INIT_EX NV memory allocation failed\n");
- goto err;
- }
- }
-
- /* Obtain the TMR memory area for SEV-ES use */
- sev_es_tmr = sev_fw_alloc(SEV_ES_TMR_SIZE);
- if (sev_es_tmr)
- /* Must flush the cache before giving it to the firmware */
- clflush_cache_range(sev_es_tmr, SEV_ES_TMR_SIZE);
- else
- dev_warn(sev->dev,
- "SEV: TMR allocation failed, SEV-ES support unavailable\n");
-
- if (!psp_init_on_probe)
- return;
-
/* Initialize the platform */
- rc = sev_platform_init(&error);
+ args.probe = true;
+ rc = sev_platform_init(&args);
if (rc)
dev_err(sev->dev, "SEV: failed to INIT error %#x, rc %d\n",
- error, rc);
+ args.error, rc);
+ dev_info(sev->dev, "SEV%s API:%d.%d build:%d\n", sev->snp_initialized ?
+ "-SNP" : "", sev->api_major, sev->api_minor, sev->build);
+
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &snp_panic_notifier);
return;
err:
@@ -1392,4 +2379,7 @@ void sev_pci_exit(void)
return;
sev_firmware_shutdown(sev);
+
+ atomic_notifier_chain_unregister(&panic_notifier_list,
+ &snp_panic_notifier);
}
diff --git a/drivers/crypto/ccp/sev-dev.h b/drivers/crypto/ccp/sev-dev.h
index 778c95155e74..3e4e5574e88a 100644
--- a/drivers/crypto/ccp/sev-dev.h
+++ b/drivers/crypto/ccp/sev-dev.h
@@ -52,6 +52,11 @@ struct sev_device {
u8 build;
void *cmd_buf;
+ void *cmd_buf_backup;
+ bool cmd_buf_active;
+ bool cmd_buf_backup_active;
+
+ bool snp_initialized;
};
int sev_dev_init(struct psp_device *psp);
diff --git a/drivers/crypto/ccp/sp-platform.c b/drivers/crypto/ccp/sp-platform.c
index 473301237760..ff6ceb4feee0 100644
--- a/drivers/crypto/ccp/sp-platform.c
+++ b/drivers/crypto/ccp/sp-platform.c
@@ -39,44 +39,38 @@ static const struct sp_dev_vdata dev_vdata[] = {
},
};
-#ifdef CONFIG_ACPI
static const struct acpi_device_id sp_acpi_match[] = {
{ "AMDI0C00", (kernel_ulong_t)&dev_vdata[0] },
{ },
};
MODULE_DEVICE_TABLE(acpi, sp_acpi_match);
-#endif
-#ifdef CONFIG_OF
static const struct of_device_id sp_of_match[] = {
{ .compatible = "amd,ccp-seattle-v1a",
.data = (const void *)&dev_vdata[0] },
{ },
};
MODULE_DEVICE_TABLE(of, sp_of_match);
-#endif
static struct sp_dev_vdata *sp_get_of_version(struct platform_device *pdev)
{
-#ifdef CONFIG_OF
const struct of_device_id *match;
match = of_match_node(sp_of_match, pdev->dev.of_node);
if (match && match->data)
return (struct sp_dev_vdata *)match->data;
-#endif
+
return NULL;
}
static struct sp_dev_vdata *sp_get_acpi_version(struct platform_device *pdev)
{
-#ifdef CONFIG_ACPI
const struct acpi_device_id *match;
match = acpi_match_device(sp_acpi_match, &pdev->dev);
if (match && match->driver_data)
return (struct sp_dev_vdata *)match->driver_data;
-#endif
+
return NULL;
}
@@ -212,12 +206,8 @@ static int sp_platform_resume(struct platform_device *pdev)
static struct platform_driver sp_platform_driver = {
.driver = {
.name = "ccp",
-#ifdef CONFIG_ACPI
.acpi_match_table = sp_acpi_match,
-#endif
-#ifdef CONFIG_OF
.of_match_table = sp_of_match,
-#endif
},
.probe = sp_platform_probe,
.remove_new = sp_platform_remove,
diff --git a/drivers/crypto/hisilicon/debugfs.c b/drivers/crypto/hisilicon/debugfs.c
index 80ed4b2d209c..1b9b7bccdeff 100644
--- a/drivers/crypto/hisilicon/debugfs.c
+++ b/drivers/crypto/hisilicon/debugfs.c
@@ -13,6 +13,7 @@
#define QM_DFX_COMMON_LEN 0xC3
#define QM_DFX_REGS_LEN 4UL
#define QM_DBG_TMP_BUF_LEN 22
+#define QM_XQC_ADDR_MASK GENMASK(31, 0)
#define CURRENT_FUN_MASK GENMASK(5, 0)
#define CURRENT_Q_MASK GENMASK(31, 16)
#define QM_SQE_ADDR_MASK GENMASK(7, 0)
@@ -24,6 +25,8 @@
#define QM_DFX_QN_SHIFT 16
#define QM_DFX_CNT_CLR_CE 0x100118
#define QM_DBG_WRITE_LEN 1024
+#define QM_IN_IDLE_ST_REG 0x1040e4
+#define QM_IN_IDLE_STATE 0x1
static const char * const qm_debug_file_name[] = {
[CURRENT_QM] = "current_qm",
@@ -81,6 +84,30 @@ static const struct debugfs_reg32 qm_dfx_regs[] = {
{"QM_DFX_FF_ST5 ", 0x1040dc},
{"QM_DFX_FF_ST6 ", 0x1040e0},
{"QM_IN_IDLE_ST ", 0x1040e4},
+ {"QM_CACHE_CTL ", 0x100050},
+ {"QM_TIMEOUT_CFG ", 0x100070},
+ {"QM_DB_TIMEOUT_CFG ", 0x100074},
+ {"QM_FLR_PENDING_TIME_CFG ", 0x100078},
+ {"QM_ARUSR_MCFG1 ", 0x100088},
+ {"QM_AWUSR_MCFG1 ", 0x100098},
+ {"QM_AXI_M_CFG_ENABLE ", 0x1000B0},
+ {"QM_RAS_CE_THRESHOLD ", 0x1000F8},
+ {"QM_AXI_TIMEOUT_CTRL ", 0x100120},
+ {"QM_AXI_TIMEOUT_STATUS ", 0x100124},
+ {"QM_CQE_AGGR_TIMEOUT_CTRL ", 0x100144},
+ {"ACC_RAS_MSI_INT_SEL ", 0x1040fc},
+ {"QM_CQE_OUT ", 0x104100},
+ {"QM_EQE_OUT ", 0x104104},
+ {"QM_AEQE_OUT ", 0x104108},
+ {"QM_DB_INFO0 ", 0x104180},
+ {"QM_DB_INFO1 ", 0x104184},
+ {"QM_AM_CTRL_GLOBAL ", 0x300000},
+ {"QM_AM_CURR_PORT_STS ", 0x300100},
+ {"QM_AM_CURR_TRANS_RETURN ", 0x300150},
+ {"QM_AM_CURR_RD_MAX_TXID ", 0x300154},
+ {"QM_AM_CURR_WR_MAX_TXID ", 0x300158},
+ {"QM_AM_ALARM_RRESP ", 0x300180},
+ {"QM_AM_ALARM_BRESP ", 0x300184},
};
static const struct debugfs_reg32 qm_vf_dfx_regs[] = {
@@ -141,7 +168,6 @@ static void dump_show(struct hisi_qm *qm, void *info,
static int qm_sqc_dump(struct hisi_qm *qm, char *s, char *name)
{
struct device *dev = &qm->pdev->dev;
- struct qm_sqc *sqc_curr;
struct qm_sqc sqc;
u32 qp_id;
int ret;
@@ -157,6 +183,8 @@ static int qm_sqc_dump(struct hisi_qm *qm, char *s, char *name)
ret = qm_set_and_get_xqc(qm, QM_MB_CMD_SQC, &sqc, qp_id, 1);
if (!ret) {
+ sqc.base_h = cpu_to_le32(QM_XQC_ADDR_MASK);
+ sqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK);
dump_show(qm, &sqc, sizeof(struct qm_sqc), name);
return 0;
@@ -164,9 +192,10 @@ static int qm_sqc_dump(struct hisi_qm *qm, char *s, char *name)
down_read(&qm->qps_lock);
if (qm->sqc) {
- sqc_curr = qm->sqc + qp_id;
-
- dump_show(qm, sqc_curr, sizeof(*sqc_curr), "SOFT SQC");
+ memcpy(&sqc, qm->sqc + qp_id * sizeof(struct qm_sqc), sizeof(struct qm_sqc));
+ sqc.base_h = cpu_to_le32(QM_XQC_ADDR_MASK);
+ sqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK);
+ dump_show(qm, &sqc, sizeof(struct qm_sqc), "SOFT SQC");
}
up_read(&qm->qps_lock);
@@ -176,7 +205,6 @@ static int qm_sqc_dump(struct hisi_qm *qm, char *s, char *name)
static int qm_cqc_dump(struct hisi_qm *qm, char *s, char *name)
{
struct device *dev = &qm->pdev->dev;
- struct qm_cqc *cqc_curr;
struct qm_cqc cqc;
u32 qp_id;
int ret;
@@ -192,6 +220,8 @@ static int qm_cqc_dump(struct hisi_qm *qm, char *s, char *name)
ret = qm_set_and_get_xqc(qm, QM_MB_CMD_CQC, &cqc, qp_id, 1);
if (!ret) {
+ cqc.base_h = cpu_to_le32(QM_XQC_ADDR_MASK);
+ cqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK);
dump_show(qm, &cqc, sizeof(struct qm_cqc), name);
return 0;
@@ -199,9 +229,10 @@ static int qm_cqc_dump(struct hisi_qm *qm, char *s, char *name)
down_read(&qm->qps_lock);
if (qm->cqc) {
- cqc_curr = qm->cqc + qp_id;
-
- dump_show(qm, cqc_curr, sizeof(*cqc_curr), "SOFT CQC");
+ memcpy(&cqc, qm->cqc + qp_id * sizeof(struct qm_cqc), sizeof(struct qm_cqc));
+ cqc.base_h = cpu_to_le32(QM_XQC_ADDR_MASK);
+ cqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK);
+ dump_show(qm, &cqc, sizeof(struct qm_cqc), "SOFT CQC");
}
up_read(&qm->qps_lock);
@@ -237,6 +268,10 @@ static int qm_eqc_aeqc_dump(struct hisi_qm *qm, char *s, char *name)
if (ret)
return ret;
+ aeqc.base_h = cpu_to_le32(QM_XQC_ADDR_MASK);
+ aeqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK);
+ eqc.base_h = cpu_to_le32(QM_XQC_ADDR_MASK);
+ eqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK);
dump_show(qm, xeqc, size, name);
return ret;
@@ -284,27 +319,26 @@ static int q_dump_param_parse(struct hisi_qm *qm, char *s,
static int qm_sq_dump(struct hisi_qm *qm, char *s, char *name)
{
- u16 sq_depth = qm->qp_array->cq_depth;
- void *sqe, *sqe_curr;
+ u16 sq_depth = qm->qp_array->sq_depth;
struct hisi_qp *qp;
u32 qp_id, sqe_id;
+ void *sqe;
int ret;
ret = q_dump_param_parse(qm, s, &sqe_id, &qp_id, sq_depth);
if (ret)
return ret;
- sqe = kzalloc(qm->sqe_size * sq_depth, GFP_KERNEL);
+ sqe = kzalloc(qm->sqe_size, GFP_KERNEL);
if (!sqe)
return -ENOMEM;
qp = &qm->qp_array[qp_id];
- memcpy(sqe, qp->sqe, qm->sqe_size * sq_depth);
- sqe_curr = sqe + (u32)(sqe_id * qm->sqe_size);
- memset(sqe_curr + qm->debug.sqe_mask_offset, QM_SQE_ADDR_MASK,
+ memcpy(sqe, qp->sqe + sqe_id * qm->sqe_size, qm->sqe_size);
+ memset(sqe + qm->debug.sqe_mask_offset, QM_SQE_ADDR_MASK,
qm->debug.sqe_mask_len);
- dump_show(qm, sqe_curr, qm->sqe_size, name);
+ dump_show(qm, sqe, qm->sqe_size, name);
kfree(sqe);
@@ -783,8 +817,14 @@ static void dfx_regs_uninit(struct hisi_qm *qm,
{
int i;
+ if (!dregs)
+ return;
+
/* Setting the pointer is NULL to prevent double free */
for (i = 0; i < reg_len; i++) {
+ if (!dregs[i].regs)
+ continue;
+
kfree(dregs[i].regs);
dregs[i].regs = NULL;
}
@@ -834,14 +874,21 @@ alloc_error:
static int qm_diff_regs_init(struct hisi_qm *qm,
struct dfx_diff_registers *dregs, u32 reg_len)
{
+ int ret;
+
qm->debug.qm_diff_regs = dfx_regs_init(qm, qm_diff_regs, ARRAY_SIZE(qm_diff_regs));
- if (IS_ERR(qm->debug.qm_diff_regs))
- return PTR_ERR(qm->debug.qm_diff_regs);
+ if (IS_ERR(qm->debug.qm_diff_regs)) {
+ ret = PTR_ERR(qm->debug.qm_diff_regs);
+ qm->debug.qm_diff_regs = NULL;
+ return ret;
+ }
qm->debug.acc_diff_regs = dfx_regs_init(qm, dregs, reg_len);
if (IS_ERR(qm->debug.acc_diff_regs)) {
dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs));
- return PTR_ERR(qm->debug.acc_diff_regs);
+ ret = PTR_ERR(qm->debug.acc_diff_regs);
+ qm->debug.acc_diff_regs = NULL;
+ return ret;
}
return 0;
@@ -882,7 +929,9 @@ static int qm_last_regs_init(struct hisi_qm *qm)
static void qm_diff_regs_uninit(struct hisi_qm *qm, u32 reg_len)
{
dfx_regs_uninit(qm, qm->debug.acc_diff_regs, reg_len);
+ qm->debug.acc_diff_regs = NULL;
dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs));
+ qm->debug.qm_diff_regs = NULL;
}
/**
@@ -1001,6 +1050,30 @@ static int qm_diff_regs_show(struct seq_file *s, void *unused)
}
DEFINE_SHOW_ATTRIBUTE(qm_diff_regs);
+static int qm_state_show(struct seq_file *s, void *unused)
+{
+ struct hisi_qm *qm = s->private;
+ u32 val;
+ int ret;
+
+ /* If device is in suspended, directly return the idle state. */
+ ret = hisi_qm_get_dfx_access(qm);
+ if (!ret) {
+ val = readl(qm->io_base + QM_IN_IDLE_ST_REG);
+ hisi_qm_put_dfx_access(qm);
+ } else if (ret == -EAGAIN) {
+ val = QM_IN_IDLE_STATE;
+ } else {
+ return ret;
+ }
+
+ seq_printf(s, "%u\n", val);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(qm_state);
+
static ssize_t qm_status_read(struct file *filp, char __user *buffer,
size_t count, loff_t *pos)
{
@@ -1025,12 +1098,12 @@ static void qm_create_debugfs_file(struct hisi_qm *qm, struct dentry *dir,
{
struct debugfs_file *file = qm->debug.files + index;
- debugfs_create_file(qm_debug_file_name[index], 0600, dir, file,
- &qm_debug_fops);
-
file->index = index;
mutex_init(&file->lock);
file->debug = &qm->debug;
+
+ debugfs_create_file(qm_debug_file_name[index], 0600, dir, file,
+ &qm_debug_fops);
}
static int qm_debugfs_atomic64_set(void *data, u64 val)
@@ -1062,6 +1135,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(qm_atomic64_ops, qm_debugfs_atomic64_get,
void hisi_qm_debug_init(struct hisi_qm *qm)
{
struct dfx_diff_registers *qm_regs = qm->debug.qm_diff_regs;
+ struct qm_dev_dfx *dev_dfx = &qm->debug.dev_dfx;
struct qm_dfx *dfx = &qm->debug.dfx;
struct dentry *qm_d;
void *data;
@@ -1072,6 +1146,9 @@ void hisi_qm_debug_init(struct hisi_qm *qm)
/* only show this in PF */
if (qm->fun_type == QM_HW_PF) {
+ debugfs_create_file("qm_state", 0444, qm->debug.qm_d,
+ qm, &qm_state_fops);
+
qm_create_debugfs_file(qm, qm->debug.debug_root, CURRENT_QM);
for (i = CURRENT_Q; i < DEBUG_FILE_NUM; i++)
qm_create_debugfs_file(qm, qm->debug.qm_d, i);
@@ -1087,6 +1164,10 @@ void hisi_qm_debug_init(struct hisi_qm *qm)
debugfs_create_file("status", 0444, qm->debug.qm_d, qm,
&qm_status_fops);
+
+ debugfs_create_u32("dev_state", 0444, qm->debug.qm_d, &dev_dfx->dev_state);
+ debugfs_create_u32("dev_timeout", 0644, qm->debug.qm_d, &dev_dfx->dev_timeout);
+
for (i = 0; i < ARRAY_SIZE(qm_dfx_files); i++) {
data = (atomic64_t *)((uintptr_t)dfx + qm_dfx_files[i].offset);
debugfs_create_file(qm_dfx_files[i].name,
diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index 3255b2a070c7..10aa4da93323 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -106,7 +106,7 @@
#define HPRE_SHAPER_TYPE_RATE 640
#define HPRE_VIA_MSI_DSM 1
#define HPRE_SQE_MASK_OFFSET 8
-#define HPRE_SQE_MASK_LEN 24
+#define HPRE_SQE_MASK_LEN 44
#define HPRE_CTX_Q_NUM_DEF 1
#define HPRE_DFX_BASE 0x301000
@@ -440,7 +440,7 @@ MODULE_PARM_DESC(vfs_num, "Number of VFs to enable(1-63), 0(default)");
struct hisi_qp *hpre_create_qp(u8 type)
{
- int node = cpu_to_node(smp_processor_id());
+ int node = cpu_to_node(raw_smp_processor_id());
struct hisi_qp *qp = NULL;
int ret;
@@ -1074,41 +1074,40 @@ static int hpre_debugfs_init(struct hisi_qm *qm)
struct device *dev = &qm->pdev->dev;
int ret;
- qm->debug.debug_root = debugfs_create_dir(dev_name(dev),
- hpre_debugfs_root);
-
- qm->debug.sqe_mask_offset = HPRE_SQE_MASK_OFFSET;
- qm->debug.sqe_mask_len = HPRE_SQE_MASK_LEN;
ret = hisi_qm_regs_debugfs_init(qm, hpre_diff_regs, ARRAY_SIZE(hpre_diff_regs));
if (ret) {
dev_warn(dev, "Failed to init HPRE diff regs!\n");
- goto debugfs_remove;
+ return ret;
}
+ qm->debug.debug_root = debugfs_create_dir(dev_name(dev),
+ hpre_debugfs_root);
+ qm->debug.sqe_mask_offset = HPRE_SQE_MASK_OFFSET;
+ qm->debug.sqe_mask_len = HPRE_SQE_MASK_LEN;
+
hisi_qm_debug_init(qm);
if (qm->pdev->device == PCI_DEVICE_ID_HUAWEI_HPRE_PF) {
ret = hpre_ctrl_debug_init(qm);
if (ret)
- goto failed_to_create;
+ goto debugfs_remove;
}
hpre_dfx_debug_init(qm);
return 0;
-failed_to_create:
- hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hpre_diff_regs));
debugfs_remove:
debugfs_remove_recursive(qm->debug.debug_root);
+ hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hpre_diff_regs));
return ret;
}
static void hpre_debugfs_exit(struct hisi_qm *qm)
{
- hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hpre_diff_regs));
-
debugfs_remove_recursive(qm->debug.debug_root);
+
+ hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hpre_diff_regs));
}
static int hpre_pre_store_cap_reg(struct hisi_qm *qm)
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 4b20b94e6371..3dac8d8e8568 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -236,6 +236,12 @@
#define QM_DEV_ALG_MAX_LEN 256
+ /* abnormal status value for stopping queue */
+#define QM_STOP_QUEUE_FAIL 1
+#define QM_DUMP_SQC_FAIL 3
+#define QM_DUMP_CQC_FAIL 4
+#define QM_FINISH_WAIT 5
+
#define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \
(((hop_num) << QM_CQ_HOP_NUM_SHIFT) | \
((pg_sz) << QM_CQ_PAGE_SIZE_SHIFT) | \
@@ -312,6 +318,7 @@ static const struct hisi_qm_cap_info qm_cap_info_comm[] = {
{QM_SUPPORT_DB_ISOLATION, 0x30, 0, BIT(0), 0x0, 0x0, 0x0},
{QM_SUPPORT_FUNC_QOS, 0x3100, 0, BIT(8), 0x0, 0x0, 0x1},
{QM_SUPPORT_STOP_QP, 0x3100, 0, BIT(9), 0x0, 0x0, 0x1},
+ {QM_SUPPORT_STOP_FUNC, 0x3100, 0, BIT(10), 0x0, 0x0, 0x1},
{QM_SUPPORT_MB_COMMAND, 0x3100, 0, BIT(11), 0x0, 0x0, 0x1},
{QM_SUPPORT_SVA_PREFETCH, 0x3100, 0, BIT(14), 0x0, 0x0, 0x1},
};
@@ -638,6 +645,9 @@ int qm_set_and_get_xqc(struct hisi_qm *qm, u8 cmd, void *xqc, u32 qp_id, bool op
tmp_xqc = qm->xqc_buf.aeqc;
xqc_dma = qm->xqc_buf.aeqc_dma;
break;
+ default:
+ dev_err(&qm->pdev->dev, "unknown mailbox cmd %u\n", cmd);
+ return -EINVAL;
}
/* Setting xqc will fail if master OOO is blocked. */
@@ -1674,6 +1684,11 @@ unlock:
return ret;
}
+static int qm_drain_qm(struct hisi_qm *qm)
+{
+ return hisi_qm_mb(qm, QM_MB_CMD_FLUSH_QM, 0, 0, 0);
+}
+
static int qm_stop_qp(struct hisi_qp *qp)
{
return hisi_qm_mb(qp->qm, QM_MB_CMD_STOP_QP, 0, qp->qp_id, 0);
@@ -2031,43 +2046,25 @@ static void qp_stop_fail_cb(struct hisi_qp *qp)
}
}
-/**
- * qm_drain_qp() - Drain a qp.
- * @qp: The qp we want to drain.
- *
- * Determine whether the queue is cleared by judging the tail pointers of
- * sq and cq.
- */
-static int qm_drain_qp(struct hisi_qp *qp)
+static int qm_wait_qp_empty(struct hisi_qm *qm, u32 *state, u32 qp_id)
{
- struct hisi_qm *qm = qp->qm;
struct device *dev = &qm->pdev->dev;
struct qm_sqc sqc;
struct qm_cqc cqc;
int ret, i = 0;
- /* No need to judge if master OOO is blocked. */
- if (qm_check_dev_error(qm))
- return 0;
-
- /* Kunpeng930 supports drain qp by device */
- if (test_bit(QM_SUPPORT_STOP_QP, &qm->caps)) {
- ret = qm_stop_qp(qp);
- if (ret)
- dev_err(dev, "Failed to stop qp(%u)!\n", qp->qp_id);
- return ret;
- }
-
while (++i) {
- ret = qm_set_and_get_xqc(qm, QM_MB_CMD_SQC, &sqc, qp->qp_id, 1);
+ ret = qm_set_and_get_xqc(qm, QM_MB_CMD_SQC, &sqc, qp_id, 1);
if (ret) {
dev_err_ratelimited(dev, "Failed to dump sqc!\n");
+ *state = QM_DUMP_SQC_FAIL;
return ret;
}
- ret = qm_set_and_get_xqc(qm, QM_MB_CMD_CQC, &cqc, qp->qp_id, 1);
+ ret = qm_set_and_get_xqc(qm, QM_MB_CMD_CQC, &cqc, qp_id, 1);
if (ret) {
dev_err_ratelimited(dev, "Failed to dump cqc!\n");
+ *state = QM_DUMP_CQC_FAIL;
return ret;
}
@@ -2076,8 +2073,9 @@ static int qm_drain_qp(struct hisi_qp *qp)
break;
if (i == MAX_WAIT_COUNTS) {
- dev_err(dev, "Fail to empty queue %u!\n", qp->qp_id);
- return -EBUSY;
+ dev_err(dev, "Fail to empty queue %u!\n", qp_id);
+ *state = QM_STOP_QUEUE_FAIL;
+ return -ETIMEDOUT;
}
usleep_range(WAIT_PERIOD_US_MIN, WAIT_PERIOD_US_MAX);
@@ -2086,9 +2084,53 @@ static int qm_drain_qp(struct hisi_qp *qp)
return 0;
}
-static int qm_stop_qp_nolock(struct hisi_qp *qp)
+/**
+ * qm_drain_qp() - Drain a qp.
+ * @qp: The qp we want to drain.
+ *
+ * If the device does not support stopping queue by sending mailbox,
+ * determine whether the queue is cleared by judging the tail pointers of
+ * sq and cq.
+ */
+static int qm_drain_qp(struct hisi_qp *qp)
+{
+ struct hisi_qm *qm = qp->qm;
+ struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(qm->pdev));
+ u32 state = 0;
+ int ret;
+
+ /* No need to judge if master OOO is blocked. */
+ if (qm_check_dev_error(pf_qm))
+ return 0;
+
+ /* HW V3 supports drain qp by device */
+ if (test_bit(QM_SUPPORT_STOP_QP, &qm->caps)) {
+ ret = qm_stop_qp(qp);
+ if (ret) {
+ dev_err(&qm->pdev->dev, "Failed to stop qp!\n");
+ state = QM_STOP_QUEUE_FAIL;
+ goto set_dev_state;
+ }
+ return ret;
+ }
+
+ ret = qm_wait_qp_empty(qm, &state, qp->qp_id);
+ if (ret)
+ goto set_dev_state;
+
+ return 0;
+
+set_dev_state:
+ if (qm->debug.dev_dfx.dev_timeout)
+ qm->debug.dev_dfx.dev_state = state;
+
+ return ret;
+}
+
+static void qm_stop_qp_nolock(struct hisi_qp *qp)
{
- struct device *dev = &qp->qm->pdev->dev;
+ struct hisi_qm *qm = qp->qm;
+ struct device *dev = &qm->pdev->dev;
int ret;
/*
@@ -2099,39 +2141,36 @@ static int qm_stop_qp_nolock(struct hisi_qp *qp)
*/
if (atomic_read(&qp->qp_status.flags) != QP_START) {
qp->is_resetting = false;
- return 0;
+ return;
}
atomic_set(&qp->qp_status.flags, QP_STOP);
- ret = qm_drain_qp(qp);
- if (ret)
- dev_err(dev, "Failed to drain out data for stopping!\n");
+ /* V3 supports direct stop function when FLR prepare */
+ if (qm->ver < QM_HW_V3 || qm->status.stop_reason == QM_NORMAL) {
+ ret = qm_drain_qp(qp);
+ if (ret)
+ dev_err(dev, "Failed to drain out data for stopping qp(%u)!\n", qp->qp_id);
+ }
- flush_workqueue(qp->qm->wq);
+ flush_workqueue(qm->wq);
if (unlikely(qp->is_resetting && atomic_read(&qp->qp_status.used)))
qp_stop_fail_cb(qp);
dev_dbg(dev, "stop queue %u!", qp->qp_id);
-
- return 0;
}
/**
* hisi_qm_stop_qp() - Stop a qp in qm.
* @qp: The qp we want to stop.
*
- * This function is reverse of hisi_qm_start_qp. Return 0 if successful.
+ * This function is reverse of hisi_qm_start_qp.
*/
-int hisi_qm_stop_qp(struct hisi_qp *qp)
+void hisi_qm_stop_qp(struct hisi_qp *qp)
{
- int ret;
-
down_write(&qp->qm->qps_lock);
- ret = qm_stop_qp_nolock(qp);
+ qm_stop_qp_nolock(qp);
up_write(&qp->qm->qps_lock);
-
- return ret;
}
EXPORT_SYMBOL_GPL(hisi_qm_stop_qp);
@@ -2309,7 +2348,31 @@ static int hisi_qm_uacce_start_queue(struct uacce_queue *q)
static void hisi_qm_uacce_stop_queue(struct uacce_queue *q)
{
- hisi_qm_stop_qp(q->priv);
+ struct hisi_qp *qp = q->priv;
+ struct hisi_qm *qm = qp->qm;
+ struct qm_dev_dfx *dev_dfx = &qm->debug.dev_dfx;
+ u32 i = 0;
+
+ hisi_qm_stop_qp(qp);
+
+ if (!dev_dfx->dev_timeout || !dev_dfx->dev_state)
+ return;
+
+ /*
+ * After the queue fails to be stopped,
+ * wait for a period of time before releasing the queue.
+ */
+ while (++i) {
+ msleep(WAIT_PERIOD);
+
+ /* Since dev_timeout maybe modified, check i >= dev_timeout */
+ if (i >= dev_dfx->dev_timeout) {
+ dev_err(&qm->pdev->dev, "Stop q %u timeout, state %u\n",
+ qp->qp_id, dev_dfx->dev_state);
+ dev_dfx->dev_state = QM_FINISH_WAIT;
+ break;
+ }
+ }
}
static int hisi_qm_is_q_updated(struct uacce_queue *q)
@@ -2833,12 +2896,9 @@ void hisi_qm_uninit(struct hisi_qm *qm)
hisi_qm_set_state(qm, QM_NOT_READY);
up_write(&qm->qps_lock);
+ qm_remove_uacce(qm);
qm_irqs_unregister(qm);
hisi_qm_pci_uninit(qm);
- if (qm->use_sva) {
- uacce_remove(qm->uacce);
- qm->uacce = NULL;
- }
}
EXPORT_SYMBOL_GPL(hisi_qm_uninit);
@@ -3054,25 +3114,18 @@ static int qm_restart(struct hisi_qm *qm)
}
/* Stop started qps in reset flow */
-static int qm_stop_started_qp(struct hisi_qm *qm)
+static void qm_stop_started_qp(struct hisi_qm *qm)
{
- struct device *dev = &qm->pdev->dev;
struct hisi_qp *qp;
- int i, ret;
+ int i;
for (i = 0; i < qm->qp_num; i++) {
qp = &qm->qp_array[i];
- if (qp && atomic_read(&qp->qp_status.flags) == QP_START) {
+ if (atomic_read(&qp->qp_status.flags) == QP_START) {
qp->is_resetting = true;
- ret = qm_stop_qp_nolock(qp);
- if (ret < 0) {
- dev_err(dev, "Failed to stop qp%d!\n", i);
- return ret;
- }
+ qm_stop_qp_nolock(qp);
}
}
-
- return 0;
}
/**
@@ -3112,21 +3165,31 @@ int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r)
down_write(&qm->qps_lock);
- qm->status.stop_reason = r;
if (atomic_read(&qm->status.flags) == QM_STOP)
goto err_unlock;
/* Stop all the request sending at first. */
atomic_set(&qm->status.flags, QM_STOP);
+ qm->status.stop_reason = r;
- if (qm->status.stop_reason == QM_SOFT_RESET ||
- qm->status.stop_reason == QM_DOWN) {
+ if (qm->status.stop_reason != QM_NORMAL) {
hisi_qm_set_hw_reset(qm, QM_RESET_STOP_TX_OFFSET);
- ret = qm_stop_started_qp(qm);
- if (ret < 0) {
- dev_err(dev, "Failed to stop started qp!\n");
- goto err_unlock;
+ /*
+ * When performing soft reset, the hardware will no longer
+ * do tasks, and the tasks in the device will be flushed
+ * out directly since the master ooo is closed.
+ */
+ if (test_bit(QM_SUPPORT_STOP_FUNC, &qm->caps) &&
+ r != QM_SOFT_RESET) {
+ ret = qm_drain_qm(qm);
+ if (ret) {
+ dev_err(dev, "failed to drain qm!\n");
+ goto err_unlock;
+ }
}
+
+ qm_stop_started_qp(qm);
+
hisi_qm_set_hw_reset(qm, QM_RESET_STOP_RX_OFFSET);
}
@@ -3141,6 +3204,7 @@ int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r)
}
qm_clear_queues(qm);
+ qm->status.stop_reason = QM_NORMAL;
err_unlock:
up_write(&qm->qps_lock);
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index f028dcfd0ead..0558f98e221f 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -118,7 +118,7 @@ struct sec_aead {
};
/* Get an en/de-cipher queue cyclically to balance load over queues of TFM */
-static inline int sec_alloc_queue_id(struct sec_ctx *ctx, struct sec_req *req)
+static inline u32 sec_alloc_queue_id(struct sec_ctx *ctx, struct sec_req *req)
{
if (req->c_req.encrypt)
return (u32)atomic_inc_return(&ctx->enc_qcyclic) %
@@ -481,12 +481,13 @@ static void sec_alg_resource_free(struct sec_ctx *ctx,
if (ctx->pbuf_supported)
sec_free_pbuf_resource(dev, qp_ctx->res);
- if (ctx->alg_type == SEC_AEAD)
+ if (ctx->alg_type == SEC_AEAD) {
sec_free_mac_resource(dev, qp_ctx->res);
+ sec_free_aiv_resource(dev, qp_ctx->res);
+ }
}
-static int sec_alloc_qp_ctx_resource(struct hisi_qm *qm, struct sec_ctx *ctx,
- struct sec_qp_ctx *qp_ctx)
+static int sec_alloc_qp_ctx_resource(struct sec_ctx *ctx, struct sec_qp_ctx *qp_ctx)
{
u16 q_depth = qp_ctx->qp->sq_depth;
struct device *dev = ctx->dev;
@@ -541,8 +542,7 @@ static void sec_free_qp_ctx_resource(struct sec_ctx *ctx, struct sec_qp_ctx *qp_
kfree(qp_ctx->req_list);
}
-static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx,
- int qp_ctx_id, int alg_type)
+static int sec_create_qp_ctx(struct sec_ctx *ctx, int qp_ctx_id)
{
struct sec_qp_ctx *qp_ctx;
struct hisi_qp *qp;
@@ -561,7 +561,7 @@ static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx,
idr_init(&qp_ctx->req_idr);
INIT_LIST_HEAD(&qp_ctx->backlog);
- ret = sec_alloc_qp_ctx_resource(qm, ctx, qp_ctx);
+ ret = sec_alloc_qp_ctx_resource(ctx, qp_ctx);
if (ret)
goto err_destroy_idr;
@@ -614,7 +614,7 @@ static int sec_ctx_base_init(struct sec_ctx *ctx)
}
for (i = 0; i < sec->ctx_q_num; i++) {
- ret = sec_create_qp_ctx(&sec->qm, ctx, i, 0);
+ ret = sec_create_qp_ctx(ctx, i);
if (ret)
goto err_sec_release_qp_ctx;
}
@@ -750,9 +750,7 @@ static void sec_skcipher_uninit(struct crypto_skcipher *tfm)
sec_ctx_base_uninit(ctx);
}
-static int sec_skcipher_3des_setkey(struct crypto_skcipher *tfm, const u8 *key,
- const u32 keylen,
- const enum sec_cmode c_mode)
+static int sec_skcipher_3des_setkey(struct crypto_skcipher *tfm, const u8 *key, const u32 keylen)
{
struct sec_ctx *ctx = crypto_skcipher_ctx(tfm);
struct sec_cipher_ctx *c_ctx = &ctx->c_ctx;
@@ -843,7 +841,7 @@ static int sec_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
switch (c_alg) {
case SEC_CALG_3DES:
- ret = sec_skcipher_3des_setkey(tfm, key, keylen, c_mode);
+ ret = sec_skcipher_3des_setkey(tfm, key, keylen);
break;
case SEC_CALG_AES:
case SEC_CALG_SM4:
@@ -1371,7 +1369,7 @@ static int sec_skcipher_bd_fill_v3(struct sec_ctx *ctx, struct sec_req *req)
sec_sqe3->bd_param = cpu_to_le32(bd_param);
sec_sqe3->c_len_ivin |= cpu_to_le32(c_req->c_len);
- sec_sqe3->tag = cpu_to_le64(req);
+ sec_sqe3->tag = cpu_to_le64((unsigned long)req);
return 0;
}
@@ -2145,8 +2143,8 @@ static int sec_skcipher_decrypt(struct skcipher_request *sk_req)
return sec_skcipher_crypto(sk_req, false);
}
-#define SEC_SKCIPHER_GEN_ALG(sec_cra_name, sec_set_key, sec_min_key_size, \
- sec_max_key_size, ctx_init, ctx_exit, blk_size, iv_size)\
+#define SEC_SKCIPHER_ALG(sec_cra_name, sec_set_key, \
+ sec_min_key_size, sec_max_key_size, blk_size, iv_size)\
{\
.base = {\
.cra_name = sec_cra_name,\
@@ -2158,8 +2156,8 @@ static int sec_skcipher_decrypt(struct skcipher_request *sk_req)
.cra_ctxsize = sizeof(struct sec_ctx),\
.cra_module = THIS_MODULE,\
},\
- .init = ctx_init,\
- .exit = ctx_exit,\
+ .init = sec_skcipher_ctx_init,\
+ .exit = sec_skcipher_ctx_exit,\
.setkey = sec_set_key,\
.decrypt = sec_skcipher_decrypt,\
.encrypt = sec_skcipher_encrypt,\
@@ -2168,11 +2166,6 @@ static int sec_skcipher_decrypt(struct skcipher_request *sk_req)
.ivsize = iv_size,\
}
-#define SEC_SKCIPHER_ALG(name, key_func, min_key_size, \
- max_key_size, blk_size, iv_size) \
- SEC_SKCIPHER_GEN_ALG(name, key_func, min_key_size, max_key_size, \
- sec_skcipher_ctx_init, sec_skcipher_ctx_exit, blk_size, iv_size)
-
static struct sec_skcipher sec_skciphers[] = {
{
.alg_msk = BIT(0),
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 7bb99381bbdf..75aad04ffe5e 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -99,8 +99,8 @@
#define SEC_DBGFS_VAL_MAX_LEN 20
#define SEC_SINGLE_PORT_MAX_TRANS 0x2060
-#define SEC_SQE_MASK_OFFSET 64
-#define SEC_SQE_MASK_LEN 48
+#define SEC_SQE_MASK_OFFSET 16
+#define SEC_SQE_MASK_LEN 108
#define SEC_SHAPER_TYPE_RATE 400
#define SEC_DFX_BASE 0x301000
@@ -152,7 +152,7 @@ static const struct hisi_qm_cap_info sec_basic_info[] = {
{SEC_CORE_TYPE_NUM_CAP, 0x313c, 16, GENMASK(3, 0), 0x1, 0x1, 0x1},
{SEC_CORE_NUM_CAP, 0x313c, 8, GENMASK(7, 0), 0x4, 0x4, 0x4},
{SEC_CORES_PER_CLUSTER_NUM_CAP, 0x313c, 0, GENMASK(7, 0), 0x4, 0x4, 0x4},
- {SEC_CORE_ENABLE_BITMAP, 0x3140, 32, GENMASK(31, 0), 0x17F, 0x17F, 0xF},
+ {SEC_CORE_ENABLE_BITMAP, 0x3140, 0, GENMASK(31, 0), 0x17F, 0x17F, 0xF},
{SEC_DRV_ALG_BITMAP_LOW, 0x3144, 0, GENMASK(31, 0), 0x18050CB, 0x18050CB, 0x18670CF},
{SEC_DRV_ALG_BITMAP_HIGH, 0x3148, 0, GENMASK(31, 0), 0x395C, 0x395C, 0x395C},
{SEC_DEV_ALG_BITMAP_LOW, 0x314c, 0, GENMASK(31, 0), 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF},
@@ -282,6 +282,11 @@ static const struct debugfs_reg32 sec_dfx_regs[] = {
{"SEC_BD_SAA6 ", 0x301C38},
{"SEC_BD_SAA7 ", 0x301C3C},
{"SEC_BD_SAA8 ", 0x301C40},
+ {"SEC_RAS_CE_ENABLE ", 0x301050},
+ {"SEC_RAS_FE_ENABLE ", 0x301054},
+ {"SEC_RAS_NFE_ENABLE ", 0x301058},
+ {"SEC_REQ_TRNG_TIME_TH ", 0x30112C},
+ {"SEC_CHANNEL_RNG_REQ_THLD ", 0x302110},
};
/* define the SEC's dfx regs region and region length */
@@ -374,7 +379,7 @@ void sec_destroy_qps(struct hisi_qp **qps, int qp_num)
struct hisi_qp **sec_create_qps(void)
{
- int node = cpu_to_node(smp_processor_id());
+ int node = cpu_to_node(raw_smp_processor_id());
u32 ctx_num = ctx_q_num;
struct hisi_qp **qps;
int ret;
@@ -896,37 +901,36 @@ static int sec_debugfs_init(struct hisi_qm *qm)
struct device *dev = &qm->pdev->dev;
int ret;
- qm->debug.debug_root = debugfs_create_dir(dev_name(dev),
- sec_debugfs_root);
- qm->debug.sqe_mask_offset = SEC_SQE_MASK_OFFSET;
- qm->debug.sqe_mask_len = SEC_SQE_MASK_LEN;
-
ret = hisi_qm_regs_debugfs_init(qm, sec_diff_regs, ARRAY_SIZE(sec_diff_regs));
if (ret) {
dev_warn(dev, "Failed to init SEC diff regs!\n");
- goto debugfs_remove;
+ return ret;
}
+ qm->debug.debug_root = debugfs_create_dir(dev_name(dev),
+ sec_debugfs_root);
+ qm->debug.sqe_mask_offset = SEC_SQE_MASK_OFFSET;
+ qm->debug.sqe_mask_len = SEC_SQE_MASK_LEN;
+
hisi_qm_debug_init(qm);
ret = sec_debug_init(qm);
if (ret)
- goto failed_to_create;
+ goto debugfs_remove;
return 0;
-failed_to_create:
- hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(sec_diff_regs));
debugfs_remove:
- debugfs_remove_recursive(sec_debugfs_root);
+ debugfs_remove_recursive(qm->debug.debug_root);
+ hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(sec_diff_regs));
return ret;
}
static void sec_debugfs_exit(struct hisi_qm *qm)
{
- hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(sec_diff_regs));
-
debugfs_remove_recursive(qm->debug.debug_root);
+
+ hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(sec_diff_regs));
}
static int sec_show_last_regs_init(struct hisi_qm *qm)
@@ -1319,7 +1323,8 @@ static struct pci_driver sec_pci_driver = {
.probe = sec_probe,
.remove = sec_remove,
.err_handler = &sec_err_handler,
- .sriov_configure = hisi_qm_sriov_configure,
+ .sriov_configure = IS_ENABLED(CONFIG_PCI_IOV) ?
+ hisi_qm_sriov_configure : NULL,
.shutdown = hisi_qm_dev_shutdown,
.driver.pm = &sec_pm_ops,
};
diff --git a/drivers/crypto/hisilicon/sgl.c b/drivers/crypto/hisilicon/sgl.c
index 0beca257c20b..568acd0aee3f 100644
--- a/drivers/crypto/hisilicon/sgl.c
+++ b/drivers/crypto/hisilicon/sgl.c
@@ -161,9 +161,6 @@ static struct hisi_acc_hw_sgl *acc_get_sgl(struct hisi_acc_sgl_pool *pool,
struct mem_block *block;
u32 block_index, offset;
- if (!pool || !hw_sgl_dma || index >= pool->count)
- return ERR_PTR(-EINVAL);
-
block = pool->mem_block;
block_index = index / pool->sgl_num_per_block;
offset = index % pool->sgl_num_per_block;
@@ -230,7 +227,7 @@ hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev,
struct scatterlist *sg;
int sg_n;
- if (!dev || !sgl || !pool || !hw_sgl_dma)
+ if (!dev || !sgl || !pool || !hw_sgl_dma || index >= pool->count)
return ERR_PTR(-EINVAL);
sg_n = sg_nents(sgl);
diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c
index c650c741a18d..94e2d66b04b6 100644
--- a/drivers/crypto/hisilicon/zip/zip_crypto.c
+++ b/drivers/crypto/hisilicon/zip/zip_crypto.c
@@ -591,6 +591,7 @@ static struct acomp_alg hisi_zip_acomp_deflate = {
.base = {
.cra_name = "deflate",
.cra_driver_name = "hisi-deflate-acomp",
+ .cra_flags = CRYPTO_ALG_ASYNC,
.cra_module = THIS_MODULE,
.cra_priority = HZIP_ALG_PRIORITY,
.cra_ctxsize = sizeof(struct hisi_zip_ctx),
diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c
index 479ba8a1d6b5..c94a7b20d07e 100644
--- a/drivers/crypto/hisilicon/zip/zip_main.c
+++ b/drivers/crypto/hisilicon/zip/zip_main.c
@@ -454,7 +454,7 @@ MODULE_DEVICE_TABLE(pci, hisi_zip_dev_ids);
int zip_create_qps(struct hisi_qp **qps, int qp_num, int node)
{
if (node == NUMA_NO_NODE)
- node = cpu_to_node(smp_processor_id());
+ node = cpu_to_node(raw_smp_processor_id());
return hisi_qm_alloc_qps_node(&zip_devices, qp_num, 0, node, qps);
}
@@ -887,36 +887,34 @@ static int hisi_zip_ctrl_debug_init(struct hisi_qm *qm)
static int hisi_zip_debugfs_init(struct hisi_qm *qm)
{
struct device *dev = &qm->pdev->dev;
- struct dentry *dev_d;
int ret;
- dev_d = debugfs_create_dir(dev_name(dev), hzip_debugfs_root);
-
- qm->debug.sqe_mask_offset = HZIP_SQE_MASK_OFFSET;
- qm->debug.sqe_mask_len = HZIP_SQE_MASK_LEN;
- qm->debug.debug_root = dev_d;
ret = hisi_qm_regs_debugfs_init(qm, hzip_diff_regs, ARRAY_SIZE(hzip_diff_regs));
if (ret) {
dev_warn(dev, "Failed to init ZIP diff regs!\n");
- goto debugfs_remove;
+ return ret;
}
+ qm->debug.sqe_mask_offset = HZIP_SQE_MASK_OFFSET;
+ qm->debug.sqe_mask_len = HZIP_SQE_MASK_LEN;
+ qm->debug.debug_root = debugfs_create_dir(dev_name(dev),
+ hzip_debugfs_root);
+
hisi_qm_debug_init(qm);
if (qm->fun_type == QM_HW_PF) {
ret = hisi_zip_ctrl_debug_init(qm);
if (ret)
- goto failed_to_create;
+ goto debugfs_remove;
}
hisi_zip_dfx_debug_init(qm);
return 0;
-failed_to_create:
- hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hzip_diff_regs));
debugfs_remove:
- debugfs_remove_recursive(hzip_debugfs_root);
+ debugfs_remove_recursive(qm->debug.debug_root);
+ hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hzip_diff_regs));
return ret;
}
@@ -940,10 +938,10 @@ static void hisi_zip_debug_regs_clear(struct hisi_qm *qm)
static void hisi_zip_debugfs_exit(struct hisi_qm *qm)
{
- hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hzip_diff_regs));
-
debugfs_remove_recursive(qm->debug.debug_root);
+ hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hzip_diff_regs));
+
if (qm->fun_type == QM_HW_PF) {
hisi_zip_debug_regs_clear(qm);
qm->debug.curr_qm_qp_num = 0;
diff --git a/drivers/crypto/intel/iaa/iaa_crypto.h b/drivers/crypto/intel/iaa/iaa_crypto.h
index 014420f7beb0..56985e395263 100644
--- a/drivers/crypto/intel/iaa/iaa_crypto.h
+++ b/drivers/crypto/intel/iaa/iaa_crypto.h
@@ -49,20 +49,18 @@ struct iaa_wq {
struct iaa_device *iaa_device;
- u64 comp_calls;
- u64 comp_bytes;
- u64 decomp_calls;
- u64 decomp_bytes;
+ atomic64_t comp_calls;
+ atomic64_t comp_bytes;
+ atomic64_t decomp_calls;
+ atomic64_t decomp_bytes;
};
struct iaa_device_compression_mode {
const char *name;
struct aecs_comp_table_record *aecs_comp_table;
- struct aecs_decomp_table_record *aecs_decomp_table;
dma_addr_t aecs_comp_table_dma_addr;
- dma_addr_t aecs_decomp_table_dma_addr;
};
/* Representation of IAA device with wqs, populated by probe */
@@ -75,10 +73,10 @@ struct iaa_device {
int n_wq;
struct list_head wqs;
- u64 comp_calls;
- u64 comp_bytes;
- u64 decomp_calls;
- u64 decomp_bytes;
+ atomic64_t comp_calls;
+ atomic64_t comp_bytes;
+ atomic64_t decomp_calls;
+ atomic64_t decomp_bytes;
};
struct wq_table_entry {
@@ -107,23 +105,6 @@ struct aecs_comp_table_record {
u32 reserved_padding[2];
} __packed;
-/* AECS for decompress */
-struct aecs_decomp_table_record {
- u32 crc;
- u32 xor_checksum;
- u32 low_filter_param;
- u32 high_filter_param;
- u32 output_mod_idx;
- u32 drop_init_decomp_out_bytes;
- u32 reserved[36];
- u32 output_accum_data[2];
- u32 out_bits_valid;
- u32 bit_off_indexing;
- u32 input_accum_data[64];
- u8 size_qw[32];
- u32 decomp_state[1220];
-} __packed;
-
int iaa_aecs_init_fixed(void);
void iaa_aecs_cleanup_fixed(void);
@@ -136,9 +117,6 @@ struct iaa_compression_mode {
int ll_table_size;
u32 *d_table;
int d_table_size;
- u32 *header_table;
- int header_table_size;
- u16 gen_decomp_table_flags;
iaa_dev_comp_init_fn_t init;
iaa_dev_comp_free_fn_t free;
};
@@ -148,9 +126,6 @@ int add_iaa_compression_mode(const char *name,
int ll_table_size,
const u32 *d_table,
int d_table_size,
- const u8 *header_table,
- int header_table_size,
- u16 gen_decomp_table_flags,
iaa_dev_comp_init_fn_t init,
iaa_dev_comp_free_fn_t free);
diff --git a/drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c b/drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c
index 45cf5d74f0fb..19d9a333ac49 100644
--- a/drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c
+++ b/drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c
@@ -78,7 +78,6 @@ int iaa_aecs_init_fixed(void)
sizeof(fixed_ll_sym),
fixed_d_sym,
sizeof(fixed_d_sym),
- NULL, 0, 0,
init_fixed_mode, NULL);
if (!ret)
pr_debug("IAA fixed compression mode initialized\n");
diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c
index dfd3baf0a8d8..e810d286ee8c 100644
--- a/drivers/crypto/intel/iaa/iaa_crypto_main.c
+++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c
@@ -258,16 +258,14 @@ static void free_iaa_compression_mode(struct iaa_compression_mode *mode)
kfree(mode->name);
kfree(mode->ll_table);
kfree(mode->d_table);
- kfree(mode->header_table);
kfree(mode);
}
/*
- * IAA Compression modes are defined by an ll_table, a d_table, and an
- * optional header_table. These tables are typically generated and
- * captured using statistics collected from running actual
- * compress/decompress workloads.
+ * IAA Compression modes are defined by an ll_table and a d_table.
+ * These tables are typically generated and captured using statistics
+ * collected from running actual compress/decompress workloads.
*
* A module or other kernel code can add and remove compression modes
* with a given name using the exported @add_iaa_compression_mode()
@@ -315,9 +313,6 @@ EXPORT_SYMBOL_GPL(remove_iaa_compression_mode);
* @ll_table_size: The ll table size in bytes
* @d_table: The d table
* @d_table_size: The d table size in bytes
- * @header_table: Optional header table
- * @header_table_size: Optional header table size in bytes
- * @gen_decomp_table_flags: Otional flags used to generate the decomp table
* @init: Optional callback function to init the compression mode data
* @free: Optional callback function to free the compression mode data
*
@@ -330,9 +325,6 @@ int add_iaa_compression_mode(const char *name,
int ll_table_size,
const u32 *d_table,
int d_table_size,
- const u8 *header_table,
- int header_table_size,
- u16 gen_decomp_table_flags,
iaa_dev_comp_init_fn_t init,
iaa_dev_comp_free_fn_t free)
{
@@ -355,31 +347,19 @@ int add_iaa_compression_mode(const char *name,
goto free;
if (ll_table) {
- mode->ll_table = kzalloc(ll_table_size, GFP_KERNEL);
+ mode->ll_table = kmemdup(ll_table, ll_table_size, GFP_KERNEL);
if (!mode->ll_table)
goto free;
- memcpy(mode->ll_table, ll_table, ll_table_size);
mode->ll_table_size = ll_table_size;
}
if (d_table) {
- mode->d_table = kzalloc(d_table_size, GFP_KERNEL);
+ mode->d_table = kmemdup(d_table, d_table_size, GFP_KERNEL);
if (!mode->d_table)
goto free;
- memcpy(mode->d_table, d_table, d_table_size);
mode->d_table_size = d_table_size;
}
- if (header_table) {
- mode->header_table = kzalloc(header_table_size, GFP_KERNEL);
- if (!mode->header_table)
- goto free;
- memcpy(mode->header_table, header_table, header_table_size);
- mode->header_table_size = header_table_size;
- }
-
- mode->gen_decomp_table_flags = gen_decomp_table_flags;
-
mode->init = init;
mode->free = free;
@@ -420,10 +400,6 @@ static void free_device_compression_mode(struct iaa_device *iaa_device,
if (device_mode->aecs_comp_table)
dma_free_coherent(dev, size, device_mode->aecs_comp_table,
device_mode->aecs_comp_table_dma_addr);
- if (device_mode->aecs_decomp_table)
- dma_free_coherent(dev, size, device_mode->aecs_decomp_table,
- device_mode->aecs_decomp_table_dma_addr);
-
kfree(device_mode);
}
@@ -440,73 +416,6 @@ static int check_completion(struct device *dev,
bool compress,
bool only_once);
-static int decompress_header(struct iaa_device_compression_mode *device_mode,
- struct iaa_compression_mode *mode,
- struct idxd_wq *wq)
-{
- dma_addr_t src_addr, src2_addr;
- struct idxd_desc *idxd_desc;
- struct iax_hw_desc *desc;
- struct device *dev;
- int ret = 0;
-
- idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK);
- if (IS_ERR(idxd_desc))
- return PTR_ERR(idxd_desc);
-
- desc = idxd_desc->iax_hw;
-
- dev = &wq->idxd->pdev->dev;
-
- src_addr = dma_map_single(dev, (void *)mode->header_table,
- mode->header_table_size, DMA_TO_DEVICE);
- dev_dbg(dev, "%s: mode->name %s, src_addr %llx, dev %p, src %p, slen %d\n",
- __func__, mode->name, src_addr, dev,
- mode->header_table, mode->header_table_size);
- if (unlikely(dma_mapping_error(dev, src_addr))) {
- dev_dbg(dev, "dma_map_single err, exiting\n");
- ret = -ENOMEM;
- return ret;
- }
-
- desc->flags = IAX_AECS_GEN_FLAG;
- desc->opcode = IAX_OPCODE_DECOMPRESS;
-
- desc->src1_addr = (u64)src_addr;
- desc->src1_size = mode->header_table_size;
-
- src2_addr = device_mode->aecs_decomp_table_dma_addr;
- desc->src2_addr = (u64)src2_addr;
- desc->src2_size = 1088;
- dev_dbg(dev, "%s: mode->name %s, src2_addr %llx, dev %p, src2_size %d\n",
- __func__, mode->name, desc->src2_addr, dev, desc->src2_size);
- desc->max_dst_size = 0; // suppressed output
-
- desc->decompr_flags = mode->gen_decomp_table_flags;
-
- desc->priv = 0;
-
- desc->completion_addr = idxd_desc->compl_dma;
-
- ret = idxd_submit_desc(wq, idxd_desc);
- if (ret) {
- pr_err("%s: submit_desc failed ret=0x%x\n", __func__, ret);
- goto out;
- }
-
- ret = check_completion(dev, idxd_desc->iax_completion, false, false);
- if (ret)
- dev_dbg(dev, "%s: mode->name %s check_completion failed ret=%d\n",
- __func__, mode->name, ret);
- else
- dev_dbg(dev, "%s: mode->name %s succeeded\n", __func__,
- mode->name);
-out:
- dma_unmap_single(dev, src_addr, 1088, DMA_TO_DEVICE);
-
- return ret;
-}
-
static int init_device_compression_mode(struct iaa_device *iaa_device,
struct iaa_compression_mode *mode,
int idx, struct idxd_wq *wq)
@@ -529,24 +438,11 @@ static int init_device_compression_mode(struct iaa_device *iaa_device,
if (!device_mode->aecs_comp_table)
goto free;
- device_mode->aecs_decomp_table = dma_alloc_coherent(dev, size,
- &device_mode->aecs_decomp_table_dma_addr, GFP_KERNEL);
- if (!device_mode->aecs_decomp_table)
- goto free;
-
/* Add Huffman table to aecs */
memset(device_mode->aecs_comp_table, 0, sizeof(*device_mode->aecs_comp_table));
memcpy(device_mode->aecs_comp_table->ll_sym, mode->ll_table, mode->ll_table_size);
memcpy(device_mode->aecs_comp_table->d_sym, mode->d_table, mode->d_table_size);
- if (mode->header_table) {
- ret = decompress_header(device_mode, mode, wq);
- if (ret) {
- pr_debug("iaa header decompression failed: ret=%d\n", ret);
- goto free;
- }
- }
-
if (mode->init) {
ret = mode->init(device_mode);
if (ret)
@@ -908,6 +804,8 @@ static int save_iaa_wq(struct idxd_wq *wq)
return -EINVAL;
cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
+ if (!cpus_per_iaa)
+ cpus_per_iaa = 1;
out:
return 0;
}
@@ -923,10 +821,12 @@ static void remove_iaa_wq(struct idxd_wq *wq)
}
}
- if (nr_iaa)
+ if (nr_iaa) {
cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa;
- else
- cpus_per_iaa = 0;
+ if (!cpus_per_iaa)
+ cpus_per_iaa = 1;
+ } else
+ cpus_per_iaa = 1;
}
static int wq_table_add_wqs(int iaa, int cpu)
@@ -1020,7 +920,7 @@ static void rebalance_wq_table(void)
for_each_node_with_cpus(node) {
node_cpus = cpumask_of_node(node);
- for (cpu = 0; cpu < nr_cpus_per_node; cpu++) {
+ for (cpu = 0; cpu < cpumask_weight(node_cpus); cpu++) {
int node_cpu = cpumask_nth(cpu, node_cpus);
if (WARN_ON(node_cpu >= nr_cpu_ids)) {
@@ -1177,8 +1077,8 @@ static void iaa_desc_complete(struct idxd_desc *idxd_desc,
update_total_comp_bytes_out(ctx->req->dlen);
update_wq_comp_bytes(iaa_wq->wq, ctx->req->dlen);
} else {
- update_total_decomp_bytes_in(ctx->req->dlen);
- update_wq_decomp_bytes(iaa_wq->wq, ctx->req->dlen);
+ update_total_decomp_bytes_in(ctx->req->slen);
+ update_wq_decomp_bytes(iaa_wq->wq, ctx->req->slen);
}
if (ctx->compress && compression_ctx->verify_compress) {
@@ -1324,7 +1224,7 @@ static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req,
*compression_crc = idxd_desc->iax_completion->crc;
- if (!ctx->async_mode)
+ if (!ctx->async_mode || disable_async)
idxd_free_desc(wq, idxd_desc);
out:
return ret;
@@ -1570,7 +1470,7 @@ static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req,
*dlen = req->dlen;
- if (!ctx->async_mode)
+ if (!ctx->async_mode || disable_async)
idxd_free_desc(wq, idxd_desc);
/* Update stats */
@@ -1916,6 +1816,7 @@ static struct acomp_alg iaa_acomp_fixed_deflate = {
.base = {
.cra_name = "deflate",
.cra_driver_name = "deflate-iaa",
+ .cra_flags = CRYPTO_ALG_ASYNC,
.cra_ctxsize = sizeof(struct iaa_compression_ctx),
.cra_module = THIS_MODULE,
.cra_priority = IAA_ALG_PRIORITY,
@@ -2102,7 +2003,7 @@ static int __init iaa_crypto_init_module(void)
int ret = 0;
int node;
- nr_cpus = num_online_cpus();
+ nr_cpus = num_possible_cpus();
for_each_node_with_cpus(node)
nr_nodes++;
if (!nr_nodes) {
diff --git a/drivers/crypto/intel/iaa/iaa_crypto_stats.c b/drivers/crypto/intel/iaa/iaa_crypto_stats.c
index 2e3b7b73af20..f5cc3d29ca19 100644
--- a/drivers/crypto/intel/iaa/iaa_crypto_stats.c
+++ b/drivers/crypto/intel/iaa/iaa_crypto_stats.c
@@ -17,165 +17,117 @@
#include "iaa_crypto.h"
#include "iaa_crypto_stats.h"
-static u64 total_comp_calls;
-static u64 total_decomp_calls;
-static u64 total_sw_decomp_calls;
-static u64 max_comp_delay_ns;
-static u64 max_decomp_delay_ns;
-static u64 max_acomp_delay_ns;
-static u64 max_adecomp_delay_ns;
-static u64 total_comp_bytes_out;
-static u64 total_decomp_bytes_in;
-static u64 total_completion_einval_errors;
-static u64 total_completion_timeout_errors;
-static u64 total_completion_comp_buf_overflow_errors;
+static atomic64_t total_comp_calls;
+static atomic64_t total_decomp_calls;
+static atomic64_t total_sw_decomp_calls;
+static atomic64_t total_comp_bytes_out;
+static atomic64_t total_decomp_bytes_in;
+static atomic64_t total_completion_einval_errors;
+static atomic64_t total_completion_timeout_errors;
+static atomic64_t total_completion_comp_buf_overflow_errors;
static struct dentry *iaa_crypto_debugfs_root;
void update_total_comp_calls(void)
{
- total_comp_calls++;
+ atomic64_inc(&total_comp_calls);
}
void update_total_comp_bytes_out(int n)
{
- total_comp_bytes_out += n;
+ atomic64_add(n, &total_comp_bytes_out);
}
void update_total_decomp_calls(void)
{
- total_decomp_calls++;
+ atomic64_inc(&total_decomp_calls);
}
void update_total_sw_decomp_calls(void)
{
- total_sw_decomp_calls++;
+ atomic64_inc(&total_sw_decomp_calls);
}
void update_total_decomp_bytes_in(int n)
{
- total_decomp_bytes_in += n;
+ atomic64_add(n, &total_decomp_bytes_in);
}
void update_completion_einval_errs(void)
{
- total_completion_einval_errors++;
+ atomic64_inc(&total_completion_einval_errors);
}
void update_completion_timeout_errs(void)
{
- total_completion_timeout_errors++;
+ atomic64_inc(&total_completion_timeout_errors);
}
void update_completion_comp_buf_overflow_errs(void)
{
- total_completion_comp_buf_overflow_errors++;
-}
-
-void update_max_comp_delay_ns(u64 start_time_ns)
-{
- u64 time_diff;
-
- time_diff = ktime_get_ns() - start_time_ns;
-
- if (time_diff > max_comp_delay_ns)
- max_comp_delay_ns = time_diff;
-}
-
-void update_max_decomp_delay_ns(u64 start_time_ns)
-{
- u64 time_diff;
-
- time_diff = ktime_get_ns() - start_time_ns;
-
- if (time_diff > max_decomp_delay_ns)
- max_decomp_delay_ns = time_diff;
-}
-
-void update_max_acomp_delay_ns(u64 start_time_ns)
-{
- u64 time_diff;
-
- time_diff = ktime_get_ns() - start_time_ns;
-
- if (time_diff > max_acomp_delay_ns)
- max_acomp_delay_ns = time_diff;
-}
-
-void update_max_adecomp_delay_ns(u64 start_time_ns)
-{
- u64 time_diff;
-
- time_diff = ktime_get_ns() - start_time_ns;
-
- if (time_diff > max_adecomp_delay_ns)
- max_adecomp_delay_ns = time_diff;
+ atomic64_inc(&total_completion_comp_buf_overflow_errors);
}
void update_wq_comp_calls(struct idxd_wq *idxd_wq)
{
struct iaa_wq *wq = idxd_wq_get_private(idxd_wq);
- wq->comp_calls++;
- wq->iaa_device->comp_calls++;
+ atomic64_inc(&wq->comp_calls);
+ atomic64_inc(&wq->iaa_device->comp_calls);
}
void update_wq_comp_bytes(struct idxd_wq *idxd_wq, int n)
{
struct iaa_wq *wq = idxd_wq_get_private(idxd_wq);
- wq->comp_bytes += n;
- wq->iaa_device->comp_bytes += n;
+ atomic64_add(n, &wq->comp_bytes);
+ atomic64_add(n, &wq->iaa_device->comp_bytes);
}
void update_wq_decomp_calls(struct idxd_wq *idxd_wq)
{
struct iaa_wq *wq = idxd_wq_get_private(idxd_wq);
- wq->decomp_calls++;
- wq->iaa_device->decomp_calls++;
+ atomic64_inc(&wq->decomp_calls);
+ atomic64_inc(&wq->iaa_device->decomp_calls);
}
void update_wq_decomp_bytes(struct idxd_wq *idxd_wq, int n)
{
struct iaa_wq *wq = idxd_wq_get_private(idxd_wq);
- wq->decomp_bytes += n;
- wq->iaa_device->decomp_bytes += n;
+ atomic64_add(n, &wq->decomp_bytes);
+ atomic64_add(n, &wq->iaa_device->decomp_bytes);
}
static void reset_iaa_crypto_stats(void)
{
- total_comp_calls = 0;
- total_decomp_calls = 0;
- total_sw_decomp_calls = 0;
- max_comp_delay_ns = 0;
- max_decomp_delay_ns = 0;
- max_acomp_delay_ns = 0;
- max_adecomp_delay_ns = 0;
- total_comp_bytes_out = 0;
- total_decomp_bytes_in = 0;
- total_completion_einval_errors = 0;
- total_completion_timeout_errors = 0;
- total_completion_comp_buf_overflow_errors = 0;
+ atomic64_set(&total_comp_calls, 0);
+ atomic64_set(&total_decomp_calls, 0);
+ atomic64_set(&total_sw_decomp_calls, 0);
+ atomic64_set(&total_comp_bytes_out, 0);
+ atomic64_set(&total_decomp_bytes_in, 0);
+ atomic64_set(&total_completion_einval_errors, 0);
+ atomic64_set(&total_completion_timeout_errors, 0);
+ atomic64_set(&total_completion_comp_buf_overflow_errors, 0);
}
static void reset_wq_stats(struct iaa_wq *wq)
{
- wq->comp_calls = 0;
- wq->comp_bytes = 0;
- wq->decomp_calls = 0;
- wq->decomp_bytes = 0;
+ atomic64_set(&wq->comp_calls, 0);
+ atomic64_set(&wq->comp_bytes, 0);
+ atomic64_set(&wq->decomp_calls, 0);
+ atomic64_set(&wq->decomp_bytes, 0);
}
static void reset_device_stats(struct iaa_device *iaa_device)
{
struct iaa_wq *iaa_wq;
- iaa_device->comp_calls = 0;
- iaa_device->comp_bytes = 0;
- iaa_device->decomp_calls = 0;
- iaa_device->decomp_bytes = 0;
+ atomic64_set(&iaa_device->comp_calls, 0);
+ atomic64_set(&iaa_device->comp_bytes, 0);
+ atomic64_set(&iaa_device->decomp_calls, 0);
+ atomic64_set(&iaa_device->decomp_bytes, 0);
list_for_each_entry(iaa_wq, &iaa_device->wqs, list)
reset_wq_stats(iaa_wq);
@@ -184,10 +136,14 @@ static void reset_device_stats(struct iaa_device *iaa_device)
static void wq_show(struct seq_file *m, struct iaa_wq *iaa_wq)
{
seq_printf(m, " name: %s\n", iaa_wq->wq->name);
- seq_printf(m, " comp_calls: %llu\n", iaa_wq->comp_calls);
- seq_printf(m, " comp_bytes: %llu\n", iaa_wq->comp_bytes);
- seq_printf(m, " decomp_calls: %llu\n", iaa_wq->decomp_calls);
- seq_printf(m, " decomp_bytes: %llu\n\n", iaa_wq->decomp_bytes);
+ seq_printf(m, " comp_calls: %llu\n",
+ atomic64_read(&iaa_wq->comp_calls));
+ seq_printf(m, " comp_bytes: %llu\n",
+ atomic64_read(&iaa_wq->comp_bytes));
+ seq_printf(m, " decomp_calls: %llu\n",
+ atomic64_read(&iaa_wq->decomp_calls));
+ seq_printf(m, " decomp_bytes: %llu\n\n",
+ atomic64_read(&iaa_wq->decomp_bytes));
}
static void device_stats_show(struct seq_file *m, struct iaa_device *iaa_device)
@@ -197,30 +153,41 @@ static void device_stats_show(struct seq_file *m, struct iaa_device *iaa_device)
seq_puts(m, "iaa device:\n");
seq_printf(m, " id: %d\n", iaa_device->idxd->id);
seq_printf(m, " n_wqs: %d\n", iaa_device->n_wq);
- seq_printf(m, " comp_calls: %llu\n", iaa_device->comp_calls);
- seq_printf(m, " comp_bytes: %llu\n", iaa_device->comp_bytes);
- seq_printf(m, " decomp_calls: %llu\n", iaa_device->decomp_calls);
- seq_printf(m, " decomp_bytes: %llu\n", iaa_device->decomp_bytes);
+ seq_printf(m, " comp_calls: %llu\n",
+ atomic64_read(&iaa_device->comp_calls));
+ seq_printf(m, " comp_bytes: %llu\n",
+ atomic64_read(&iaa_device->comp_bytes));
+ seq_printf(m, " decomp_calls: %llu\n",
+ atomic64_read(&iaa_device->decomp_calls));
+ seq_printf(m, " decomp_bytes: %llu\n",
+ atomic64_read(&iaa_device->decomp_bytes));
seq_puts(m, " wqs:\n");
list_for_each_entry(iaa_wq, &iaa_device->wqs, list)
wq_show(m, iaa_wq);
}
-static void global_stats_show(struct seq_file *m)
+static int global_stats_show(struct seq_file *m, void *v)
{
seq_puts(m, "global stats:\n");
- seq_printf(m, " total_comp_calls: %llu\n", total_comp_calls);
- seq_printf(m, " total_decomp_calls: %llu\n", total_decomp_calls);
- seq_printf(m, " total_sw_decomp_calls: %llu\n", total_sw_decomp_calls);
- seq_printf(m, " total_comp_bytes_out: %llu\n", total_comp_bytes_out);
- seq_printf(m, " total_decomp_bytes_in: %llu\n", total_decomp_bytes_in);
+ seq_printf(m, " total_comp_calls: %llu\n",
+ atomic64_read(&total_comp_calls));
+ seq_printf(m, " total_decomp_calls: %llu\n",
+ atomic64_read(&total_decomp_calls));
+ seq_printf(m, " total_sw_decomp_calls: %llu\n",
+ atomic64_read(&total_sw_decomp_calls));
+ seq_printf(m, " total_comp_bytes_out: %llu\n",
+ atomic64_read(&total_comp_bytes_out));
+ seq_printf(m, " total_decomp_bytes_in: %llu\n",
+ atomic64_read(&total_decomp_bytes_in));
seq_printf(m, " total_completion_einval_errors: %llu\n",
- total_completion_einval_errors);
+ atomic64_read(&total_completion_einval_errors));
seq_printf(m, " total_completion_timeout_errors: %llu\n",
- total_completion_timeout_errors);
+ atomic64_read(&total_completion_timeout_errors));
seq_printf(m, " total_completion_comp_buf_overflow_errors: %llu\n\n",
- total_completion_comp_buf_overflow_errors);
+ atomic64_read(&total_completion_comp_buf_overflow_errors));
+
+ return 0;
}
static int wq_stats_show(struct seq_file *m, void *v)
@@ -229,8 +196,6 @@ static int wq_stats_show(struct seq_file *m, void *v)
mutex_lock(&iaa_devices_lock);
- global_stats_show(m);
-
list_for_each_entry(iaa_device, &iaa_devices, list)
device_stats_show(m, iaa_device);
@@ -267,6 +232,18 @@ static const struct file_operations wq_stats_fops = {
.release = single_release,
};
+static int global_stats_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, global_stats_show, file);
+}
+
+static const struct file_operations global_stats_fops = {
+ .open = global_stats_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
DEFINE_DEBUGFS_ATTRIBUTE(wq_stats_reset_fops, NULL, iaa_crypto_stats_reset, "%llu\n");
int __init iaa_crypto_debugfs_init(void)
@@ -275,27 +252,9 @@ int __init iaa_crypto_debugfs_init(void)
return -ENODEV;
iaa_crypto_debugfs_root = debugfs_create_dir("iaa_crypto", NULL);
- if (!iaa_crypto_debugfs_root)
- return -ENOMEM;
-
- debugfs_create_u64("max_comp_delay_ns", 0644,
- iaa_crypto_debugfs_root, &max_comp_delay_ns);
- debugfs_create_u64("max_decomp_delay_ns", 0644,
- iaa_crypto_debugfs_root, &max_decomp_delay_ns);
- debugfs_create_u64("max_acomp_delay_ns", 0644,
- iaa_crypto_debugfs_root, &max_comp_delay_ns);
- debugfs_create_u64("max_adecomp_delay_ns", 0644,
- iaa_crypto_debugfs_root, &max_decomp_delay_ns);
- debugfs_create_u64("total_comp_calls", 0644,
- iaa_crypto_debugfs_root, &total_comp_calls);
- debugfs_create_u64("total_decomp_calls", 0644,
- iaa_crypto_debugfs_root, &total_decomp_calls);
- debugfs_create_u64("total_sw_decomp_calls", 0644,
- iaa_crypto_debugfs_root, &total_sw_decomp_calls);
- debugfs_create_u64("total_comp_bytes_out", 0644,
- iaa_crypto_debugfs_root, &total_comp_bytes_out);
- debugfs_create_u64("total_decomp_bytes_in", 0644,
- iaa_crypto_debugfs_root, &total_decomp_bytes_in);
+
+ debugfs_create_file("global_stats", 0644, iaa_crypto_debugfs_root, NULL,
+ &global_stats_fops);
debugfs_create_file("wq_stats", 0644, iaa_crypto_debugfs_root, NULL,
&wq_stats_fops);
debugfs_create_file("stats_reset", 0644, iaa_crypto_debugfs_root, NULL,
diff --git a/drivers/crypto/intel/iaa/iaa_crypto_stats.h b/drivers/crypto/intel/iaa/iaa_crypto_stats.h
index c10b87b86fa4..3787a5f507eb 100644
--- a/drivers/crypto/intel/iaa/iaa_crypto_stats.h
+++ b/drivers/crypto/intel/iaa/iaa_crypto_stats.h
@@ -13,10 +13,6 @@ void update_total_comp_bytes_out(int n);
void update_total_decomp_calls(void);
void update_total_sw_decomp_calls(void);
void update_total_decomp_bytes_in(int n);
-void update_max_comp_delay_ns(u64 start_time_ns);
-void update_max_decomp_delay_ns(u64 start_time_ns);
-void update_max_acomp_delay_ns(u64 start_time_ns);
-void update_max_adecomp_delay_ns(u64 start_time_ns);
void update_completion_einval_errs(void);
void update_completion_timeout_errs(void);
void update_completion_comp_buf_overflow_errs(void);
@@ -35,10 +31,6 @@ static inline void update_total_comp_bytes_out(int n) {}
static inline void update_total_decomp_calls(void) {}
static inline void update_total_sw_decomp_calls(void) {}
static inline void update_total_decomp_bytes_in(int n) {}
-static inline void update_max_comp_delay_ns(u64 start_time_ns) {}
-static inline void update_max_decomp_delay_ns(u64 start_time_ns) {}
-static inline void update_max_acomp_delay_ns(u64 start_time_ns) {}
-static inline void update_max_adecomp_delay_ns(u64 start_time_ns) {}
static inline void update_completion_einval_errs(void) {}
static inline void update_completion_timeout_errs(void) {}
static inline void update_completion_comp_buf_overflow_errs(void) {}
diff --git a/drivers/crypto/intel/qat/Kconfig b/drivers/crypto/intel/qat/Kconfig
index c120f6715a09..02fb8abe4e6e 100644
--- a/drivers/crypto/intel/qat/Kconfig
+++ b/drivers/crypto/intel/qat/Kconfig
@@ -106,3 +106,17 @@ config CRYPTO_DEV_QAT_C62XVF
To compile this as a module, choose M here: the module
will be called qat_c62xvf.
+
+config CRYPTO_DEV_QAT_ERROR_INJECTION
+ bool "Support for Intel(R) QAT Devices Heartbeat Error Injection"
+ depends on CRYPTO_DEV_QAT
+ depends on DEBUG_FS
+ help
+ Enables a mechanism that allows to inject a heartbeat error on
+ Intel(R) QuickAssist devices for testing purposes.
+
+ This is intended for developer use only.
+ If unsure, say N.
+
+ This functionality is available via debugfs entry of the Intel(R)
+ QuickAssist device
diff --git a/drivers/crypto/intel/qat/qat_420xx/Makefile b/drivers/crypto/intel/qat/qat_420xx/Makefile
index a90fbe00b3c8..45728659fbc4 100644
--- a/drivers/crypto/intel/qat/qat_420xx/Makefile
+++ b/drivers/crypto/intel/qat/qat_420xx/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-ccflags-y := -I $(srctree)/$(src)/../qat_common
+ccflags-y := -I $(src)/../qat_common
obj-$(CONFIG_CRYPTO_DEV_QAT_420XX) += qat_420xx.o
qat_420xx-objs := adf_drv.o adf_420xx_hw_data.o
diff --git a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c
index a87d29ae724f..78f0ea49254d 100644
--- a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c
@@ -10,12 +10,14 @@
#include <adf_fw_config.h>
#include <adf_gen4_config.h>
#include <adf_gen4_dc.h>
+#include <adf_gen4_hw_csr_data.h>
#include <adf_gen4_hw_data.h>
#include <adf_gen4_pfvf.h>
#include <adf_gen4_pm.h>
#include <adf_gen4_ras.h>
#include <adf_gen4_timer.h>
#include <adf_gen4_tl.h>
+#include <adf_gen4_vf_mig.h>
#include "adf_420xx_hw_data.h"
#include "icp_qat_hw.h"
@@ -296,7 +298,7 @@ static const u32 *adf_get_arbiter_mapping(struct adf_accel_dev *accel_dev)
{
if (adf_gen4_init_thd2arb_map(accel_dev))
dev_warn(&GET_DEV(accel_dev),
- "Generate of the thread to arbiter map failed");
+ "Failed to generate thread to arbiter mapping");
return GET_HW_DATA(accel_dev)->thd_to_arb_map;
}
@@ -361,53 +363,6 @@ static u32 get_ena_thd_mask(struct adf_accel_dev *accel_dev, u32 obj_num)
}
}
-static u16 get_ring_to_svc_map(struct adf_accel_dev *accel_dev)
-{
- enum adf_cfg_service_type rps[RP_GROUP_COUNT] = { };
- const struct adf_fw_config *fw_config;
- u16 ring_to_svc_map;
- int i, j;
-
- fw_config = get_fw_config(accel_dev);
- if (!fw_config)
- return 0;
-
- for (i = 0; i < RP_GROUP_COUNT; i++) {
- switch (fw_config[i].ae_mask) {
- case ADF_AE_GROUP_0:
- j = RP_GROUP_0;
- break;
- case ADF_AE_GROUP_1:
- j = RP_GROUP_1;
- break;
- default:
- return 0;
- }
-
- switch (fw_config[i].obj) {
- case ADF_FW_SYM_OBJ:
- rps[j] = SYM;
- break;
- case ADF_FW_ASYM_OBJ:
- rps[j] = ASYM;
- break;
- case ADF_FW_DC_OBJ:
- rps[j] = COMP;
- break;
- default:
- rps[j] = 0;
- break;
- }
- }
-
- ring_to_svc_map = rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_0_SHIFT |
- rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_1_SHIFT |
- rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_2_SHIFT |
- rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_3_SHIFT;
-
- return ring_to_svc_map;
-}
-
static const char *uof_get_name(struct adf_accel_dev *accel_dev, u32 obj_num,
const char * const fw_objs[], int num_objs)
{
@@ -433,6 +388,20 @@ static const char *uof_get_name_420xx(struct adf_accel_dev *accel_dev, u32 obj_n
return uof_get_name(accel_dev, obj_num, adf_420xx_fw_objs, num_fw_objs);
}
+static int uof_get_obj_type(struct adf_accel_dev *accel_dev, u32 obj_num)
+{
+ const struct adf_fw_config *fw_config;
+
+ if (obj_num >= uof_get_num_objs(accel_dev))
+ return -EINVAL;
+
+ fw_config = get_fw_config(accel_dev);
+ if (!fw_config)
+ return -EINVAL;
+
+ return fw_config[obj_num].obj;
+}
+
static u32 uof_get_ae_mask(struct adf_accel_dev *accel_dev, u32 obj_num)
{
const struct adf_fw_config *fw_config;
@@ -496,12 +465,13 @@ void adf_init_hw_data_420xx(struct adf_hw_device_data *hw_data, u32 dev_id)
hw_data->fw_mmp_name = ADF_420XX_MMP;
hw_data->uof_get_name = uof_get_name_420xx;
hw_data->uof_get_num_objs = uof_get_num_objs;
+ hw_data->uof_get_obj_type = uof_get_obj_type;
hw_data->uof_get_ae_mask = uof_get_ae_mask;
hw_data->get_rp_group = get_rp_group;
hw_data->get_ena_thd_mask = get_ena_thd_mask;
hw_data->set_msix_rttable = adf_gen4_set_msix_default_rttable;
hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer;
- hw_data->get_ring_to_svc_map = get_ring_to_svc_map;
+ hw_data->get_ring_to_svc_map = adf_gen4_get_ring_to_svc_map;
hw_data->disable_iov = adf_disable_sriov;
hw_data->ring_pair_reset = adf_gen4_ring_pair_reset;
hw_data->enable_pm = adf_gen4_enable_pm;
@@ -519,6 +489,7 @@ void adf_init_hw_data_420xx(struct adf_hw_device_data *hw_data, u32 dev_id)
adf_gen4_init_dc_ops(&hw_data->dc_ops);
adf_gen4_init_ras_ops(&hw_data->ras_ops);
adf_gen4_init_tl_data(&hw_data->tl_data);
+ adf_gen4_init_vf_mig_ops(&hw_data->vfmig_ops);
adf_init_rl_data(&hw_data->rl_data);
}
diff --git a/drivers/crypto/intel/qat/qat_4xxx/Makefile b/drivers/crypto/intel/qat/qat_4xxx/Makefile
index ff9c8b5897ea..9ba202079a22 100644
--- a/drivers/crypto/intel/qat/qat_4xxx/Makefile
+++ b/drivers/crypto/intel/qat/qat_4xxx/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
-ccflags-y := -I $(srctree)/$(src)/../qat_common
+ccflags-y := -I $(src)/../qat_common
obj-$(CONFIG_CRYPTO_DEV_QAT_4XXX) += qat_4xxx.o
qat_4xxx-objs := adf_drv.o adf_4xxx_hw_data.o
diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
index 94a0ebb03d8c..9fd7ec53b9f3 100644
--- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c
@@ -10,12 +10,14 @@
#include <adf_fw_config.h>
#include <adf_gen4_config.h>
#include <adf_gen4_dc.h>
+#include <adf_gen4_hw_csr_data.h>
#include <adf_gen4_hw_data.h>
#include <adf_gen4_pfvf.h>
#include <adf_gen4_pm.h>
#include "adf_gen4_ras.h"
#include <adf_gen4_timer.h>
#include <adf_gen4_tl.h>
+#include <adf_gen4_vf_mig.h>
#include "adf_4xxx_hw_data.h"
#include "icp_qat_hw.h"
@@ -208,7 +210,7 @@ static const u32 *adf_get_arbiter_mapping(struct adf_accel_dev *accel_dev)
{
if (adf_gen4_init_thd2arb_map(accel_dev))
dev_warn(&GET_DEV(accel_dev),
- "Generate of the thread to arbiter map failed");
+ "Failed to generate thread to arbiter mapping");
return GET_HW_DATA(accel_dev)->thd_to_arb_map;
}
@@ -320,53 +322,6 @@ static u32 get_ena_thd_mask_401xx(struct adf_accel_dev *accel_dev, u32 obj_num)
}
}
-static u16 get_ring_to_svc_map(struct adf_accel_dev *accel_dev)
-{
- enum adf_cfg_service_type rps[RP_GROUP_COUNT];
- const struct adf_fw_config *fw_config;
- u16 ring_to_svc_map;
- int i, j;
-
- fw_config = get_fw_config(accel_dev);
- if (!fw_config)
- return 0;
-
- for (i = 0; i < RP_GROUP_COUNT; i++) {
- switch (fw_config[i].ae_mask) {
- case ADF_AE_GROUP_0:
- j = RP_GROUP_0;
- break;
- case ADF_AE_GROUP_1:
- j = RP_GROUP_1;
- break;
- default:
- return 0;
- }
-
- switch (fw_config[i].obj) {
- case ADF_FW_SYM_OBJ:
- rps[j] = SYM;
- break;
- case ADF_FW_ASYM_OBJ:
- rps[j] = ASYM;
- break;
- case ADF_FW_DC_OBJ:
- rps[j] = COMP;
- break;
- default:
- rps[j] = 0;
- break;
- }
- }
-
- ring_to_svc_map = rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_0_SHIFT |
- rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_1_SHIFT |
- rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_2_SHIFT |
- rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_3_SHIFT;
-
- return ring_to_svc_map;
-}
-
static const char *uof_get_name(struct adf_accel_dev *accel_dev, u32 obj_num,
const char * const fw_objs[], int num_objs)
{
@@ -399,6 +354,20 @@ static const char *uof_get_name_402xx(struct adf_accel_dev *accel_dev, u32 obj_n
return uof_get_name(accel_dev, obj_num, adf_402xx_fw_objs, num_fw_objs);
}
+static int uof_get_obj_type(struct adf_accel_dev *accel_dev, u32 obj_num)
+{
+ const struct adf_fw_config *fw_config;
+
+ if (obj_num >= uof_get_num_objs(accel_dev))
+ return -EINVAL;
+
+ fw_config = get_fw_config(accel_dev);
+ if (!fw_config)
+ return -EINVAL;
+
+ return fw_config[obj_num].obj;
+}
+
static u32 uof_get_ae_mask(struct adf_accel_dev *accel_dev, u32 obj_num)
{
const struct adf_fw_config *fw_config;
@@ -479,13 +448,16 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id)
break;
}
hw_data->uof_get_num_objs = uof_get_num_objs;
+ hw_data->uof_get_obj_type = uof_get_obj_type;
hw_data->uof_get_ae_mask = uof_get_ae_mask;
hw_data->get_rp_group = get_rp_group;
hw_data->set_msix_rttable = adf_gen4_set_msix_default_rttable;
hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer;
- hw_data->get_ring_to_svc_map = get_ring_to_svc_map;
+ hw_data->get_ring_to_svc_map = adf_gen4_get_ring_to_svc_map;
hw_data->disable_iov = adf_disable_sriov;
hw_data->ring_pair_reset = adf_gen4_ring_pair_reset;
+ hw_data->bank_state_save = adf_gen4_bank_state_save;
+ hw_data->bank_state_restore = adf_gen4_bank_state_restore;
hw_data->enable_pm = adf_gen4_enable_pm;
hw_data->handle_pm_interrupt = adf_gen4_handle_pm_interrupt;
hw_data->dev_config = adf_gen4_dev_config;
@@ -501,6 +473,7 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id)
adf_gen4_init_dc_ops(&hw_data->dc_ops);
adf_gen4_init_ras_ops(&hw_data->ras_ops);
adf_gen4_init_tl_data(&hw_data->tl_data);
+ adf_gen4_init_vf_mig_ops(&hw_data->vfmig_ops);
adf_init_rl_data(&hw_data->rl_data);
}
diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c b/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c
index 9762f2bf7727..d26564cebdec 100644
--- a/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c
+++ b/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c
@@ -197,7 +197,9 @@ module_pci_driver(adf_driver);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Intel");
MODULE_FIRMWARE(ADF_4XXX_FW);
+MODULE_FIRMWARE(ADF_402XX_FW);
MODULE_FIRMWARE(ADF_4XXX_MMP);
+MODULE_FIRMWARE(ADF_402XX_MMP);
MODULE_DESCRIPTION("Intel(R) QuickAssist Technology");
MODULE_VERSION(ADF_DRV_VERSION);
MODULE_SOFTDEP("pre: crypto-intel_qat");
diff --git a/drivers/crypto/intel/qat/qat_c3xxx/Makefile b/drivers/crypto/intel/qat/qat_c3xxx/Makefile
index 92ef416ccc78..7a06ad519bfc 100644
--- a/drivers/crypto/intel/qat/qat_c3xxx/Makefile
+++ b/drivers/crypto/intel/qat/qat_c3xxx/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-ccflags-y := -I $(srctree)/$(src)/../qat_common
+ccflags-y := -I $(src)/../qat_common
obj-$(CONFIG_CRYPTO_DEV_QAT_C3XXX) += qat_c3xxx.o
qat_c3xxx-objs := adf_drv.o adf_c3xxx_hw_data.o
diff --git a/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c
index a882e0ea2279..201f9412c582 100644
--- a/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c
@@ -6,6 +6,7 @@
#include <adf_common_drv.h>
#include <adf_gen2_config.h>
#include <adf_gen2_dc.h>
+#include <adf_gen2_hw_csr_data.h>
#include <adf_gen2_hw_data.h>
#include <adf_gen2_pfvf.h>
#include "adf_c3xxx_hw_data.h"
diff --git a/drivers/crypto/intel/qat/qat_c3xxxvf/Makefile b/drivers/crypto/intel/qat/qat_c3xxxvf/Makefile
index b6d76825a92c..7ef633058c4f 100644
--- a/drivers/crypto/intel/qat/qat_c3xxxvf/Makefile
+++ b/drivers/crypto/intel/qat/qat_c3xxxvf/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-ccflags-y := -I $(srctree)/$(src)/../qat_common
+ccflags-y := -I $(src)/../qat_common
obj-$(CONFIG_CRYPTO_DEV_QAT_C3XXXVF) += qat_c3xxxvf.o
qat_c3xxxvf-objs := adf_drv.o adf_c3xxxvf_hw_data.o
diff --git a/drivers/crypto/intel/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c b/drivers/crypto/intel/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c
index 84d9486e04de..a512ca4efd3f 100644
--- a/drivers/crypto/intel/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c
@@ -4,6 +4,7 @@
#include <adf_common_drv.h>
#include <adf_gen2_config.h>
#include <adf_gen2_dc.h>
+#include <adf_gen2_hw_csr_data.h>
#include <adf_gen2_hw_data.h>
#include <adf_gen2_pfvf.h>
#include <adf_pfvf_vf_msg.h>
diff --git a/drivers/crypto/intel/qat/qat_c62x/Makefile b/drivers/crypto/intel/qat/qat_c62x/Makefile
index d581f7c87d6c..cc9255b3b198 100644
--- a/drivers/crypto/intel/qat/qat_c62x/Makefile
+++ b/drivers/crypto/intel/qat/qat_c62x/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-ccflags-y := -I $(srctree)/$(src)/../qat_common
+ccflags-y := -I $(src)/../qat_common
obj-$(CONFIG_CRYPTO_DEV_QAT_C62X) += qat_c62x.o
qat_c62x-objs := adf_drv.o adf_c62x_hw_data.o
diff --git a/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c
index 48cf3eb7c734..6b5b0cf9c7c7 100644
--- a/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c
@@ -6,6 +6,7 @@
#include <adf_common_drv.h>
#include <adf_gen2_config.h>
#include <adf_gen2_dc.h>
+#include <adf_gen2_hw_csr_data.h>
#include <adf_gen2_hw_data.h>
#include <adf_gen2_pfvf.h>
#include "adf_c62x_hw_data.h"
diff --git a/drivers/crypto/intel/qat/qat_c62xvf/Makefile b/drivers/crypto/intel/qat/qat_c62xvf/Makefile
index 446c3d638605..256786662d60 100644
--- a/drivers/crypto/intel/qat/qat_c62xvf/Makefile
+++ b/drivers/crypto/intel/qat/qat_c62xvf/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-ccflags-y := -I $(srctree)/$(src)/../qat_common
+ccflags-y := -I $(src)/../qat_common
obj-$(CONFIG_CRYPTO_DEV_QAT_C62XVF) += qat_c62xvf.o
qat_c62xvf-objs := adf_drv.o adf_c62xvf_hw_data.o
diff --git a/drivers/crypto/intel/qat/qat_c62xvf/adf_c62xvf_hw_data.c b/drivers/crypto/intel/qat/qat_c62xvf/adf_c62xvf_hw_data.c
index 751d7aa57fc7..4aaaaf921734 100644
--- a/drivers/crypto/intel/qat/qat_c62xvf/adf_c62xvf_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_c62xvf/adf_c62xvf_hw_data.c
@@ -4,6 +4,7 @@
#include <adf_common_drv.h>
#include <adf_gen2_config.h>
#include <adf_gen2_dc.h>
+#include <adf_gen2_hw_csr_data.h>
#include <adf_gen2_hw_data.h>
#include <adf_gen2_pfvf.h>
#include <adf_pfvf_vf_msg.h>
diff --git a/drivers/crypto/intel/qat/qat_common/Makefile b/drivers/crypto/intel/qat/qat_common/Makefile
index 6908727bff3b..6f9266edc9f1 100644
--- a/drivers/crypto/intel/qat/qat_common/Makefile
+++ b/drivers/crypto/intel/qat/qat_common/Makefile
@@ -14,16 +14,20 @@ intel_qat-objs := adf_cfg.o \
adf_hw_arbiter.o \
adf_sysfs.o \
adf_sysfs_ras_counters.o \
+ adf_gen2_hw_csr_data.o \
adf_gen2_hw_data.o \
adf_gen2_config.o \
adf_gen4_config.o \
+ adf_gen4_hw_csr_data.o \
adf_gen4_hw_data.o \
+ adf_gen4_vf_mig.o \
adf_gen4_pm.o \
adf_gen2_dc.o \
adf_gen4_dc.o \
adf_gen4_ras.o \
adf_gen4_timer.o \
adf_clock.o \
+ adf_mstate_mgr.o \
qat_crypto.o \
qat_compression.o \
qat_comp_algs.o \
@@ -52,4 +56,6 @@ intel_qat-$(CONFIG_DEBUG_FS) += adf_transport_debug.o \
intel_qat-$(CONFIG_PCI_IOV) += adf_sriov.o adf_vf_isr.o adf_pfvf_utils.o \
adf_pfvf_pf_msg.o adf_pfvf_pf_proto.o \
adf_pfvf_vf_msg.o adf_pfvf_vf_proto.o \
- adf_gen2_pfvf.o adf_gen4_pfvf.o
+ adf_gen2_pfvf.o adf_gen4_pfvf.o qat_mig_dev.o
+
+intel_qat-$(CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION) += adf_heartbeat_inject.o
diff --git a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
index a16c7e6edc65..7830ecb1a1f1 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h
@@ -9,6 +9,7 @@
#include <linux/pci.h>
#include <linux/ratelimit.h>
#include <linux/types.h>
+#include <linux/qat/qat_mig_dev.h>
#include "adf_cfg_common.h"
#include "adf_rl.h"
#include "adf_telemetry.h"
@@ -140,6 +141,40 @@ struct admin_info {
u32 mailbox_offset;
};
+struct ring_config {
+ u64 base;
+ u32 config;
+ u32 head;
+ u32 tail;
+ u32 reserved0;
+};
+
+struct bank_state {
+ u32 ringstat0;
+ u32 ringstat1;
+ u32 ringuostat;
+ u32 ringestat;
+ u32 ringnestat;
+ u32 ringnfstat;
+ u32 ringfstat;
+ u32 ringcstat0;
+ u32 ringcstat1;
+ u32 ringcstat2;
+ u32 ringcstat3;
+ u32 iaintflagen;
+ u32 iaintflagreg;
+ u32 iaintflagsrcsel0;
+ u32 iaintflagsrcsel1;
+ u32 iaintcolen;
+ u32 iaintcolctl;
+ u32 iaintflagandcolen;
+ u32 ringexpstat;
+ u32 ringexpintenable;
+ u32 ringsrvarben;
+ u32 reserved0;
+ struct ring_config rings[ADF_ETR_MAX_RINGS_PER_BANK];
+};
+
struct adf_hw_csr_ops {
u64 (*build_csr_ring_base_addr)(dma_addr_t addr, u32 size);
u32 (*read_csr_ring_head)(void __iomem *csr_base_addr, u32 bank,
@@ -150,22 +185,49 @@ struct adf_hw_csr_ops {
u32 ring);
void (*write_csr_ring_tail)(void __iomem *csr_base_addr, u32 bank,
u32 ring, u32 value);
+ u32 (*read_csr_stat)(void __iomem *csr_base_addr, u32 bank);
+ u32 (*read_csr_uo_stat)(void __iomem *csr_base_addr, u32 bank);
u32 (*read_csr_e_stat)(void __iomem *csr_base_addr, u32 bank);
+ u32 (*read_csr_ne_stat)(void __iomem *csr_base_addr, u32 bank);
+ u32 (*read_csr_nf_stat)(void __iomem *csr_base_addr, u32 bank);
+ u32 (*read_csr_f_stat)(void __iomem *csr_base_addr, u32 bank);
+ u32 (*read_csr_c_stat)(void __iomem *csr_base_addr, u32 bank);
+ u32 (*read_csr_exp_stat)(void __iomem *csr_base_addr, u32 bank);
+ u32 (*read_csr_exp_int_en)(void __iomem *csr_base_addr, u32 bank);
+ void (*write_csr_exp_int_en)(void __iomem *csr_base_addr, u32 bank,
+ u32 value);
+ u32 (*read_csr_ring_config)(void __iomem *csr_base_addr, u32 bank,
+ u32 ring);
void (*write_csr_ring_config)(void __iomem *csr_base_addr, u32 bank,
u32 ring, u32 value);
+ dma_addr_t (*read_csr_ring_base)(void __iomem *csr_base_addr, u32 bank,
+ u32 ring);
void (*write_csr_ring_base)(void __iomem *csr_base_addr, u32 bank,
u32 ring, dma_addr_t addr);
+ u32 (*read_csr_int_en)(void __iomem *csr_base_addr, u32 bank);
+ void (*write_csr_int_en)(void __iomem *csr_base_addr, u32 bank,
+ u32 value);
+ u32 (*read_csr_int_flag)(void __iomem *csr_base_addr, u32 bank);
void (*write_csr_int_flag)(void __iomem *csr_base_addr, u32 bank,
u32 value);
+ u32 (*read_csr_int_srcsel)(void __iomem *csr_base_addr, u32 bank);
void (*write_csr_int_srcsel)(void __iomem *csr_base_addr, u32 bank);
+ void (*write_csr_int_srcsel_w_val)(void __iomem *csr_base_addr,
+ u32 bank, u32 value);
+ u32 (*read_csr_int_col_en)(void __iomem *csr_base_addr, u32 bank);
void (*write_csr_int_col_en)(void __iomem *csr_base_addr, u32 bank,
u32 value);
+ u32 (*read_csr_int_col_ctl)(void __iomem *csr_base_addr, u32 bank);
void (*write_csr_int_col_ctl)(void __iomem *csr_base_addr, u32 bank,
u32 value);
+ u32 (*read_csr_int_flag_and_col)(void __iomem *csr_base_addr,
+ u32 bank);
void (*write_csr_int_flag_and_col)(void __iomem *csr_base_addr,
u32 bank, u32 value);
+ u32 (*read_csr_ring_srv_arb_en)(void __iomem *csr_base_addr, u32 bank);
void (*write_csr_ring_srv_arb_en)(void __iomem *csr_base_addr, u32 bank,
u32 value);
+ u32 (*get_int_col_ctl_enable_mask)(void);
};
struct adf_cfg_device_data;
@@ -197,6 +259,20 @@ struct adf_dc_ops {
void (*build_deflate_ctx)(void *ctx);
};
+struct qat_migdev_ops {
+ int (*init)(struct qat_mig_dev *mdev);
+ void (*cleanup)(struct qat_mig_dev *mdev);
+ void (*reset)(struct qat_mig_dev *mdev);
+ int (*open)(struct qat_mig_dev *mdev);
+ void (*close)(struct qat_mig_dev *mdev);
+ int (*suspend)(struct qat_mig_dev *mdev);
+ int (*resume)(struct qat_mig_dev *mdev);
+ int (*save_state)(struct qat_mig_dev *mdev);
+ int (*save_setup)(struct qat_mig_dev *mdev);
+ int (*load_state)(struct qat_mig_dev *mdev);
+ int (*load_setup)(struct qat_mig_dev *mdev, int size);
+};
+
struct adf_dev_err_mask {
u32 cppagentcmdpar_mask;
u32 parerr_ath_cph_mask;
@@ -244,10 +320,15 @@ struct adf_hw_device_data {
void (*enable_ints)(struct adf_accel_dev *accel_dev);
void (*set_ssm_wdtimer)(struct adf_accel_dev *accel_dev);
int (*ring_pair_reset)(struct adf_accel_dev *accel_dev, u32 bank_nr);
+ int (*bank_state_save)(struct adf_accel_dev *accel_dev, u32 bank_number,
+ struct bank_state *state);
+ int (*bank_state_restore)(struct adf_accel_dev *accel_dev,
+ u32 bank_number, struct bank_state *state);
void (*reset_device)(struct adf_accel_dev *accel_dev);
void (*set_msix_rttable)(struct adf_accel_dev *accel_dev);
const char *(*uof_get_name)(struct adf_accel_dev *accel_dev, u32 obj_num);
u32 (*uof_get_num_objs)(struct adf_accel_dev *accel_dev);
+ int (*uof_get_obj_type)(struct adf_accel_dev *accel_dev, u32 obj_num);
u32 (*uof_get_ae_mask)(struct adf_accel_dev *accel_dev, u32 obj_num);
int (*get_rp_group)(struct adf_accel_dev *accel_dev, u32 ae_mask);
u32 (*get_ena_thd_mask)(struct adf_accel_dev *accel_dev, u32 obj_num);
@@ -259,6 +340,7 @@ struct adf_hw_device_data {
struct adf_dev_err_mask dev_err_mask;
struct adf_rl_hw_data rl_data;
struct adf_tl_hw_data tl_data;
+ struct qat_migdev_ops vfmig_ops;
const char *fw_name;
const char *fw_mmp_name;
u32 fuses;
@@ -315,6 +397,7 @@ struct adf_hw_device_data {
#define GET_CSR_OPS(accel_dev) (&(accel_dev)->hw_device->csr_ops)
#define GET_PFVF_OPS(accel_dev) (&(accel_dev)->hw_device->pfvf_ops)
#define GET_DC_OPS(accel_dev) (&(accel_dev)->hw_device->dc_ops)
+#define GET_VFMIG_OPS(accel_dev) (&(accel_dev)->hw_device->vfmig_ops)
#define GET_TL_DATA(accel_dev) GET_HW_DATA(accel_dev)->tl_data
#define accel_to_pci_dev(accel_ptr) accel_ptr->accel_pci_dev.pci_dev
@@ -329,10 +412,17 @@ struct adf_fw_loader_data {
struct adf_accel_vf_info {
struct adf_accel_dev *accel_dev;
struct mutex pf2vf_lock; /* protect CSR access for PF2VF messages */
+ struct mutex pfvf_mig_lock; /* protects PFVF state for migration */
struct ratelimit_state vf2pf_ratelimit;
u32 vf_nr;
bool init;
+ bool restarting;
u8 vf_compat_ver;
+ /*
+ * Private area used for device migration.
+ * Memory allocation and free is managed by migration driver.
+ */
+ void *mig_priv;
};
struct adf_dc_data {
@@ -401,6 +491,7 @@ struct adf_accel_dev {
struct adf_error_counters ras_errors;
struct mutex state_lock; /* protect state of the device */
bool is_vf;
+ bool autoreset_on_error;
u32 accel_id;
};
#endif
diff --git a/drivers/crypto/intel/qat/qat_common/adf_aer.c b/drivers/crypto/intel/qat/qat_common/adf_aer.c
index a39e70bd4b21..04260f61d042 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_aer.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_aer.c
@@ -7,8 +7,15 @@
#include <linux/delay.h>
#include "adf_accel_devices.h"
#include "adf_common_drv.h"
+#include "adf_pfvf_pf_msg.h"
+
+struct adf_fatal_error_data {
+ struct adf_accel_dev *accel_dev;
+ struct work_struct work;
+};
static struct workqueue_struct *device_reset_wq;
+static struct workqueue_struct *device_sriov_wq;
static pci_ers_result_t adf_error_detected(struct pci_dev *pdev,
pci_channel_state_t state)
@@ -26,6 +33,19 @@ static pci_ers_result_t adf_error_detected(struct pci_dev *pdev,
return PCI_ERS_RESULT_DISCONNECT;
}
+ set_bit(ADF_STATUS_RESTARTING, &accel_dev->status);
+ if (accel_dev->hw_device->exit_arb) {
+ dev_dbg(&pdev->dev, "Disabling arbitration\n");
+ accel_dev->hw_device->exit_arb(accel_dev);
+ }
+ adf_error_notifier(accel_dev);
+ adf_pf2vf_notify_fatal_error(accel_dev);
+ adf_dev_restarting_notify(accel_dev);
+ adf_pf2vf_notify_restarting(accel_dev);
+ adf_pf2vf_wait_for_restarting_complete(accel_dev);
+ pci_clear_master(pdev);
+ adf_dev_down(accel_dev, false);
+
return PCI_ERS_RESULT_NEED_RESET;
}
@@ -37,6 +57,13 @@ struct adf_reset_dev_data {
struct work_struct reset_work;
};
+/* sriov dev data */
+struct adf_sriov_dev_data {
+ struct adf_accel_dev *accel_dev;
+ struct completion compl;
+ struct work_struct sriov_work;
+};
+
void adf_reset_sbr(struct adf_accel_dev *accel_dev)
{
struct pci_dev *pdev = accel_to_pci_dev(accel_dev);
@@ -82,11 +109,22 @@ void adf_dev_restore(struct adf_accel_dev *accel_dev)
}
}
+static void adf_device_sriov_worker(struct work_struct *work)
+{
+ struct adf_sriov_dev_data *sriov_data =
+ container_of(work, struct adf_sriov_dev_data, sriov_work);
+
+ adf_reenable_sriov(sriov_data->accel_dev);
+ complete(&sriov_data->compl);
+}
+
static void adf_device_reset_worker(struct work_struct *work)
{
struct adf_reset_dev_data *reset_data =
container_of(work, struct adf_reset_dev_data, reset_work);
struct adf_accel_dev *accel_dev = reset_data->accel_dev;
+ unsigned long wait_jiffies = msecs_to_jiffies(10000);
+ struct adf_sriov_dev_data sriov_data;
adf_dev_restarting_notify(accel_dev);
if (adf_dev_restart(accel_dev)) {
@@ -97,14 +135,22 @@ static void adf_device_reset_worker(struct work_struct *work)
WARN(1, "QAT: device restart failed. Device is unusable\n");
return;
}
+
+ sriov_data.accel_dev = accel_dev;
+ init_completion(&sriov_data.compl);
+ INIT_WORK(&sriov_data.sriov_work, adf_device_sriov_worker);
+ queue_work(device_sriov_wq, &sriov_data.sriov_work);
+ if (wait_for_completion_timeout(&sriov_data.compl, wait_jiffies))
+ adf_pf2vf_notify_restarted(accel_dev);
+
adf_dev_restarted_notify(accel_dev);
clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status);
/* The dev is back alive. Notify the caller if in sync mode */
- if (reset_data->mode == ADF_DEV_RESET_SYNC)
- complete(&reset_data->compl);
- else
+ if (reset_data->mode == ADF_DEV_RESET_ASYNC)
kfree(reset_data);
+ else
+ complete(&reset_data->compl);
}
static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev,
@@ -136,6 +182,7 @@ static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev,
if (!timeout) {
dev_err(&GET_DEV(accel_dev),
"Reset device timeout expired\n");
+ cancel_work_sync(&reset_data->reset_work);
ret = -EFAULT;
}
kfree(reset_data);
@@ -147,14 +194,25 @@ static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev,
static pci_ers_result_t adf_slot_reset(struct pci_dev *pdev)
{
struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev);
+ int res = 0;
if (!accel_dev) {
pr_err("QAT: Can't find acceleration device\n");
return PCI_ERS_RESULT_DISCONNECT;
}
- if (adf_dev_aer_schedule_reset(accel_dev, ADF_DEV_RESET_SYNC))
+
+ if (!pdev->is_busmaster)
+ pci_set_master(pdev);
+ pci_restore_state(pdev);
+ pci_save_state(pdev);
+ res = adf_dev_up(accel_dev, false);
+ if (res && res != -EALREADY)
return PCI_ERS_RESULT_DISCONNECT;
+ adf_reenable_sriov(accel_dev);
+ adf_pf2vf_notify_restarted(accel_dev);
+ adf_dev_restarted_notify(accel_dev);
+ clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status);
return PCI_ERS_RESULT_RECOVERED;
}
@@ -171,11 +229,62 @@ const struct pci_error_handlers adf_err_handler = {
};
EXPORT_SYMBOL_GPL(adf_err_handler);
+int adf_dev_autoreset(struct adf_accel_dev *accel_dev)
+{
+ if (accel_dev->autoreset_on_error)
+ return adf_dev_aer_schedule_reset(accel_dev, ADF_DEV_RESET_ASYNC);
+
+ return 0;
+}
+
+static void adf_notify_fatal_error_worker(struct work_struct *work)
+{
+ struct adf_fatal_error_data *wq_data =
+ container_of(work, struct adf_fatal_error_data, work);
+ struct adf_accel_dev *accel_dev = wq_data->accel_dev;
+ struct adf_hw_device_data *hw_device = accel_dev->hw_device;
+
+ adf_error_notifier(accel_dev);
+
+ if (!accel_dev->is_vf) {
+ /* Disable arbitration to stop processing of new requests */
+ if (accel_dev->autoreset_on_error && hw_device->exit_arb)
+ hw_device->exit_arb(accel_dev);
+ if (accel_dev->pf.vf_info)
+ adf_pf2vf_notify_fatal_error(accel_dev);
+ adf_dev_autoreset(accel_dev);
+ }
+
+ kfree(wq_data);
+}
+
+int adf_notify_fatal_error(struct adf_accel_dev *accel_dev)
+{
+ struct adf_fatal_error_data *wq_data;
+
+ wq_data = kzalloc(sizeof(*wq_data), GFP_ATOMIC);
+ if (!wq_data)
+ return -ENOMEM;
+
+ wq_data->accel_dev = accel_dev;
+ INIT_WORK(&wq_data->work, adf_notify_fatal_error_worker);
+ adf_misc_wq_queue_work(&wq_data->work);
+
+ return 0;
+}
+
int adf_init_aer(void)
{
device_reset_wq = alloc_workqueue("qat_device_reset_wq",
WQ_MEM_RECLAIM, 0);
- return !device_reset_wq ? -EFAULT : 0;
+ if (!device_reset_wq)
+ return -EFAULT;
+
+ device_sriov_wq = alloc_workqueue("qat_device_sriov_wq", 0, 0);
+ if (!device_sriov_wq)
+ return -EFAULT;
+
+ return 0;
}
void adf_exit_aer(void)
@@ -183,4 +292,8 @@ void adf_exit_aer(void)
if (device_reset_wq)
destroy_workqueue(device_reset_wq);
device_reset_wq = NULL;
+
+ if (device_sriov_wq)
+ destroy_workqueue(device_sriov_wq);
+ device_sriov_wq = NULL;
}
diff --git a/drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h b/drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h
index 322b76903a73..e015ad6cace2 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h
@@ -49,5 +49,6 @@
ADF_ETRMGR_BANK "%d" ADF_ETRMGR_CORE_AFFINITY
#define ADF_ACCEL_STR "Accelerator%d"
#define ADF_HEARTBEAT_TIMER "HeartbeatTimer"
+#define ADF_SRIOV_ENABLED "SriovEnabled"
#endif
diff --git a/drivers/crypto/intel/qat/qat_common/adf_clock.c b/drivers/crypto/intel/qat/qat_common/adf_clock.c
index 01e0a389e462..cf89f57de2a7 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_clock.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_clock.c
@@ -83,6 +83,9 @@ static int measure_clock(struct adf_accel_dev *accel_dev, u32 *frequency)
}
delta_us = timespec_to_us(&ts3) - timespec_to_us(&ts1);
+ if (!delta_us)
+ return -EINVAL;
+
temp = (timestamp2 - timestamp1) * ME_CLK_DIVIDER * 10;
temp = DIV_ROUND_CLOSEST_ULL(temp, delta_us);
/*
diff --git a/drivers/crypto/intel/qat/qat_common/adf_cnv_dbgfs.c b/drivers/crypto/intel/qat/qat_common/adf_cnv_dbgfs.c
index 07119c487da0..627953a72d47 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_cnv_dbgfs.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_cnv_dbgfs.c
@@ -16,7 +16,6 @@
#define CNV_ERR_INFO_MASK GENMASK(11, 0)
#define CNV_ERR_TYPE_MASK GENMASK(15, 12)
-#define CNV_SLICE_ERR_MASK GENMASK(7, 0)
#define CNV_SLICE_ERR_SIGN_BIT_INDEX 7
#define CNV_DELTA_ERR_SIGN_BIT_INDEX 11
diff --git a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
index f06188033a93..3bec9e20bad0 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h
@@ -40,6 +40,7 @@ enum adf_event {
ADF_EVENT_SHUTDOWN,
ADF_EVENT_RESTARTING,
ADF_EVENT_RESTARTED,
+ ADF_EVENT_FATAL_ERROR,
};
struct service_hndl {
@@ -60,6 +61,8 @@ int adf_dev_restart(struct adf_accel_dev *accel_dev);
void adf_devmgr_update_class_index(struct adf_hw_device_data *hw_data);
void adf_clean_vf_map(bool);
+int adf_notify_fatal_error(struct adf_accel_dev *accel_dev);
+void adf_error_notifier(struct adf_accel_dev *accel_dev);
int adf_devmgr_add_dev(struct adf_accel_dev *accel_dev,
struct adf_accel_dev *pf);
void adf_devmgr_rm_dev(struct adf_accel_dev *accel_dev,
@@ -84,12 +87,14 @@ int adf_ae_stop(struct adf_accel_dev *accel_dev);
extern const struct pci_error_handlers adf_err_handler;
void adf_reset_sbr(struct adf_accel_dev *accel_dev);
void adf_reset_flr(struct adf_accel_dev *accel_dev);
+int adf_dev_autoreset(struct adf_accel_dev *accel_dev);
void adf_dev_restore(struct adf_accel_dev *accel_dev);
int adf_init_aer(void);
void adf_exit_aer(void);
int adf_init_arb(struct adf_accel_dev *accel_dev);
void adf_exit_arb(struct adf_accel_dev *accel_dev);
void adf_update_ring_arb(struct adf_etr_ring_data *ring);
+int adf_disable_arb_thd(struct adf_accel_dev *accel_dev, u32 ae, u32 thr);
int adf_dev_get(struct adf_accel_dev *accel_dev);
void adf_dev_put(struct adf_accel_dev *accel_dev);
@@ -188,6 +193,7 @@ bool adf_misc_wq_queue_delayed_work(struct delayed_work *work,
#if defined(CONFIG_PCI_IOV)
int adf_sriov_configure(struct pci_dev *pdev, int numvfs);
void adf_disable_sriov(struct adf_accel_dev *accel_dev);
+void adf_reenable_sriov(struct adf_accel_dev *accel_dev);
void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask);
void adf_disable_all_vf2pf_interrupts(struct adf_accel_dev *accel_dev);
bool adf_recv_and_handle_pf2vf_msg(struct adf_accel_dev *accel_dev);
@@ -208,6 +214,10 @@ static inline void adf_disable_sriov(struct adf_accel_dev *accel_dev)
{
}
+static inline void adf_reenable_sriov(struct adf_accel_dev *accel_dev)
+{
+}
+
static inline int adf_init_pf_wq(void)
{
return 0;
@@ -238,6 +248,16 @@ static inline void __iomem *adf_get_pmisc_base(struct adf_accel_dev *accel_dev)
return pmisc->virt_addr;
}
+static inline void __iomem *adf_get_etr_base(struct adf_accel_dev *accel_dev)
+{
+ struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+ struct adf_bar *etr;
+
+ etr = &GET_BARS(accel_dev)[hw_data->get_etr_bar_id(hw_data)];
+
+ return etr->virt_addr;
+}
+
static inline void __iomem *adf_get_aram_base(struct adf_accel_dev *accel_dev)
{
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
diff --git a/drivers/crypto/intel/qat/qat_common/adf_dev_mgr.c b/drivers/crypto/intel/qat/qat_common/adf_dev_mgr.c
index 86ee36feefad..f07b748795f7 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_dev_mgr.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_dev_mgr.c
@@ -60,10 +60,10 @@ static int adf_get_vf_real_id(u32 fake)
/**
* adf_clean_vf_map() - Cleans VF id mapings
- *
- * Function cleans internal ids for virtual functions.
* @vf: flag indicating whether mappings is cleaned
* for vfs only or for vfs and pfs
+ *
+ * Function cleans internal ids for virtual functions.
*/
void adf_clean_vf_map(bool vf)
{
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.c
new file mode 100644
index 000000000000..650c9edd8a66
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2024 Intel Corporation */
+#include <linux/types.h>
+#include "adf_gen2_hw_csr_data.h"
+
+static u64 build_csr_ring_base_addr(dma_addr_t addr, u32 size)
+{
+ return BUILD_RING_BASE_ADDR(addr, size);
+}
+
+static u32 read_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring)
+{
+ return READ_CSR_RING_HEAD(csr_base_addr, bank, ring);
+}
+
+static void write_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring,
+ u32 value)
+{
+ WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value);
+}
+
+static u32 read_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring)
+{
+ return READ_CSR_RING_TAIL(csr_base_addr, bank, ring);
+}
+
+static void write_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring,
+ u32 value)
+{
+ WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value);
+}
+
+static u32 read_csr_e_stat(void __iomem *csr_base_addr, u32 bank)
+{
+ return READ_CSR_E_STAT(csr_base_addr, bank);
+}
+
+static void write_csr_ring_config(void __iomem *csr_base_addr, u32 bank,
+ u32 ring, u32 value)
+{
+ WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value);
+}
+
+static void write_csr_ring_base(void __iomem *csr_base_addr, u32 bank, u32 ring,
+ dma_addr_t addr)
+{
+ WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, addr);
+}
+
+static void write_csr_int_flag(void __iomem *csr_base_addr, u32 bank, u32 value)
+{
+ WRITE_CSR_INT_FLAG(csr_base_addr, bank, value);
+}
+
+static void write_csr_int_srcsel(void __iomem *csr_base_addr, u32 bank)
+{
+ WRITE_CSR_INT_SRCSEL(csr_base_addr, bank);
+}
+
+static void write_csr_int_col_en(void __iomem *csr_base_addr, u32 bank,
+ u32 value)
+{
+ WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value);
+}
+
+static void write_csr_int_col_ctl(void __iomem *csr_base_addr, u32 bank,
+ u32 value)
+{
+ WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value);
+}
+
+static void write_csr_int_flag_and_col(void __iomem *csr_base_addr, u32 bank,
+ u32 value)
+{
+ WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value);
+}
+
+static void write_csr_ring_srv_arb_en(void __iomem *csr_base_addr, u32 bank,
+ u32 value)
+{
+ WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value);
+}
+
+void adf_gen2_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops)
+{
+ csr_ops->build_csr_ring_base_addr = build_csr_ring_base_addr;
+ csr_ops->read_csr_ring_head = read_csr_ring_head;
+ csr_ops->write_csr_ring_head = write_csr_ring_head;
+ csr_ops->read_csr_ring_tail = read_csr_ring_tail;
+ csr_ops->write_csr_ring_tail = write_csr_ring_tail;
+ csr_ops->read_csr_e_stat = read_csr_e_stat;
+ csr_ops->write_csr_ring_config = write_csr_ring_config;
+ csr_ops->write_csr_ring_base = write_csr_ring_base;
+ csr_ops->write_csr_int_flag = write_csr_int_flag;
+ csr_ops->write_csr_int_srcsel = write_csr_int_srcsel;
+ csr_ops->write_csr_int_col_en = write_csr_int_col_en;
+ csr_ops->write_csr_int_col_ctl = write_csr_int_col_ctl;
+ csr_ops->write_csr_int_flag_and_col = write_csr_int_flag_and_col;
+ csr_ops->write_csr_ring_srv_arb_en = write_csr_ring_srv_arb_en;
+}
+EXPORT_SYMBOL_GPL(adf_gen2_init_hw_csr_ops);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.h
new file mode 100644
index 000000000000..55058b0f9e52
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2024 Intel Corporation */
+#ifndef ADF_GEN2_HW_CSR_DATA_H_
+#define ADF_GEN2_HW_CSR_DATA_H_
+
+#include <linux/bitops.h>
+#include "adf_accel_devices.h"
+
+#define ADF_BANK_INT_SRC_SEL_MASK_0 0x4444444CUL
+#define ADF_BANK_INT_SRC_SEL_MASK_X 0x44444444UL
+#define ADF_RING_CSR_RING_CONFIG 0x000
+#define ADF_RING_CSR_RING_LBASE 0x040
+#define ADF_RING_CSR_RING_UBASE 0x080
+#define ADF_RING_CSR_RING_HEAD 0x0C0
+#define ADF_RING_CSR_RING_TAIL 0x100
+#define ADF_RING_CSR_E_STAT 0x14C
+#define ADF_RING_CSR_INT_FLAG 0x170
+#define ADF_RING_CSR_INT_SRCSEL 0x174
+#define ADF_RING_CSR_INT_SRCSEL_2 0x178
+#define ADF_RING_CSR_INT_COL_EN 0x17C
+#define ADF_RING_CSR_INT_COL_CTL 0x180
+#define ADF_RING_CSR_INT_FLAG_AND_COL 0x184
+#define ADF_RING_CSR_INT_COL_CTL_ENABLE 0x80000000
+#define ADF_RING_BUNDLE_SIZE 0x1000
+#define ADF_ARB_REG_SLOT 0x1000
+#define ADF_ARB_RINGSRVARBEN_OFFSET 0x19C
+
+#define BUILD_RING_BASE_ADDR(addr, size) \
+ (((addr) >> 6) & (GENMASK_ULL(63, 0) << (size)))
+#define READ_CSR_RING_HEAD(csr_base_addr, bank, ring) \
+ ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
+ ADF_RING_CSR_RING_HEAD + ((ring) << 2))
+#define READ_CSR_RING_TAIL(csr_base_addr, bank, ring) \
+ ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
+ ADF_RING_CSR_RING_TAIL + ((ring) << 2))
+#define READ_CSR_E_STAT(csr_base_addr, bank) \
+ ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
+ ADF_RING_CSR_E_STAT)
+#define WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value) \
+ ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
+ ADF_RING_CSR_RING_CONFIG + ((ring) << 2), value)
+#define WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, value) \
+do { \
+ u32 l_base = 0, u_base = 0; \
+ l_base = (u32)((value) & 0xFFFFFFFF); \
+ u_base = (u32)(((value) & 0xFFFFFFFF00000000ULL) >> 32); \
+ ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
+ ADF_RING_CSR_RING_LBASE + ((ring) << 2), l_base); \
+ ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
+ ADF_RING_CSR_RING_UBASE + ((ring) << 2), u_base); \
+} while (0)
+
+#define WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value) \
+ ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
+ ADF_RING_CSR_RING_HEAD + ((ring) << 2), value)
+#define WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value) \
+ ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
+ ADF_RING_CSR_RING_TAIL + ((ring) << 2), value)
+#define WRITE_CSR_INT_FLAG(csr_base_addr, bank, value) \
+ ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
+ ADF_RING_CSR_INT_FLAG, value)
+#define WRITE_CSR_INT_SRCSEL(csr_base_addr, bank) \
+do { \
+ ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
+ ADF_RING_CSR_INT_SRCSEL, ADF_BANK_INT_SRC_SEL_MASK_0); \
+ ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
+ ADF_RING_CSR_INT_SRCSEL_2, ADF_BANK_INT_SRC_SEL_MASK_X); \
+} while (0)
+#define WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value) \
+ ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
+ ADF_RING_CSR_INT_COL_EN, value)
+#define WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value) \
+ ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
+ ADF_RING_CSR_INT_COL_CTL, \
+ ADF_RING_CSR_INT_COL_CTL_ENABLE | (value))
+#define WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value) \
+ ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
+ ADF_RING_CSR_INT_FLAG_AND_COL, value)
+
+#define WRITE_CSR_RING_SRV_ARB_EN(csr_addr, index, value) \
+ ADF_CSR_WR(csr_addr, ADF_ARB_RINGSRVARBEN_OFFSET + \
+ (ADF_ARB_REG_SLOT * (index)), value)
+
+void adf_gen2_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops);
+
+#endif
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.c
index d1884547b5a1..1f64bf49b221 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.c
@@ -111,103 +111,6 @@ void adf_gen2_enable_ints(struct adf_accel_dev *accel_dev)
}
EXPORT_SYMBOL_GPL(adf_gen2_enable_ints);
-static u64 build_csr_ring_base_addr(dma_addr_t addr, u32 size)
-{
- return BUILD_RING_BASE_ADDR(addr, size);
-}
-
-static u32 read_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring)
-{
- return READ_CSR_RING_HEAD(csr_base_addr, bank, ring);
-}
-
-static void write_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring,
- u32 value)
-{
- WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value);
-}
-
-static u32 read_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring)
-{
- return READ_CSR_RING_TAIL(csr_base_addr, bank, ring);
-}
-
-static void write_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring,
- u32 value)
-{
- WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value);
-}
-
-static u32 read_csr_e_stat(void __iomem *csr_base_addr, u32 bank)
-{
- return READ_CSR_E_STAT(csr_base_addr, bank);
-}
-
-static void write_csr_ring_config(void __iomem *csr_base_addr, u32 bank,
- u32 ring, u32 value)
-{
- WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value);
-}
-
-static void write_csr_ring_base(void __iomem *csr_base_addr, u32 bank, u32 ring,
- dma_addr_t addr)
-{
- WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, addr);
-}
-
-static void write_csr_int_flag(void __iomem *csr_base_addr, u32 bank, u32 value)
-{
- WRITE_CSR_INT_FLAG(csr_base_addr, bank, value);
-}
-
-static void write_csr_int_srcsel(void __iomem *csr_base_addr, u32 bank)
-{
- WRITE_CSR_INT_SRCSEL(csr_base_addr, bank);
-}
-
-static void write_csr_int_col_en(void __iomem *csr_base_addr, u32 bank,
- u32 value)
-{
- WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value);
-}
-
-static void write_csr_int_col_ctl(void __iomem *csr_base_addr, u32 bank,
- u32 value)
-{
- WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value);
-}
-
-static void write_csr_int_flag_and_col(void __iomem *csr_base_addr, u32 bank,
- u32 value)
-{
- WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value);
-}
-
-static void write_csr_ring_srv_arb_en(void __iomem *csr_base_addr, u32 bank,
- u32 value)
-{
- WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value);
-}
-
-void adf_gen2_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops)
-{
- csr_ops->build_csr_ring_base_addr = build_csr_ring_base_addr;
- csr_ops->read_csr_ring_head = read_csr_ring_head;
- csr_ops->write_csr_ring_head = write_csr_ring_head;
- csr_ops->read_csr_ring_tail = read_csr_ring_tail;
- csr_ops->write_csr_ring_tail = write_csr_ring_tail;
- csr_ops->read_csr_e_stat = read_csr_e_stat;
- csr_ops->write_csr_ring_config = write_csr_ring_config;
- csr_ops->write_csr_ring_base = write_csr_ring_base;
- csr_ops->write_csr_int_flag = write_csr_int_flag;
- csr_ops->write_csr_int_srcsel = write_csr_int_srcsel;
- csr_ops->write_csr_int_col_en = write_csr_int_col_en;
- csr_ops->write_csr_int_col_ctl = write_csr_int_col_ctl;
- csr_ops->write_csr_int_flag_and_col = write_csr_int_flag_and_col;
- csr_ops->write_csr_ring_srv_arb_en = write_csr_ring_srv_arb_en;
-}
-EXPORT_SYMBOL_GPL(adf_gen2_init_hw_csr_ops);
-
u32 adf_gen2_get_accel_cap(struct adf_accel_dev *accel_dev)
{
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h
index 6bd341061de4..708e9186127b 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h
@@ -6,78 +6,9 @@
#include "adf_accel_devices.h"
#include "adf_cfg_common.h"
-/* Transport access */
-#define ADF_BANK_INT_SRC_SEL_MASK_0 0x4444444CUL
-#define ADF_BANK_INT_SRC_SEL_MASK_X 0x44444444UL
-#define ADF_RING_CSR_RING_CONFIG 0x000
-#define ADF_RING_CSR_RING_LBASE 0x040
-#define ADF_RING_CSR_RING_UBASE 0x080
-#define ADF_RING_CSR_RING_HEAD 0x0C0
-#define ADF_RING_CSR_RING_TAIL 0x100
-#define ADF_RING_CSR_E_STAT 0x14C
-#define ADF_RING_CSR_INT_FLAG 0x170
-#define ADF_RING_CSR_INT_SRCSEL 0x174
-#define ADF_RING_CSR_INT_SRCSEL_2 0x178
-#define ADF_RING_CSR_INT_COL_EN 0x17C
-#define ADF_RING_CSR_INT_COL_CTL 0x180
-#define ADF_RING_CSR_INT_FLAG_AND_COL 0x184
-#define ADF_RING_CSR_INT_COL_CTL_ENABLE 0x80000000
-#define ADF_RING_BUNDLE_SIZE 0x1000
#define ADF_GEN2_RX_RINGS_OFFSET 8
#define ADF_GEN2_TX_RINGS_MASK 0xFF
-#define BUILD_RING_BASE_ADDR(addr, size) \
- (((addr) >> 6) & (GENMASK_ULL(63, 0) << (size)))
-#define READ_CSR_RING_HEAD(csr_base_addr, bank, ring) \
- ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
- ADF_RING_CSR_RING_HEAD + ((ring) << 2))
-#define READ_CSR_RING_TAIL(csr_base_addr, bank, ring) \
- ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
- ADF_RING_CSR_RING_TAIL + ((ring) << 2))
-#define READ_CSR_E_STAT(csr_base_addr, bank) \
- ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
- ADF_RING_CSR_E_STAT)
-#define WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value) \
- ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
- ADF_RING_CSR_RING_CONFIG + ((ring) << 2), value)
-#define WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, value) \
-do { \
- u32 l_base = 0, u_base = 0; \
- l_base = (u32)((value) & 0xFFFFFFFF); \
- u_base = (u32)(((value) & 0xFFFFFFFF00000000ULL) >> 32); \
- ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
- ADF_RING_CSR_RING_LBASE + ((ring) << 2), l_base); \
- ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
- ADF_RING_CSR_RING_UBASE + ((ring) << 2), u_base); \
-} while (0)
-
-#define WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value) \
- ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
- ADF_RING_CSR_RING_HEAD + ((ring) << 2), value)
-#define WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value) \
- ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
- ADF_RING_CSR_RING_TAIL + ((ring) << 2), value)
-#define WRITE_CSR_INT_FLAG(csr_base_addr, bank, value) \
- ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
- ADF_RING_CSR_INT_FLAG, value)
-#define WRITE_CSR_INT_SRCSEL(csr_base_addr, bank) \
-do { \
- ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
- ADF_RING_CSR_INT_SRCSEL, ADF_BANK_INT_SRC_SEL_MASK_0); \
- ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
- ADF_RING_CSR_INT_SRCSEL_2, ADF_BANK_INT_SRC_SEL_MASK_X); \
-} while (0)
-#define WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value) \
- ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
- ADF_RING_CSR_INT_COL_EN, value)
-#define WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value) \
- ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
- ADF_RING_CSR_INT_COL_CTL, \
- ADF_RING_CSR_INT_COL_CTL_ENABLE | (value))
-#define WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value) \
- ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \
- ADF_RING_CSR_INT_FLAG_AND_COL, value)
-
/* AE to function map */
#define AE2FUNCTION_MAP_A_OFFSET (0x3A400 + 0x190)
#define AE2FUNCTION_MAP_B_OFFSET (0x3A400 + 0x310)
@@ -106,12 +37,6 @@ do { \
#define ADF_ARB_OFFSET 0x30000
#define ADF_ARB_WRK_2_SER_MAP_OFFSET 0x180
#define ADF_ARB_CONFIG (BIT(31) | BIT(6) | BIT(0))
-#define ADF_ARB_REG_SLOT 0x1000
-#define ADF_ARB_RINGSRVARBEN_OFFSET 0x19C
-
-#define WRITE_CSR_RING_SRV_ARB_EN(csr_addr, index, value) \
- ADF_CSR_WR(csr_addr, ADF_ARB_RINGSRVARBEN_OFFSET + \
- (ADF_ARB_REG_SLOT * (index)), value)
/* Power gating */
#define ADF_POWERGATE_DC BIT(23)
@@ -158,7 +83,6 @@ u32 adf_gen2_get_num_aes(struct adf_hw_device_data *self);
void adf_gen2_enable_error_correction(struct adf_accel_dev *accel_dev);
void adf_gen2_cfg_iov_thds(struct adf_accel_dev *accel_dev, bool enable,
int num_a_regs, int num_b_regs);
-void adf_gen2_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops);
void adf_gen2_get_admin_info(struct admin_info *admin_csrs_info);
void adf_gen2_get_arb_info(struct arb_info *arb_info);
void adf_gen2_enable_ints(struct adf_accel_dev *accel_dev);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.c
new file mode 100644
index 000000000000..6609c248aaba
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2024 Intel Corporation */
+#include <linux/types.h>
+#include "adf_gen4_hw_csr_data.h"
+
+static u64 build_csr_ring_base_addr(dma_addr_t addr, u32 size)
+{
+ return BUILD_RING_BASE_ADDR(addr, size);
+}
+
+static u32 read_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring)
+{
+ return READ_CSR_RING_HEAD(csr_base_addr, bank, ring);
+}
+
+static void write_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring,
+ u32 value)
+{
+ WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value);
+}
+
+static u32 read_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring)
+{
+ return READ_CSR_RING_TAIL(csr_base_addr, bank, ring);
+}
+
+static void write_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring,
+ u32 value)
+{
+ WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value);
+}
+
+static u32 read_csr_stat(void __iomem *csr_base_addr, u32 bank)
+{
+ return READ_CSR_STAT(csr_base_addr, bank);
+}
+
+static u32 read_csr_uo_stat(void __iomem *csr_base_addr, u32 bank)
+{
+ return READ_CSR_UO_STAT(csr_base_addr, bank);
+}
+
+static u32 read_csr_e_stat(void __iomem *csr_base_addr, u32 bank)
+{
+ return READ_CSR_E_STAT(csr_base_addr, bank);
+}
+
+static u32 read_csr_ne_stat(void __iomem *csr_base_addr, u32 bank)
+{
+ return READ_CSR_NE_STAT(csr_base_addr, bank);
+}
+
+static u32 read_csr_nf_stat(void __iomem *csr_base_addr, u32 bank)
+{
+ return READ_CSR_NF_STAT(csr_base_addr, bank);
+}
+
+static u32 read_csr_f_stat(void __iomem *csr_base_addr, u32 bank)
+{
+ return READ_CSR_F_STAT(csr_base_addr, bank);
+}
+
+static u32 read_csr_c_stat(void __iomem *csr_base_addr, u32 bank)
+{
+ return READ_CSR_C_STAT(csr_base_addr, bank);
+}
+
+static u32 read_csr_exp_stat(void __iomem *csr_base_addr, u32 bank)
+{
+ return READ_CSR_EXP_STAT(csr_base_addr, bank);
+}
+
+static u32 read_csr_exp_int_en(void __iomem *csr_base_addr, u32 bank)
+{
+ return READ_CSR_EXP_INT_EN(csr_base_addr, bank);
+}
+
+static void write_csr_exp_int_en(void __iomem *csr_base_addr, u32 bank,
+ u32 value)
+{
+ WRITE_CSR_EXP_INT_EN(csr_base_addr, bank, value);
+}
+
+static u32 read_csr_ring_config(void __iomem *csr_base_addr, u32 bank,
+ u32 ring)
+{
+ return READ_CSR_RING_CONFIG(csr_base_addr, bank, ring);
+}
+
+static void write_csr_ring_config(void __iomem *csr_base_addr, u32 bank, u32 ring,
+ u32 value)
+{
+ WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value);
+}
+
+static dma_addr_t read_csr_ring_base(void __iomem *csr_base_addr, u32 bank,
+ u32 ring)
+{
+ return READ_CSR_RING_BASE(csr_base_addr, bank, ring);
+}
+
+static void write_csr_ring_base(void __iomem *csr_base_addr, u32 bank, u32 ring,
+ dma_addr_t addr)
+{
+ WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, addr);
+}
+
+static u32 read_csr_int_en(void __iomem *csr_base_addr, u32 bank)
+{
+ return READ_CSR_INT_EN(csr_base_addr, bank);
+}
+
+static void write_csr_int_en(void __iomem *csr_base_addr, u32 bank, u32 value)
+{
+ WRITE_CSR_INT_EN(csr_base_addr, bank, value);
+}
+
+static u32 read_csr_int_flag(void __iomem *csr_base_addr, u32 bank)
+{
+ return READ_CSR_INT_FLAG(csr_base_addr, bank);
+}
+
+static void write_csr_int_flag(void __iomem *csr_base_addr, u32 bank,
+ u32 value)
+{
+ WRITE_CSR_INT_FLAG(csr_base_addr, bank, value);
+}
+
+static u32 read_csr_int_srcsel(void __iomem *csr_base_addr, u32 bank)
+{
+ return READ_CSR_INT_SRCSEL(csr_base_addr, bank);
+}
+
+static void write_csr_int_srcsel(void __iomem *csr_base_addr, u32 bank)
+{
+ WRITE_CSR_INT_SRCSEL(csr_base_addr, bank);
+}
+
+static void write_csr_int_srcsel_w_val(void __iomem *csr_base_addr, u32 bank,
+ u32 value)
+{
+ WRITE_CSR_INT_SRCSEL_W_VAL(csr_base_addr, bank, value);
+}
+
+static u32 read_csr_int_col_en(void __iomem *csr_base_addr, u32 bank)
+{
+ return READ_CSR_INT_COL_EN(csr_base_addr, bank);
+}
+
+static void write_csr_int_col_en(void __iomem *csr_base_addr, u32 bank, u32 value)
+{
+ WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value);
+}
+
+static u32 read_csr_int_col_ctl(void __iomem *csr_base_addr, u32 bank)
+{
+ return READ_CSR_INT_COL_CTL(csr_base_addr, bank);
+}
+
+static void write_csr_int_col_ctl(void __iomem *csr_base_addr, u32 bank,
+ u32 value)
+{
+ WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value);
+}
+
+static u32 read_csr_int_flag_and_col(void __iomem *csr_base_addr, u32 bank)
+{
+ return READ_CSR_INT_FLAG_AND_COL(csr_base_addr, bank);
+}
+
+static void write_csr_int_flag_and_col(void __iomem *csr_base_addr, u32 bank,
+ u32 value)
+{
+ WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value);
+}
+
+static u32 read_csr_ring_srv_arb_en(void __iomem *csr_base_addr, u32 bank)
+{
+ return READ_CSR_RING_SRV_ARB_EN(csr_base_addr, bank);
+}
+
+static void write_csr_ring_srv_arb_en(void __iomem *csr_base_addr, u32 bank,
+ u32 value)
+{
+ WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value);
+}
+
+static u32 get_int_col_ctl_enable_mask(void)
+{
+ return ADF_RING_CSR_INT_COL_CTL_ENABLE;
+}
+
+void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops)
+{
+ csr_ops->build_csr_ring_base_addr = build_csr_ring_base_addr;
+ csr_ops->read_csr_ring_head = read_csr_ring_head;
+ csr_ops->write_csr_ring_head = write_csr_ring_head;
+ csr_ops->read_csr_ring_tail = read_csr_ring_tail;
+ csr_ops->write_csr_ring_tail = write_csr_ring_tail;
+ csr_ops->read_csr_stat = read_csr_stat;
+ csr_ops->read_csr_uo_stat = read_csr_uo_stat;
+ csr_ops->read_csr_e_stat = read_csr_e_stat;
+ csr_ops->read_csr_ne_stat = read_csr_ne_stat;
+ csr_ops->read_csr_nf_stat = read_csr_nf_stat;
+ csr_ops->read_csr_f_stat = read_csr_f_stat;
+ csr_ops->read_csr_c_stat = read_csr_c_stat;
+ csr_ops->read_csr_exp_stat = read_csr_exp_stat;
+ csr_ops->read_csr_exp_int_en = read_csr_exp_int_en;
+ csr_ops->write_csr_exp_int_en = write_csr_exp_int_en;
+ csr_ops->read_csr_ring_config = read_csr_ring_config;
+ csr_ops->write_csr_ring_config = write_csr_ring_config;
+ csr_ops->read_csr_ring_base = read_csr_ring_base;
+ csr_ops->write_csr_ring_base = write_csr_ring_base;
+ csr_ops->read_csr_int_en = read_csr_int_en;
+ csr_ops->write_csr_int_en = write_csr_int_en;
+ csr_ops->read_csr_int_flag = read_csr_int_flag;
+ csr_ops->write_csr_int_flag = write_csr_int_flag;
+ csr_ops->read_csr_int_srcsel = read_csr_int_srcsel;
+ csr_ops->write_csr_int_srcsel = write_csr_int_srcsel;
+ csr_ops->write_csr_int_srcsel_w_val = write_csr_int_srcsel_w_val;
+ csr_ops->read_csr_int_col_en = read_csr_int_col_en;
+ csr_ops->write_csr_int_col_en = write_csr_int_col_en;
+ csr_ops->read_csr_int_col_ctl = read_csr_int_col_ctl;
+ csr_ops->write_csr_int_col_ctl = write_csr_int_col_ctl;
+ csr_ops->read_csr_int_flag_and_col = read_csr_int_flag_and_col;
+ csr_ops->write_csr_int_flag_and_col = write_csr_int_flag_and_col;
+ csr_ops->read_csr_ring_srv_arb_en = read_csr_ring_srv_arb_en;
+ csr_ops->write_csr_ring_srv_arb_en = write_csr_ring_srv_arb_en;
+ csr_ops->get_int_col_ctl_enable_mask = get_int_col_ctl_enable_mask;
+}
+EXPORT_SYMBOL_GPL(adf_gen4_init_hw_csr_ops);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.h
new file mode 100644
index 000000000000..6f33e7c87c2c
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.h
@@ -0,0 +1,188 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2024 Intel Corporation */
+#ifndef ADF_GEN4_HW_CSR_DATA_H_
+#define ADF_GEN4_HW_CSR_DATA_H_
+
+#include <linux/bitops.h>
+#include "adf_accel_devices.h"
+
+#define ADF_BANK_INT_SRC_SEL_MASK 0x44UL
+#define ADF_RING_CSR_RING_CONFIG 0x1000
+#define ADF_RING_CSR_RING_LBASE 0x1040
+#define ADF_RING_CSR_RING_UBASE 0x1080
+#define ADF_RING_CSR_RING_HEAD 0x0C0
+#define ADF_RING_CSR_RING_TAIL 0x100
+#define ADF_RING_CSR_STAT 0x140
+#define ADF_RING_CSR_UO_STAT 0x148
+#define ADF_RING_CSR_E_STAT 0x14C
+#define ADF_RING_CSR_NE_STAT 0x150
+#define ADF_RING_CSR_NF_STAT 0x154
+#define ADF_RING_CSR_F_STAT 0x158
+#define ADF_RING_CSR_C_STAT 0x15C
+#define ADF_RING_CSR_INT_FLAG_EN 0x16C
+#define ADF_RING_CSR_INT_FLAG 0x170
+#define ADF_RING_CSR_INT_SRCSEL 0x174
+#define ADF_RING_CSR_INT_COL_EN 0x17C
+#define ADF_RING_CSR_INT_COL_CTL 0x180
+#define ADF_RING_CSR_INT_FLAG_AND_COL 0x184
+#define ADF_RING_CSR_EXP_STAT 0x188
+#define ADF_RING_CSR_EXP_INT_EN 0x18C
+#define ADF_RING_CSR_INT_COL_CTL_ENABLE 0x80000000
+#define ADF_RING_CSR_ADDR_OFFSET 0x100000
+#define ADF_RING_BUNDLE_SIZE 0x2000
+#define ADF_RING_CSR_RING_SRV_ARB_EN 0x19C
+
+#define BUILD_RING_BASE_ADDR(addr, size) \
+ ((((addr) >> 6) & (GENMASK_ULL(63, 0) << (size))) << 6)
+#define READ_CSR_RING_HEAD(csr_base_addr, bank, ring) \
+ ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + \
+ ADF_RING_CSR_RING_HEAD + ((ring) << 2))
+#define READ_CSR_RING_TAIL(csr_base_addr, bank, ring) \
+ ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + \
+ ADF_RING_CSR_RING_TAIL + ((ring) << 2))
+#define READ_CSR_STAT(csr_base_addr, bank) \
+ ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_STAT)
+#define READ_CSR_UO_STAT(csr_base_addr, bank) \
+ ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_UO_STAT)
+#define READ_CSR_E_STAT(csr_base_addr, bank) \
+ ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_E_STAT)
+#define READ_CSR_NE_STAT(csr_base_addr, bank) \
+ ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_NE_STAT)
+#define READ_CSR_NF_STAT(csr_base_addr, bank) \
+ ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_NF_STAT)
+#define READ_CSR_F_STAT(csr_base_addr, bank) \
+ ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_F_STAT)
+#define READ_CSR_C_STAT(csr_base_addr, bank) \
+ ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_C_STAT)
+#define READ_CSR_EXP_STAT(csr_base_addr, bank) \
+ ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_EXP_STAT)
+#define READ_CSR_EXP_INT_EN(csr_base_addr, bank) \
+ ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_EXP_INT_EN)
+#define WRITE_CSR_EXP_INT_EN(csr_base_addr, bank, value) \
+ ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + \
+ ADF_RING_CSR_EXP_INT_EN, value)
+#define READ_CSR_RING_CONFIG(csr_base_addr, bank, ring) \
+ ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + \
+ ADF_RING_CSR_RING_CONFIG + ((ring) << 2))
+#define WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value) \
+ ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + \
+ ADF_RING_CSR_RING_CONFIG + ((ring) << 2), value)
+#define WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, value) \
+do { \
+ void __iomem *_csr_base_addr = csr_base_addr; \
+ u32 _bank = bank; \
+ u32 _ring = ring; \
+ dma_addr_t _value = value; \
+ u32 l_base = 0, u_base = 0; \
+ l_base = lower_32_bits(_value); \
+ u_base = upper_32_bits(_value); \
+ ADF_CSR_WR((_csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (_bank) + \
+ ADF_RING_CSR_RING_LBASE + ((_ring) << 2), l_base); \
+ ADF_CSR_WR((_csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (_bank) + \
+ ADF_RING_CSR_RING_UBASE + ((_ring) << 2), u_base); \
+} while (0)
+
+static inline u64 read_base(void __iomem *csr_base_addr, u32 bank, u32 ring)
+{
+ u32 l_base, u_base;
+
+ /*
+ * Use special IO wrapper for ring base as LBASE and UBASE are
+ * not physically contigious
+ */
+ l_base = ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * bank) +
+ ADF_RING_CSR_RING_LBASE + (ring << 2));
+ u_base = ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * bank) +
+ ADF_RING_CSR_RING_UBASE + (ring << 2));
+
+ return (u64)u_base << 32 | (u64)l_base;
+}
+
+#define READ_CSR_RING_BASE(csr_base_addr, bank, ring) \
+ read_base((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, (bank), (ring))
+
+#define WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value) \
+ ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + \
+ ADF_RING_CSR_RING_HEAD + ((ring) << 2), value)
+#define WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value) \
+ ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + \
+ ADF_RING_CSR_RING_TAIL + ((ring) << 2), value)
+#define READ_CSR_INT_EN(csr_base_addr, bank) \
+ ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_INT_FLAG_EN)
+#define WRITE_CSR_INT_EN(csr_base_addr, bank, value) \
+ ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + \
+ ADF_RING_CSR_INT_FLAG_EN, (value))
+#define READ_CSR_INT_FLAG(csr_base_addr, bank) \
+ ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_INT_FLAG)
+#define WRITE_CSR_INT_FLAG(csr_base_addr, bank, value) \
+ ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + \
+ ADF_RING_CSR_INT_FLAG, (value))
+#define READ_CSR_INT_SRCSEL(csr_base_addr, bank) \
+ ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_INT_SRCSEL)
+#define WRITE_CSR_INT_SRCSEL(csr_base_addr, bank) \
+ ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + \
+ ADF_RING_CSR_INT_SRCSEL, ADF_BANK_INT_SRC_SEL_MASK)
+#define WRITE_CSR_INT_SRCSEL_W_VAL(csr_base_addr, bank, value) \
+ ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + \
+ ADF_RING_CSR_INT_SRCSEL, (value))
+#define READ_CSR_INT_COL_EN(csr_base_addr, bank) \
+ ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_INT_COL_EN)
+#define WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value) \
+ ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + \
+ ADF_RING_CSR_INT_COL_EN, (value))
+#define READ_CSR_INT_COL_CTL(csr_base_addr, bank) \
+ ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_INT_COL_CTL)
+#define WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value) \
+ ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + \
+ ADF_RING_CSR_INT_COL_CTL, \
+ ADF_RING_CSR_INT_COL_CTL_ENABLE | (value))
+#define READ_CSR_INT_FLAG_AND_COL(csr_base_addr, bank) \
+ ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + \
+ ADF_RING_CSR_INT_FLAG_AND_COL)
+#define WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value) \
+ ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + \
+ ADF_RING_CSR_INT_FLAG_AND_COL, (value))
+
+#define READ_CSR_RING_SRV_ARB_EN(csr_base_addr, bank) \
+ ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + \
+ ADF_RING_CSR_RING_SRV_ARB_EN)
+#define WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value) \
+ ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
+ ADF_RING_BUNDLE_SIZE * (bank) + \
+ ADF_RING_CSR_RING_SRV_ARB_EN, (value))
+
+void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops);
+
+#endif
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c
index 9985683056d5..41a0979e68c1 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c
@@ -1,109 +1,14 @@
// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
/* Copyright(c) 2020 Intel Corporation */
#include <linux/iopoll.h>
+#include <asm/div64.h>
#include "adf_accel_devices.h"
#include "adf_cfg_services.h"
#include "adf_common_drv.h"
+#include "adf_fw_config.h"
#include "adf_gen4_hw_data.h"
#include "adf_gen4_pm.h"
-static u64 build_csr_ring_base_addr(dma_addr_t addr, u32 size)
-{
- return BUILD_RING_BASE_ADDR(addr, size);
-}
-
-static u32 read_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring)
-{
- return READ_CSR_RING_HEAD(csr_base_addr, bank, ring);
-}
-
-static void write_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring,
- u32 value)
-{
- WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value);
-}
-
-static u32 read_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring)
-{
- return READ_CSR_RING_TAIL(csr_base_addr, bank, ring);
-}
-
-static void write_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring,
- u32 value)
-{
- WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value);
-}
-
-static u32 read_csr_e_stat(void __iomem *csr_base_addr, u32 bank)
-{
- return READ_CSR_E_STAT(csr_base_addr, bank);
-}
-
-static void write_csr_ring_config(void __iomem *csr_base_addr, u32 bank, u32 ring,
- u32 value)
-{
- WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value);
-}
-
-static void write_csr_ring_base(void __iomem *csr_base_addr, u32 bank, u32 ring,
- dma_addr_t addr)
-{
- WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, addr);
-}
-
-static void write_csr_int_flag(void __iomem *csr_base_addr, u32 bank,
- u32 value)
-{
- WRITE_CSR_INT_FLAG(csr_base_addr, bank, value);
-}
-
-static void write_csr_int_srcsel(void __iomem *csr_base_addr, u32 bank)
-{
- WRITE_CSR_INT_SRCSEL(csr_base_addr, bank);
-}
-
-static void write_csr_int_col_en(void __iomem *csr_base_addr, u32 bank, u32 value)
-{
- WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value);
-}
-
-static void write_csr_int_col_ctl(void __iomem *csr_base_addr, u32 bank,
- u32 value)
-{
- WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value);
-}
-
-static void write_csr_int_flag_and_col(void __iomem *csr_base_addr, u32 bank,
- u32 value)
-{
- WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value);
-}
-
-static void write_csr_ring_srv_arb_en(void __iomem *csr_base_addr, u32 bank,
- u32 value)
-{
- WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value);
-}
-
-void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops)
-{
- csr_ops->build_csr_ring_base_addr = build_csr_ring_base_addr;
- csr_ops->read_csr_ring_head = read_csr_ring_head;
- csr_ops->write_csr_ring_head = write_csr_ring_head;
- csr_ops->read_csr_ring_tail = read_csr_ring_tail;
- csr_ops->write_csr_ring_tail = write_csr_ring_tail;
- csr_ops->read_csr_e_stat = read_csr_e_stat;
- csr_ops->write_csr_ring_config = write_csr_ring_config;
- csr_ops->write_csr_ring_base = write_csr_ring_base;
- csr_ops->write_csr_int_flag = write_csr_int_flag;
- csr_ops->write_csr_int_srcsel = write_csr_int_srcsel;
- csr_ops->write_csr_int_col_en = write_csr_int_col_en;
- csr_ops->write_csr_int_col_ctl = write_csr_int_col_ctl;
- csr_ops->write_csr_int_flag_and_col = write_csr_int_flag_and_col;
- csr_ops->write_csr_ring_srv_arb_en = write_csr_ring_srv_arb_en;
-}
-EXPORT_SYMBOL_GPL(adf_gen4_init_hw_csr_ops);
-
u32 adf_gen4_get_accel_mask(struct adf_hw_device_data *self)
{
return ADF_GEN4_ACCELERATORS_MASK;
@@ -320,8 +225,7 @@ static int reset_ring_pair(void __iomem *csr, u32 bank_number)
int adf_gen4_ring_pair_reset(struct adf_accel_dev *accel_dev, u32 bank_number)
{
struct adf_hw_device_data *hw_data = accel_dev->hw_device;
- u32 etr_bar_id = hw_data->get_etr_bar_id(hw_data);
- void __iomem *csr;
+ void __iomem *csr = adf_get_etr_base(accel_dev);
int ret;
if (bank_number >= hw_data->num_banks)
@@ -330,7 +234,6 @@ int adf_gen4_ring_pair_reset(struct adf_accel_dev *accel_dev, u32 bank_number)
dev_dbg(&GET_DEV(accel_dev),
"ring pair reset for bank:%d\n", bank_number);
- csr = (&GET_BARS(accel_dev)[etr_bar_id])->virt_addr;
ret = reset_ring_pair(csr, bank_number);
if (ret)
dev_err(&GET_DEV(accel_dev),
@@ -398,6 +301,9 @@ int adf_gen4_init_thd2arb_map(struct adf_accel_dev *accel_dev)
ADF_GEN4_ADMIN_ACCELENGINES;
if (srv_id == SVC_DCC) {
+ if (ae_cnt > ICP_QAT_HW_AE_DELIMITER)
+ return -EINVAL;
+
memcpy(thd2arb_map, thrd_to_arb_map_dcc,
array_size(sizeof(*thd2arb_map), ae_cnt));
return 0;
@@ -430,3 +336,336 @@ int adf_gen4_init_thd2arb_map(struct adf_accel_dev *accel_dev)
return 0;
}
EXPORT_SYMBOL_GPL(adf_gen4_init_thd2arb_map);
+
+u16 adf_gen4_get_ring_to_svc_map(struct adf_accel_dev *accel_dev)
+{
+ struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev);
+ enum adf_cfg_service_type rps[RP_GROUP_COUNT] = { };
+ unsigned int ae_mask, start_id, worker_obj_cnt, i;
+ u16 ring_to_svc_map;
+ int rp_group;
+
+ if (!hw_data->get_rp_group || !hw_data->uof_get_ae_mask ||
+ !hw_data->uof_get_obj_type || !hw_data->uof_get_num_objs)
+ return 0;
+
+ /* If dcc, all rings handle compression requests */
+ if (adf_get_service_enabled(accel_dev) == SVC_DCC) {
+ for (i = 0; i < RP_GROUP_COUNT; i++)
+ rps[i] = COMP;
+ goto set_mask;
+ }
+
+ worker_obj_cnt = hw_data->uof_get_num_objs(accel_dev) -
+ ADF_GEN4_ADMIN_ACCELENGINES;
+ start_id = worker_obj_cnt - RP_GROUP_COUNT;
+
+ for (i = start_id; i < worker_obj_cnt; i++) {
+ ae_mask = hw_data->uof_get_ae_mask(accel_dev, i);
+ rp_group = hw_data->get_rp_group(accel_dev, ae_mask);
+ if (rp_group >= RP_GROUP_COUNT || rp_group < RP_GROUP_0)
+ return 0;
+
+ switch (hw_data->uof_get_obj_type(accel_dev, i)) {
+ case ADF_FW_SYM_OBJ:
+ rps[rp_group] = SYM;
+ break;
+ case ADF_FW_ASYM_OBJ:
+ rps[rp_group] = ASYM;
+ break;
+ case ADF_FW_DC_OBJ:
+ rps[rp_group] = COMP;
+ break;
+ default:
+ rps[rp_group] = 0;
+ break;
+ }
+ }
+
+set_mask:
+ ring_to_svc_map = rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_0_SHIFT |
+ rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_1_SHIFT |
+ rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_2_SHIFT |
+ rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_3_SHIFT;
+
+ return ring_to_svc_map;
+}
+EXPORT_SYMBOL_GPL(adf_gen4_get_ring_to_svc_map);
+
+/*
+ * adf_gen4_bank_quiesce_coal_timer() - quiesce bank coalesced interrupt timer
+ * @accel_dev: Pointer to the device structure
+ * @bank_idx: Offset to the bank within this device
+ * @timeout_ms: Timeout in milliseconds for the operation
+ *
+ * This function tries to quiesce the coalesced interrupt timer of a bank if
+ * it has been enabled and triggered.
+ *
+ * Returns 0 on success, error code otherwise
+ *
+ */
+int adf_gen4_bank_quiesce_coal_timer(struct adf_accel_dev *accel_dev,
+ u32 bank_idx, int timeout_ms)
+{
+ struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev);
+ struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(accel_dev);
+ void __iomem *csr_misc = adf_get_pmisc_base(accel_dev);
+ void __iomem *csr_etr = adf_get_etr_base(accel_dev);
+ u32 int_col_ctl, int_col_mask, int_col_en;
+ u32 e_stat, intsrc;
+ u64 wait_us;
+ int ret;
+
+ if (timeout_ms < 0)
+ return -EINVAL;
+
+ int_col_ctl = csr_ops->read_csr_int_col_ctl(csr_etr, bank_idx);
+ int_col_mask = csr_ops->get_int_col_ctl_enable_mask();
+ if (!(int_col_ctl & int_col_mask))
+ return 0;
+
+ int_col_en = csr_ops->read_csr_int_col_en(csr_etr, bank_idx);
+ int_col_en &= BIT(ADF_WQM_CSR_RP_IDX_RX);
+
+ e_stat = csr_ops->read_csr_e_stat(csr_etr, bank_idx);
+ if (!(~e_stat & int_col_en))
+ return 0;
+
+ wait_us = 2 * ((int_col_ctl & ~int_col_mask) << 8) * USEC_PER_SEC;
+ do_div(wait_us, hw_data->clock_frequency);
+ wait_us = min(wait_us, (u64)timeout_ms * USEC_PER_MSEC);
+ dev_dbg(&GET_DEV(accel_dev),
+ "wait for bank %d - coalesced timer expires in %llu us (max=%u ms estat=0x%x intcolen=0x%x)\n",
+ bank_idx, wait_us, timeout_ms, e_stat, int_col_en);
+
+ ret = read_poll_timeout(ADF_CSR_RD, intsrc, intsrc,
+ ADF_COALESCED_POLL_DELAY_US, wait_us, true,
+ csr_misc, ADF_WQM_CSR_RPINTSOU(bank_idx));
+ if (ret)
+ dev_warn(&GET_DEV(accel_dev),
+ "coalesced timer for bank %d expired (%llu us)\n",
+ bank_idx, wait_us);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(adf_gen4_bank_quiesce_coal_timer);
+
+static int drain_bank(void __iomem *csr, u32 bank_number, int timeout_us)
+{
+ u32 status;
+
+ ADF_CSR_WR(csr, ADF_WQM_CSR_RPRESETCTL(bank_number),
+ ADF_WQM_CSR_RPRESETCTL_DRAIN);
+
+ return read_poll_timeout(ADF_CSR_RD, status,
+ status & ADF_WQM_CSR_RPRESETSTS_STATUS,
+ ADF_RPRESET_POLL_DELAY_US, timeout_us, true,
+ csr, ADF_WQM_CSR_RPRESETSTS(bank_number));
+}
+
+void adf_gen4_bank_drain_finish(struct adf_accel_dev *accel_dev,
+ u32 bank_number)
+{
+ void __iomem *csr = adf_get_etr_base(accel_dev);
+
+ ADF_CSR_WR(csr, ADF_WQM_CSR_RPRESETSTS(bank_number),
+ ADF_WQM_CSR_RPRESETSTS_STATUS);
+}
+
+int adf_gen4_bank_drain_start(struct adf_accel_dev *accel_dev,
+ u32 bank_number, int timeout_us)
+{
+ void __iomem *csr = adf_get_etr_base(accel_dev);
+ int ret;
+
+ dev_dbg(&GET_DEV(accel_dev), "Drain bank %d\n", bank_number);
+
+ ret = drain_bank(csr, bank_number, timeout_us);
+ if (ret)
+ dev_err(&GET_DEV(accel_dev), "Bank drain failed (timeout)\n");
+ else
+ dev_dbg(&GET_DEV(accel_dev), "Bank drain successful\n");
+
+ return ret;
+}
+
+static void bank_state_save(struct adf_hw_csr_ops *ops, void __iomem *base,
+ u32 bank, struct bank_state *state, u32 num_rings)
+{
+ u32 i;
+
+ state->ringstat0 = ops->read_csr_stat(base, bank);
+ state->ringuostat = ops->read_csr_uo_stat(base, bank);
+ state->ringestat = ops->read_csr_e_stat(base, bank);
+ state->ringnestat = ops->read_csr_ne_stat(base, bank);
+ state->ringnfstat = ops->read_csr_nf_stat(base, bank);
+ state->ringfstat = ops->read_csr_f_stat(base, bank);
+ state->ringcstat0 = ops->read_csr_c_stat(base, bank);
+ state->iaintflagen = ops->read_csr_int_en(base, bank);
+ state->iaintflagreg = ops->read_csr_int_flag(base, bank);
+ state->iaintflagsrcsel0 = ops->read_csr_int_srcsel(base, bank);
+ state->iaintcolen = ops->read_csr_int_col_en(base, bank);
+ state->iaintcolctl = ops->read_csr_int_col_ctl(base, bank);
+ state->iaintflagandcolen = ops->read_csr_int_flag_and_col(base, bank);
+ state->ringexpstat = ops->read_csr_exp_stat(base, bank);
+ state->ringexpintenable = ops->read_csr_exp_int_en(base, bank);
+ state->ringsrvarben = ops->read_csr_ring_srv_arb_en(base, bank);
+
+ for (i = 0; i < num_rings; i++) {
+ state->rings[i].head = ops->read_csr_ring_head(base, bank, i);
+ state->rings[i].tail = ops->read_csr_ring_tail(base, bank, i);
+ state->rings[i].config = ops->read_csr_ring_config(base, bank, i);
+ state->rings[i].base = ops->read_csr_ring_base(base, bank, i);
+ }
+}
+
+#define CHECK_STAT(op, expect_val, name, args...) \
+({ \
+ u32 __expect_val = (expect_val); \
+ u32 actual_val = op(args); \
+ (__expect_val == actual_val) ? 0 : \
+ (pr_err("QAT: Fail to restore %s register. Expected 0x%x, actual 0x%x\n", \
+ name, __expect_val, actual_val), -EINVAL); \
+})
+
+static int bank_state_restore(struct adf_hw_csr_ops *ops, void __iomem *base,
+ u32 bank, struct bank_state *state, u32 num_rings,
+ int tx_rx_gap)
+{
+ u32 val, tmp_val, i;
+ int ret;
+
+ for (i = 0; i < num_rings; i++)
+ ops->write_csr_ring_base(base, bank, i, state->rings[i].base);
+
+ for (i = 0; i < num_rings; i++)
+ ops->write_csr_ring_config(base, bank, i, state->rings[i].config);
+
+ for (i = 0; i < num_rings / 2; i++) {
+ int tx = i * (tx_rx_gap + 1);
+ int rx = tx + tx_rx_gap;
+
+ ops->write_csr_ring_head(base, bank, tx, state->rings[tx].head);
+ ops->write_csr_ring_tail(base, bank, tx, state->rings[tx].tail);
+
+ /*
+ * The TX ring head needs to be updated again to make sure that
+ * the HW will not consider the ring as full when it is empty
+ * and the correct state flags are set to match the recovered state.
+ */
+ if (state->ringestat & BIT(tx)) {
+ val = ops->read_csr_int_srcsel(base, bank);
+ val |= ADF_RP_INT_SRC_SEL_F_RISE_MASK;
+ ops->write_csr_int_srcsel_w_val(base, bank, val);
+ ops->write_csr_ring_head(base, bank, tx, state->rings[tx].head);
+ }
+
+ ops->write_csr_ring_tail(base, bank, rx, state->rings[rx].tail);
+ val = ops->read_csr_int_srcsel(base, bank);
+ val |= ADF_RP_INT_SRC_SEL_F_RISE_MASK << ADF_RP_INT_SRC_SEL_RANGE_WIDTH;
+ ops->write_csr_int_srcsel_w_val(base, bank, val);
+
+ ops->write_csr_ring_head(base, bank, rx, state->rings[rx].head);
+ val = ops->read_csr_int_srcsel(base, bank);
+ val |= ADF_RP_INT_SRC_SEL_F_FALL_MASK << ADF_RP_INT_SRC_SEL_RANGE_WIDTH;
+ ops->write_csr_int_srcsel_w_val(base, bank, val);
+
+ /*
+ * The RX ring tail needs to be updated again to make sure that
+ * the HW will not consider the ring as empty when it is full
+ * and the correct state flags are set to match the recovered state.
+ */
+ if (state->ringfstat & BIT(rx))
+ ops->write_csr_ring_tail(base, bank, rx, state->rings[rx].tail);
+ }
+
+ ops->write_csr_int_flag_and_col(base, bank, state->iaintflagandcolen);
+ ops->write_csr_int_en(base, bank, state->iaintflagen);
+ ops->write_csr_int_col_en(base, bank, state->iaintcolen);
+ ops->write_csr_int_srcsel_w_val(base, bank, state->iaintflagsrcsel0);
+ ops->write_csr_exp_int_en(base, bank, state->ringexpintenable);
+ ops->write_csr_int_col_ctl(base, bank, state->iaintcolctl);
+ ops->write_csr_ring_srv_arb_en(base, bank, state->ringsrvarben);
+
+ /* Check that all ring statuses match the saved state. */
+ ret = CHECK_STAT(ops->read_csr_stat, state->ringstat0, "ringstat",
+ base, bank);
+ if (ret)
+ return ret;
+
+ ret = CHECK_STAT(ops->read_csr_e_stat, state->ringestat, "ringestat",
+ base, bank);
+ if (ret)
+ return ret;
+
+ ret = CHECK_STAT(ops->read_csr_ne_stat, state->ringnestat, "ringnestat",
+ base, bank);
+ if (ret)
+ return ret;
+
+ ret = CHECK_STAT(ops->read_csr_nf_stat, state->ringnfstat, "ringnfstat",
+ base, bank);
+ if (ret)
+ return ret;
+
+ ret = CHECK_STAT(ops->read_csr_f_stat, state->ringfstat, "ringfstat",
+ base, bank);
+ if (ret)
+ return ret;
+
+ ret = CHECK_STAT(ops->read_csr_c_stat, state->ringcstat0, "ringcstat",
+ base, bank);
+ if (ret)
+ return ret;
+
+ tmp_val = ops->read_csr_exp_stat(base, bank);
+ val = state->ringexpstat;
+ if (tmp_val && !val) {
+ pr_err("QAT: Bank was restored with exception: 0x%x\n", val);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int adf_gen4_bank_state_save(struct adf_accel_dev *accel_dev, u32 bank_number,
+ struct bank_state *state)
+{
+ struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev);
+ struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(accel_dev);
+ void __iomem *csr_base = adf_get_etr_base(accel_dev);
+
+ if (bank_number >= hw_data->num_banks || !state)
+ return -EINVAL;
+
+ dev_dbg(&GET_DEV(accel_dev), "Saving state of bank %d\n", bank_number);
+
+ bank_state_save(csr_ops, csr_base, bank_number, state,
+ hw_data->num_rings_per_bank);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(adf_gen4_bank_state_save);
+
+int adf_gen4_bank_state_restore(struct adf_accel_dev *accel_dev, u32 bank_number,
+ struct bank_state *state)
+{
+ struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev);
+ struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(accel_dev);
+ void __iomem *csr_base = adf_get_etr_base(accel_dev);
+ int ret;
+
+ if (bank_number >= hw_data->num_banks || !state)
+ return -EINVAL;
+
+ dev_dbg(&GET_DEV(accel_dev), "Restoring state of bank %d\n", bank_number);
+
+ ret = bank_state_restore(csr_ops, csr_base, bank_number, state,
+ hw_data->num_rings_per_bank, hw_data->tx_rx_gap);
+ if (ret)
+ dev_err(&GET_DEV(accel_dev),
+ "Unable to restore state of bank %d\n", bank_number);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(adf_gen4_bank_state_restore);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h
index 7d8a774cadc8..8b10926cedba 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */
/* Copyright(c) 2020 Intel Corporation */
-#ifndef ADF_GEN4_HW_CSR_DATA_H_
-#define ADF_GEN4_HW_CSR_DATA_H_
+#ifndef ADF_GEN4_HW_DATA_H_
+#define ADF_GEN4_HW_DATA_H_
#include <linux/units.h>
@@ -54,95 +54,6 @@
#define ADF_GEN4_ADMINMSGLR_OFFSET 0x500578
#define ADF_GEN4_MAILBOX_BASE_OFFSET 0x600970
-/* Transport access */
-#define ADF_BANK_INT_SRC_SEL_MASK 0x44UL
-#define ADF_RING_CSR_RING_CONFIG 0x1000
-#define ADF_RING_CSR_RING_LBASE 0x1040
-#define ADF_RING_CSR_RING_UBASE 0x1080
-#define ADF_RING_CSR_RING_HEAD 0x0C0
-#define ADF_RING_CSR_RING_TAIL 0x100
-#define ADF_RING_CSR_E_STAT 0x14C
-#define ADF_RING_CSR_INT_FLAG 0x170
-#define ADF_RING_CSR_INT_SRCSEL 0x174
-#define ADF_RING_CSR_INT_COL_CTL 0x180
-#define ADF_RING_CSR_INT_FLAG_AND_COL 0x184
-#define ADF_RING_CSR_INT_COL_CTL_ENABLE 0x80000000
-#define ADF_RING_CSR_INT_COL_EN 0x17C
-#define ADF_RING_CSR_ADDR_OFFSET 0x100000
-#define ADF_RING_BUNDLE_SIZE 0x2000
-
-#define BUILD_RING_BASE_ADDR(addr, size) \
- ((((addr) >> 6) & (GENMASK_ULL(63, 0) << (size))) << 6)
-#define READ_CSR_RING_HEAD(csr_base_addr, bank, ring) \
- ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
- ADF_RING_BUNDLE_SIZE * (bank) + \
- ADF_RING_CSR_RING_HEAD + ((ring) << 2))
-#define READ_CSR_RING_TAIL(csr_base_addr, bank, ring) \
- ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
- ADF_RING_BUNDLE_SIZE * (bank) + \
- ADF_RING_CSR_RING_TAIL + ((ring) << 2))
-#define READ_CSR_E_STAT(csr_base_addr, bank) \
- ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
- ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_E_STAT)
-#define WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value) \
- ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
- ADF_RING_BUNDLE_SIZE * (bank) + \
- ADF_RING_CSR_RING_CONFIG + ((ring) << 2), value)
-#define WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, value) \
-do { \
- void __iomem *_csr_base_addr = csr_base_addr; \
- u32 _bank = bank; \
- u32 _ring = ring; \
- dma_addr_t _value = value; \
- u32 l_base = 0, u_base = 0; \
- l_base = lower_32_bits(_value); \
- u_base = upper_32_bits(_value); \
- ADF_CSR_WR((_csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
- ADF_RING_BUNDLE_SIZE * (_bank) + \
- ADF_RING_CSR_RING_LBASE + ((_ring) << 2), l_base); \
- ADF_CSR_WR((_csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
- ADF_RING_BUNDLE_SIZE * (_bank) + \
- ADF_RING_CSR_RING_UBASE + ((_ring) << 2), u_base); \
-} while (0)
-
-#define WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value) \
- ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
- ADF_RING_BUNDLE_SIZE * (bank) + \
- ADF_RING_CSR_RING_HEAD + ((ring) << 2), value)
-#define WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value) \
- ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
- ADF_RING_BUNDLE_SIZE * (bank) + \
- ADF_RING_CSR_RING_TAIL + ((ring) << 2), value)
-#define WRITE_CSR_INT_FLAG(csr_base_addr, bank, value) \
- ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
- ADF_RING_BUNDLE_SIZE * (bank) + \
- ADF_RING_CSR_INT_FLAG, (value))
-#define WRITE_CSR_INT_SRCSEL(csr_base_addr, bank) \
- ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
- ADF_RING_BUNDLE_SIZE * (bank) + \
- ADF_RING_CSR_INT_SRCSEL, ADF_BANK_INT_SRC_SEL_MASK)
-#define WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value) \
- ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
- ADF_RING_BUNDLE_SIZE * (bank) + \
- ADF_RING_CSR_INT_COL_EN, (value))
-#define WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value) \
- ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
- ADF_RING_BUNDLE_SIZE * (bank) + \
- ADF_RING_CSR_INT_COL_CTL, \
- ADF_RING_CSR_INT_COL_CTL_ENABLE | (value))
-#define WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value) \
- ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
- ADF_RING_BUNDLE_SIZE * (bank) + \
- ADF_RING_CSR_INT_FLAG_AND_COL, (value))
-
-/* Arbiter configuration */
-#define ADF_RING_CSR_RING_SRV_ARB_EN 0x19C
-
-#define WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value) \
- ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \
- ADF_RING_BUNDLE_SIZE * (bank) + \
- ADF_RING_CSR_RING_SRV_ARB_EN, (value))
-
/* Default ring mapping */
#define ADF_GEN4_DEFAULT_RING_TO_SRV_MAP \
(ASYM << ADF_CFG_SERV_RING_PAIR_0_SHIFT | \
@@ -166,10 +77,20 @@ do { \
#define ADF_RPRESET_POLL_TIMEOUT_US (5 * USEC_PER_SEC)
#define ADF_RPRESET_POLL_DELAY_US 20
#define ADF_WQM_CSR_RPRESETCTL_RESET BIT(0)
+#define ADF_WQM_CSR_RPRESETCTL_DRAIN BIT(2)
#define ADF_WQM_CSR_RPRESETCTL(bank) (0x6000 + ((bank) << 3))
#define ADF_WQM_CSR_RPRESETSTS_STATUS BIT(0)
#define ADF_WQM_CSR_RPRESETSTS(bank) (ADF_WQM_CSR_RPRESETCTL(bank) + 4)
+/* Ring interrupt */
+#define ADF_RP_INT_SRC_SEL_F_RISE_MASK BIT(2)
+#define ADF_RP_INT_SRC_SEL_F_FALL_MASK GENMASK(2, 0)
+#define ADF_RP_INT_SRC_SEL_RANGE_WIDTH 4
+#define ADF_COALESCED_POLL_TIMEOUT_US (1 * USEC_PER_SEC)
+#define ADF_COALESCED_POLL_DELAY_US 1000
+#define ADF_WQM_CSR_RPINTSOU(bank) (0x200000 + ((bank) << 12))
+#define ADF_WQM_CSR_RP_IDX_RX 1
+
/* Error source registers */
#define ADF_GEN4_ERRSOU0 (0x41A200)
#define ADF_GEN4_ERRSOU1 (0x41A204)
@@ -197,6 +118,19 @@ do { \
/* Arbiter threads mask with error value */
#define ADF_GEN4_ENA_THD_MASK_ERROR GENMASK(ADF_NUM_THREADS_PER_AE, 0)
+/* PF2VM communication channel */
+#define ADF_GEN4_PF2VM_OFFSET(i) (0x40B010 + (i) * 0x20)
+#define ADF_GEN4_VM2PF_OFFSET(i) (0x40B014 + (i) * 0x20)
+#define ADF_GEN4_VINTMSKPF2VM_OFFSET(i) (0x40B00C + (i) * 0x20)
+#define ADF_GEN4_VINTSOUPF2VM_OFFSET(i) (0x40B008 + (i) * 0x20)
+#define ADF_GEN4_VINTMSK_OFFSET(i) (0x40B004 + (i) * 0x20)
+#define ADF_GEN4_VINTSOU_OFFSET(i) (0x40B000 + (i) * 0x20)
+
+struct adf_gen4_vfmig {
+ struct adf_mstate_mgr *mstate_mgr;
+ bool bank_stopped[ADF_GEN4_NUM_BANKS_PER_VF];
+};
+
void adf_gen4_set_ssm_wdtimer(struct adf_accel_dev *accel_dev);
enum icp_qat_gen4_slice_mask {
@@ -230,10 +164,20 @@ u32 adf_gen4_get_num_aes(struct adf_hw_device_data *self);
enum dev_sku_info adf_gen4_get_sku(struct adf_hw_device_data *self);
u32 adf_gen4_get_sram_bar_id(struct adf_hw_device_data *self);
int adf_gen4_init_device(struct adf_accel_dev *accel_dev);
-void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops);
int adf_gen4_ring_pair_reset(struct adf_accel_dev *accel_dev, u32 bank_number);
void adf_gen4_set_msix_default_rttable(struct adf_accel_dev *accel_dev);
void adf_gen4_set_ssm_wdtimer(struct adf_accel_dev *accel_dev);
int adf_gen4_init_thd2arb_map(struct adf_accel_dev *accel_dev);
+u16 adf_gen4_get_ring_to_svc_map(struct adf_accel_dev *accel_dev);
+int adf_gen4_bank_quiesce_coal_timer(struct adf_accel_dev *accel_dev,
+ u32 bank_idx, int timeout_ms);
+int adf_gen4_bank_drain_start(struct adf_accel_dev *accel_dev,
+ u32 bank_number, int timeout_us);
+void adf_gen4_bank_drain_finish(struct adf_accel_dev *accel_dev,
+ u32 bank_number);
+int adf_gen4_bank_state_save(struct adf_accel_dev *accel_dev, u32 bank_number,
+ struct bank_state *state);
+int adf_gen4_bank_state_restore(struct adf_accel_dev *accel_dev,
+ u32 bank_number, struct bank_state *state);
#endif
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_pfvf.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_pfvf.c
index 8e8efe93f3ee..21474d402d09 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen4_pfvf.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_pfvf.c
@@ -6,12 +6,10 @@
#include "adf_accel_devices.h"
#include "adf_common_drv.h"
#include "adf_gen4_pfvf.h"
+#include "adf_gen4_hw_data.h"
#include "adf_pfvf_pf_proto.h"
#include "adf_pfvf_utils.h"
-#define ADF_4XXX_PF2VM_OFFSET(i) (0x40B010 + ((i) * 0x20))
-#define ADF_4XXX_VM2PF_OFFSET(i) (0x40B014 + ((i) * 0x20))
-
/* VF2PF interrupt source registers */
#define ADF_4XXX_VM2PF_SOU 0x41A180
#define ADF_4XXX_VM2PF_MSK 0x41A1C0
@@ -29,12 +27,12 @@ static const struct pfvf_csr_format csr_gen4_fmt = {
static u32 adf_gen4_pf_get_pf2vf_offset(u32 i)
{
- return ADF_4XXX_PF2VM_OFFSET(i);
+ return ADF_GEN4_PF2VM_OFFSET(i);
}
static u32 adf_gen4_pf_get_vf2pf_offset(u32 i)
{
- return ADF_4XXX_VM2PF_OFFSET(i);
+ return ADF_GEN4_VM2PF_OFFSET(i);
}
static void adf_gen4_enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask)
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_ras.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_ras.c
index 048c24607939..2dd3772bf58a 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen4_ras.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_ras.c
@@ -1007,8 +1007,7 @@ static bool adf_handle_spppar_err(struct adf_accel_dev *accel_dev,
static bool adf_handle_ssmcpppar_err(struct adf_accel_dev *accel_dev,
void __iomem *csr, u32 iastatssm)
{
- u32 reg = ADF_CSR_RD(csr, ADF_GEN4_SSMCPPERR);
- u32 bits_num = BITS_PER_REG(reg);
+ u32 reg, bits_num = BITS_PER_REG(reg);
bool reset_required = false;
unsigned long errs_bits;
u32 bit_iterator;
@@ -1106,8 +1105,7 @@ static bool adf_handle_rf_parr_err(struct adf_accel_dev *accel_dev,
static bool adf_handle_ser_err_ssmsh(struct adf_accel_dev *accel_dev,
void __iomem *csr, u32 iastatssm)
{
- u32 reg = ADF_CSR_RD(csr, ADF_GEN4_SER_ERR_SSMSH);
- u32 bits_num = BITS_PER_REG(reg);
+ u32 reg, bits_num = BITS_PER_REG(reg);
bool reset_required = false;
unsigned long errs_bits;
u32 bit_iterator;
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_tl.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_tl.c
index 7fc7a77f6aed..c7ad8cf07863 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_gen4_tl.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_tl.c
@@ -149,5 +149,6 @@ void adf_gen4_init_tl_data(struct adf_tl_hw_data *tl_data)
tl_data->sl_exec_counters = sl_exec_counters;
tl_data->rp_counters = rp_counters;
tl_data->num_rp_counters = ARRAY_SIZE(rp_counters);
+ tl_data->max_sl_cnt = ADF_GEN4_TL_MAX_SLICES_PER_TYPE;
}
EXPORT_SYMBOL_GPL(adf_gen4_init_tl_data);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.c
new file mode 100644
index 000000000000..a62eb5e8dbe6
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.c
@@ -0,0 +1,1010 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2024 Intel Corporation */
+#include <linux/delay.h>
+#include <linux/dev_printk.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <asm/errno.h>
+
+#include "adf_accel_devices.h"
+#include "adf_common_drv.h"
+#include "adf_gen4_hw_data.h"
+#include "adf_gen4_pfvf.h"
+#include "adf_pfvf_utils.h"
+#include "adf_mstate_mgr.h"
+#include "adf_gen4_vf_mig.h"
+
+#define ADF_GEN4_VF_MSTATE_SIZE 4096
+#define ADF_GEN4_PFVF_RSP_TIMEOUT_US 5000
+
+static int adf_gen4_vfmig_save_setup(struct qat_mig_dev *mdev);
+static int adf_gen4_vfmig_load_setup(struct qat_mig_dev *mdev, int len);
+
+static int adf_gen4_vfmig_init_device(struct qat_mig_dev *mdev)
+{
+ u8 *state;
+
+ state = kmalloc(ADF_GEN4_VF_MSTATE_SIZE, GFP_KERNEL);
+ if (!state)
+ return -ENOMEM;
+
+ mdev->state = state;
+ mdev->state_size = ADF_GEN4_VF_MSTATE_SIZE;
+ mdev->setup_size = 0;
+ mdev->remote_setup_size = 0;
+
+ return 0;
+}
+
+static void adf_gen4_vfmig_cleanup_device(struct qat_mig_dev *mdev)
+{
+ kfree(mdev->state);
+ mdev->state = NULL;
+}
+
+static void adf_gen4_vfmig_reset_device(struct qat_mig_dev *mdev)
+{
+ mdev->setup_size = 0;
+ mdev->remote_setup_size = 0;
+}
+
+static int adf_gen4_vfmig_open_device(struct qat_mig_dev *mdev)
+{
+ struct adf_accel_dev *accel_dev = mdev->parent_accel_dev;
+ struct adf_accel_vf_info *vf_info;
+ struct adf_gen4_vfmig *vfmig;
+
+ vf_info = &accel_dev->pf.vf_info[mdev->vf_id];
+
+ vfmig = kzalloc(sizeof(*vfmig), GFP_KERNEL);
+ if (!vfmig)
+ return -ENOMEM;
+
+ vfmig->mstate_mgr = adf_mstate_mgr_new(mdev->state, mdev->state_size);
+ if (!vfmig->mstate_mgr) {
+ kfree(vfmig);
+ return -ENOMEM;
+ }
+ vf_info->mig_priv = vfmig;
+ mdev->setup_size = 0;
+ mdev->remote_setup_size = 0;
+
+ return 0;
+}
+
+static void adf_gen4_vfmig_close_device(struct qat_mig_dev *mdev)
+{
+ struct adf_accel_dev *accel_dev = mdev->parent_accel_dev;
+ struct adf_accel_vf_info *vf_info;
+ struct adf_gen4_vfmig *vfmig;
+
+ vf_info = &accel_dev->pf.vf_info[mdev->vf_id];
+ if (vf_info->mig_priv) {
+ vfmig = vf_info->mig_priv;
+ adf_mstate_mgr_destroy(vfmig->mstate_mgr);
+ kfree(vfmig);
+ vf_info->mig_priv = NULL;
+ }
+}
+
+static int adf_gen4_vfmig_suspend_device(struct qat_mig_dev *mdev)
+{
+ struct adf_accel_dev *accel_dev = mdev->parent_accel_dev;
+ struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+ struct adf_accel_vf_info *vf_info;
+ struct adf_gen4_vfmig *vf_mig;
+ u32 vf_nr = mdev->vf_id;
+ int ret, i;
+
+ vf_info = &accel_dev->pf.vf_info[vf_nr];
+ vf_mig = vf_info->mig_priv;
+
+ /* Stop all inflight jobs */
+ for (i = 0; i < hw_data->num_banks_per_vf; i++) {
+ u32 pf_bank_nr = i + vf_nr * hw_data->num_banks_per_vf;
+
+ ret = adf_gen4_bank_drain_start(accel_dev, pf_bank_nr,
+ ADF_RPRESET_POLL_TIMEOUT_US);
+ if (ret) {
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to drain bank %d for vf_nr %d\n", i,
+ vf_nr);
+ return ret;
+ }
+ vf_mig->bank_stopped[i] = true;
+
+ adf_gen4_bank_quiesce_coal_timer(accel_dev, pf_bank_nr,
+ ADF_COALESCED_POLL_TIMEOUT_US);
+ }
+
+ return 0;
+}
+
+static int adf_gen4_vfmig_resume_device(struct qat_mig_dev *mdev)
+{
+ struct adf_accel_dev *accel_dev = mdev->parent_accel_dev;
+ struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+ struct adf_accel_vf_info *vf_info;
+ struct adf_gen4_vfmig *vf_mig;
+ u32 vf_nr = mdev->vf_id;
+ int i;
+
+ vf_info = &accel_dev->pf.vf_info[vf_nr];
+ vf_mig = vf_info->mig_priv;
+
+ for (i = 0; i < hw_data->num_banks_per_vf; i++) {
+ u32 pf_bank_nr = i + vf_nr * hw_data->num_banks_per_vf;
+
+ if (vf_mig->bank_stopped[i]) {
+ adf_gen4_bank_drain_finish(accel_dev, pf_bank_nr);
+ vf_mig->bank_stopped[i] = false;
+ }
+ }
+
+ return 0;
+}
+
+struct adf_vf_bank_info {
+ struct adf_accel_dev *accel_dev;
+ u32 vf_nr;
+ u32 bank_nr;
+};
+
+struct mig_user_sla {
+ enum adf_base_services srv;
+ u64 rp_mask;
+ u32 cir;
+ u32 pir;
+};
+
+static int adf_mstate_sla_check(struct adf_mstate_mgr *sub_mgr, u8 *src_buf,
+ u32 src_size, void *opaque)
+{
+ struct adf_mstate_vreginfo _sinfo = { src_buf, src_size };
+ struct adf_mstate_vreginfo *sinfo = &_sinfo, *dinfo = opaque;
+ u32 src_sla_cnt = sinfo->size / sizeof(struct mig_user_sla);
+ u32 dst_sla_cnt = dinfo->size / sizeof(struct mig_user_sla);
+ struct mig_user_sla *src_slas = sinfo->addr;
+ struct mig_user_sla *dst_slas = dinfo->addr;
+ int i, j;
+
+ for (i = 0; i < src_sla_cnt; i++) {
+ for (j = 0; j < dst_sla_cnt; j++) {
+ if (src_slas[i].srv != dst_slas[j].srv ||
+ src_slas[i].rp_mask != dst_slas[j].rp_mask)
+ continue;
+
+ if (src_slas[i].cir > dst_slas[j].cir ||
+ src_slas[i].pir > dst_slas[j].pir) {
+ pr_err("QAT: DST VF rate limiting mismatch.\n");
+ return -EINVAL;
+ }
+ break;
+ }
+
+ if (j == dst_sla_cnt) {
+ pr_err("QAT: SRC VF rate limiting mismatch - SRC srv %d and rp_mask 0x%llx.\n",
+ src_slas[i].srv, src_slas[i].rp_mask);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static inline int adf_mstate_check_cap_size(u32 src_sz, u32 dst_sz, u32 max_sz)
+{
+ if (src_sz > max_sz || dst_sz > max_sz)
+ return -EINVAL;
+ else
+ return 0;
+}
+
+static int adf_mstate_compatver_check(struct adf_mstate_mgr *sub_mgr,
+ u8 *src_buf, u32 src_sz, void *opaque)
+{
+ struct adf_mstate_vreginfo *info = opaque;
+ u8 compat = 0;
+ u8 *pcompat;
+
+ if (src_sz != info->size) {
+ pr_debug("QAT: State mismatch (compat version size), current %u, expected %u\n",
+ src_sz, info->size);
+ return -EINVAL;
+ }
+
+ memcpy(info->addr, src_buf, info->size);
+ pcompat = info->addr;
+ if (*pcompat == 0) {
+ pr_warn("QAT: Unable to determine the version of VF\n");
+ return 0;
+ }
+
+ compat = adf_vf_compat_checker(*pcompat);
+ if (compat == ADF_PF2VF_VF_INCOMPATIBLE) {
+ pr_debug("QAT: SRC VF driver (ver=%u) is incompatible with DST PF driver (ver=%u)\n",
+ *pcompat, ADF_PFVF_COMPAT_THIS_VERSION);
+ return -EINVAL;
+ }
+
+ if (compat == ADF_PF2VF_VF_COMPAT_UNKNOWN)
+ pr_debug("QAT: SRC VF driver (ver=%u) is newer than DST PF driver (ver=%u)\n",
+ *pcompat, ADF_PFVF_COMPAT_THIS_VERSION);
+
+ return 0;
+}
+
+/*
+ * adf_mstate_capmask_compare() - compare QAT device capability mask
+ * @sinfo: Pointer to source capability info
+ * @dinfo: Pointer to target capability info
+ *
+ * This function compares the capability mask between source VF and target VF
+ *
+ * Returns: 0 if target capability mask is identical to source capability mask,
+ * 1 if target mask can represent all the capabilities represented by source mask,
+ * -1 if target mask can't represent all the capabilities represented by source
+ * mask.
+ */
+static int adf_mstate_capmask_compare(struct adf_mstate_vreginfo *sinfo,
+ struct adf_mstate_vreginfo *dinfo)
+{
+ u64 src = 0, dst = 0;
+
+ if (adf_mstate_check_cap_size(sinfo->size, dinfo->size, sizeof(u64))) {
+ pr_debug("QAT: Unexpected capability size %u %u %zu\n",
+ sinfo->size, dinfo->size, sizeof(u64));
+ return -1;
+ }
+
+ memcpy(&src, sinfo->addr, sinfo->size);
+ memcpy(&dst, dinfo->addr, dinfo->size);
+
+ pr_debug("QAT: Check cap compatibility of cap %llu %llu\n", src, dst);
+
+ if (src == dst)
+ return 0;
+
+ if ((src | dst) == dst)
+ return 1;
+
+ return -1;
+}
+
+static int adf_mstate_capmask_superset(struct adf_mstate_mgr *sub_mgr, u8 *buf,
+ u32 size, void *opa)
+{
+ struct adf_mstate_vreginfo sinfo = { buf, size };
+
+ if (adf_mstate_capmask_compare(&sinfo, opa) >= 0)
+ return 0;
+
+ return -EINVAL;
+}
+
+static int adf_mstate_capmask_equal(struct adf_mstate_mgr *sub_mgr, u8 *buf,
+ u32 size, void *opa)
+{
+ struct adf_mstate_vreginfo sinfo = { buf, size };
+
+ if (adf_mstate_capmask_compare(&sinfo, opa) == 0)
+ return 0;
+
+ return -EINVAL;
+}
+
+static int adf_mstate_set_vreg(struct adf_mstate_mgr *sub_mgr, u8 *buf,
+ u32 size, void *opa)
+{
+ struct adf_mstate_vreginfo *info = opa;
+
+ if (size != info->size) {
+ pr_debug("QAT: Unexpected cap size %u %u\n", size, info->size);
+ return -EINVAL;
+ }
+ memcpy(info->addr, buf, info->size);
+
+ return 0;
+}
+
+static u32 adf_gen4_vfmig_get_slas(struct adf_accel_dev *accel_dev, u32 vf_nr,
+ struct mig_user_sla *pmig_slas)
+{
+ struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+ struct adf_rl *rl_data = accel_dev->rate_limiting;
+ struct rl_sla **sla_type_arr = NULL;
+ u64 rp_mask, rp_index;
+ u32 max_num_sla;
+ u32 sla_cnt = 0;
+ int i, j;
+
+ if (!accel_dev->rate_limiting)
+ return 0;
+
+ rp_index = vf_nr * hw_data->num_banks_per_vf;
+ max_num_sla = adf_rl_get_sla_arr_of_type(rl_data, RL_LEAF, &sla_type_arr);
+
+ for (i = 0; i < max_num_sla; i++) {
+ if (!sla_type_arr[i])
+ continue;
+
+ rp_mask = 0;
+ for (j = 0; j < sla_type_arr[i]->ring_pairs_cnt; j++)
+ rp_mask |= BIT(sla_type_arr[i]->ring_pairs_ids[j]);
+
+ if (rp_mask & GENMASK_ULL(rp_index + 3, rp_index)) {
+ pmig_slas->rp_mask = rp_mask;
+ pmig_slas->cir = sla_type_arr[i]->cir;
+ pmig_slas->pir = sla_type_arr[i]->pir;
+ pmig_slas->srv = sla_type_arr[i]->srv;
+ pmig_slas++;
+ sla_cnt++;
+ }
+ }
+
+ return sla_cnt;
+}
+
+static int adf_gen4_vfmig_load_etr_regs(struct adf_mstate_mgr *sub_mgr,
+ u8 *state, u32 size, void *opa)
+{
+ struct adf_vf_bank_info *vf_bank_info = opa;
+ struct adf_accel_dev *accel_dev = vf_bank_info->accel_dev;
+ struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+ u32 pf_bank_nr;
+ int ret;
+
+ pf_bank_nr = vf_bank_info->bank_nr + vf_bank_info->vf_nr * hw_data->num_banks_per_vf;
+ ret = hw_data->bank_state_restore(accel_dev, pf_bank_nr,
+ (struct bank_state *)state);
+ if (ret) {
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to load regs for vf%d bank%d\n",
+ vf_bank_info->vf_nr, vf_bank_info->bank_nr);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int adf_gen4_vfmig_load_etr_bank(struct adf_accel_dev *accel_dev,
+ u32 vf_nr, u32 bank_nr,
+ struct adf_mstate_mgr *mstate_mgr)
+{
+ struct adf_vf_bank_info vf_bank_info = {accel_dev, vf_nr, bank_nr};
+ struct adf_mstate_sect_h *subsec, *l2_subsec;
+ struct adf_mstate_mgr sub_sects_mgr;
+ char bank_ids[ADF_MSTATE_ID_LEN];
+
+ snprintf(bank_ids, sizeof(bank_ids), ADF_MSTATE_BANK_IDX_IDS "%x", bank_nr);
+ subsec = adf_mstate_sect_lookup(mstate_mgr, bank_ids, NULL, NULL);
+ if (!subsec) {
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to lookup sec %s for vf%d bank%d\n",
+ ADF_MSTATE_BANK_IDX_IDS, vf_nr, bank_nr);
+ return -EINVAL;
+ }
+
+ adf_mstate_mgr_init_from_psect(&sub_sects_mgr, subsec);
+ l2_subsec = adf_mstate_sect_lookup(&sub_sects_mgr, ADF_MSTATE_ETR_REGS_IDS,
+ adf_gen4_vfmig_load_etr_regs,
+ &vf_bank_info);
+ if (!l2_subsec) {
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to add sec %s for vf%d bank%d\n",
+ ADF_MSTATE_ETR_REGS_IDS, vf_nr, bank_nr);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int adf_gen4_vfmig_load_etr(struct adf_accel_dev *accel_dev, u32 vf_nr)
+{
+ struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr];
+ struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+ struct adf_gen4_vfmig *vfmig = vf_info->mig_priv;
+ struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr;
+ struct adf_mstate_mgr sub_sects_mgr;
+ struct adf_mstate_sect_h *subsec;
+ int ret, i;
+
+ subsec = adf_mstate_sect_lookup(mstate_mgr, ADF_MSTATE_ETRB_IDS, NULL,
+ NULL);
+ if (!subsec) {
+ dev_err(&GET_DEV(accel_dev), "Failed to load sec %s\n",
+ ADF_MSTATE_ETRB_IDS);
+ return -EINVAL;
+ }
+
+ adf_mstate_mgr_init_from_psect(&sub_sects_mgr, subsec);
+ for (i = 0; i < hw_data->num_banks_per_vf; i++) {
+ ret = adf_gen4_vfmig_load_etr_bank(accel_dev, vf_nr, i,
+ &sub_sects_mgr);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int adf_gen4_vfmig_load_misc(struct adf_accel_dev *accel_dev, u32 vf_nr)
+{
+ struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr];
+ struct adf_gen4_vfmig *vfmig = vf_info->mig_priv;
+ void __iomem *csr = adf_get_pmisc_base(accel_dev);
+ struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr;
+ struct adf_mstate_sect_h *subsec, *l2_subsec;
+ struct adf_mstate_mgr sub_sects_mgr;
+ struct {
+ char *id;
+ u64 ofs;
+ } misc_states[] = {
+ {ADF_MSTATE_VINTMSK_IDS, ADF_GEN4_VINTMSK_OFFSET(vf_nr)},
+ {ADF_MSTATE_VINTMSK_PF2VM_IDS, ADF_GEN4_VINTMSKPF2VM_OFFSET(vf_nr)},
+ {ADF_MSTATE_PF2VM_IDS, ADF_GEN4_PF2VM_OFFSET(vf_nr)},
+ {ADF_MSTATE_VM2PF_IDS, ADF_GEN4_VM2PF_OFFSET(vf_nr)},
+ };
+ int i;
+
+ subsec = adf_mstate_sect_lookup(mstate_mgr, ADF_MSTATE_MISCB_IDS, NULL,
+ NULL);
+ if (!subsec) {
+ dev_err(&GET_DEV(accel_dev), "Failed to load sec %s\n",
+ ADF_MSTATE_MISCB_IDS);
+ return -EINVAL;
+ }
+
+ adf_mstate_mgr_init_from_psect(&sub_sects_mgr, subsec);
+ for (i = 0; i < ARRAY_SIZE(misc_states); i++) {
+ struct adf_mstate_vreginfo info;
+ u32 regv;
+
+ info.addr = &regv;
+ info.size = sizeof(regv);
+ l2_subsec = adf_mstate_sect_lookup(&sub_sects_mgr,
+ misc_states[i].id,
+ adf_mstate_set_vreg,
+ &info);
+ if (!l2_subsec) {
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to load sec %s\n", misc_states[i].id);
+ return -EINVAL;
+ }
+ ADF_CSR_WR(csr, misc_states[i].ofs, regv);
+ }
+
+ return 0;
+}
+
+static int adf_gen4_vfmig_load_generic(struct adf_accel_dev *accel_dev, u32 vf_nr)
+{
+ struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr];
+ struct mig_user_sla dst_slas[RL_RP_CNT_PER_LEAF_MAX] = { };
+ struct adf_gen4_vfmig *vfmig = vf_info->mig_priv;
+ struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr;
+ struct adf_mstate_sect_h *subsec, *l2_subsec;
+ struct adf_mstate_mgr sub_sects_mgr;
+ u32 dst_sla_cnt;
+ struct {
+ char *id;
+ int (*action)(struct adf_mstate_mgr *sub_mgr, u8 *buf, u32 size, void *opa);
+ struct adf_mstate_vreginfo info;
+ } gen_states[] = {
+ {ADF_MSTATE_IOV_INIT_IDS, adf_mstate_set_vreg,
+ {&vf_info->init, sizeof(vf_info->init)}},
+ {ADF_MSTATE_COMPAT_VER_IDS, adf_mstate_compatver_check,
+ {&vf_info->vf_compat_ver, sizeof(vf_info->vf_compat_ver)}},
+ {ADF_MSTATE_SLA_IDS, adf_mstate_sla_check, {dst_slas, 0}},
+ };
+ int i;
+
+ subsec = adf_mstate_sect_lookup(mstate_mgr, ADF_MSTATE_GEN_IDS, NULL, NULL);
+ if (!subsec) {
+ dev_err(&GET_DEV(accel_dev), "Failed to load sec %s\n",
+ ADF_MSTATE_GEN_IDS);
+ return -EINVAL;
+ }
+
+ adf_mstate_mgr_init_from_psect(&sub_sects_mgr, subsec);
+ for (i = 0; i < ARRAY_SIZE(gen_states); i++) {
+ if (gen_states[i].info.addr == dst_slas) {
+ dst_sla_cnt = adf_gen4_vfmig_get_slas(accel_dev, vf_nr, dst_slas);
+ gen_states[i].info.size = dst_sla_cnt * sizeof(struct mig_user_sla);
+ }
+
+ l2_subsec = adf_mstate_sect_lookup(&sub_sects_mgr,
+ gen_states[i].id,
+ gen_states[i].action,
+ &gen_states[i].info);
+ if (!l2_subsec) {
+ dev_err(&GET_DEV(accel_dev), "Failed to load sec %s\n",
+ gen_states[i].id);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int adf_gen4_vfmig_load_config(struct adf_accel_dev *accel_dev, u32 vf_nr)
+{
+ struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr];
+ struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+ struct adf_gen4_vfmig *vfmig = vf_info->mig_priv;
+ struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr;
+ struct adf_mstate_sect_h *subsec, *l2_subsec;
+ struct adf_mstate_mgr sub_sects_mgr;
+ struct {
+ char *id;
+ int (*action)(struct adf_mstate_mgr *sub_mgr, u8 *buf, u32 size, void *opa);
+ struct adf_mstate_vreginfo info;
+ } setups[] = {
+ {ADF_MSTATE_GEN_CAP_IDS, adf_mstate_capmask_superset,
+ {&hw_data->accel_capabilities_mask, sizeof(hw_data->accel_capabilities_mask)}},
+ {ADF_MSTATE_GEN_SVCMAP_IDS, adf_mstate_capmask_equal,
+ {&hw_data->ring_to_svc_map, sizeof(hw_data->ring_to_svc_map)}},
+ {ADF_MSTATE_GEN_EXTDC_IDS, adf_mstate_capmask_superset,
+ {&hw_data->extended_dc_capabilities, sizeof(hw_data->extended_dc_capabilities)}},
+ };
+ int i;
+
+ subsec = adf_mstate_sect_lookup(mstate_mgr, ADF_MSTATE_CONFIG_IDS, NULL, NULL);
+ if (!subsec) {
+ dev_err(&GET_DEV(accel_dev), "Failed to load sec %s\n",
+ ADF_MSTATE_CONFIG_IDS);
+ return -EINVAL;
+ }
+
+ adf_mstate_mgr_init_from_psect(&sub_sects_mgr, subsec);
+ for (i = 0; i < ARRAY_SIZE(setups); i++) {
+ l2_subsec = adf_mstate_sect_lookup(&sub_sects_mgr, setups[i].id,
+ setups[i].action, &setups[i].info);
+ if (!l2_subsec) {
+ dev_err(&GET_DEV(accel_dev), "Failed to load sec %s\n",
+ setups[i].id);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int adf_gen4_vfmig_save_etr_regs(struct adf_mstate_mgr *subs, u8 *state,
+ u32 size, void *opa)
+{
+ struct adf_vf_bank_info *vf_bank_info = opa;
+ struct adf_accel_dev *accel_dev = vf_bank_info->accel_dev;
+ struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+ u32 pf_bank_nr;
+ int ret;
+
+ pf_bank_nr = vf_bank_info->bank_nr;
+ pf_bank_nr += vf_bank_info->vf_nr * hw_data->num_banks_per_vf;
+
+ ret = hw_data->bank_state_save(accel_dev, pf_bank_nr,
+ (struct bank_state *)state);
+ if (ret) {
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to save regs for vf%d bank%d\n",
+ vf_bank_info->vf_nr, vf_bank_info->bank_nr);
+ return ret;
+ }
+
+ return sizeof(struct bank_state);
+}
+
+static int adf_gen4_vfmig_save_etr_bank(struct adf_accel_dev *accel_dev,
+ u32 vf_nr, u32 bank_nr,
+ struct adf_mstate_mgr *mstate_mgr)
+{
+ struct adf_mstate_sect_h *subsec, *l2_subsec;
+ struct adf_vf_bank_info vf_bank_info;
+ struct adf_mstate_mgr sub_sects_mgr;
+ char bank_ids[ADF_MSTATE_ID_LEN];
+
+ snprintf(bank_ids, sizeof(bank_ids), ADF_MSTATE_BANK_IDX_IDS "%x", bank_nr);
+
+ subsec = adf_mstate_sect_add(mstate_mgr, bank_ids, NULL, NULL);
+ if (!subsec) {
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to add sec %s for vf%d bank%d\n",
+ ADF_MSTATE_BANK_IDX_IDS, vf_nr, bank_nr);
+ return -EINVAL;
+ }
+
+ adf_mstate_mgr_init_from_parent(&sub_sects_mgr, mstate_mgr);
+ vf_bank_info.accel_dev = accel_dev;
+ vf_bank_info.vf_nr = vf_nr;
+ vf_bank_info.bank_nr = bank_nr;
+ l2_subsec = adf_mstate_sect_add(&sub_sects_mgr, ADF_MSTATE_ETR_REGS_IDS,
+ adf_gen4_vfmig_save_etr_regs,
+ &vf_bank_info);
+ if (!l2_subsec) {
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to add sec %s for vf%d bank%d\n",
+ ADF_MSTATE_ETR_REGS_IDS, vf_nr, bank_nr);
+ return -EINVAL;
+ }
+ adf_mstate_sect_update(mstate_mgr, &sub_sects_mgr, subsec);
+
+ return 0;
+}
+
+static int adf_gen4_vfmig_save_etr(struct adf_accel_dev *accel_dev, u32 vf_nr)
+{
+ struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr];
+ struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+ struct adf_gen4_vfmig *vfmig = vf_info->mig_priv;
+ struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr;
+ struct adf_mstate_mgr sub_sects_mgr;
+ struct adf_mstate_sect_h *subsec;
+ int ret, i;
+
+ subsec = adf_mstate_sect_add(mstate_mgr, ADF_MSTATE_ETRB_IDS, NULL, NULL);
+ if (!subsec) {
+ dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n",
+ ADF_MSTATE_ETRB_IDS);
+ return -EINVAL;
+ }
+
+ adf_mstate_mgr_init_from_parent(&sub_sects_mgr, mstate_mgr);
+ for (i = 0; i < hw_data->num_banks_per_vf; i++) {
+ ret = adf_gen4_vfmig_save_etr_bank(accel_dev, vf_nr, i,
+ &sub_sects_mgr);
+ if (ret)
+ return ret;
+ }
+ adf_mstate_sect_update(mstate_mgr, &sub_sects_mgr, subsec);
+
+ return 0;
+}
+
+static int adf_gen4_vfmig_save_misc(struct adf_accel_dev *accel_dev, u32 vf_nr)
+{
+ struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr];
+ struct adf_gen4_vfmig *vfmig = vf_info->mig_priv;
+ struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr;
+ void __iomem *csr = adf_get_pmisc_base(accel_dev);
+ struct adf_mstate_sect_h *subsec, *l2_subsec;
+ struct adf_mstate_mgr sub_sects_mgr;
+ struct {
+ char *id;
+ u64 offset;
+ } misc_states[] = {
+ {ADF_MSTATE_VINTSRC_IDS, ADF_GEN4_VINTSOU_OFFSET(vf_nr)},
+ {ADF_MSTATE_VINTMSK_IDS, ADF_GEN4_VINTMSK_OFFSET(vf_nr)},
+ {ADF_MSTATE_VINTSRC_PF2VM_IDS, ADF_GEN4_VINTSOUPF2VM_OFFSET(vf_nr)},
+ {ADF_MSTATE_VINTMSK_PF2VM_IDS, ADF_GEN4_VINTMSKPF2VM_OFFSET(vf_nr)},
+ {ADF_MSTATE_PF2VM_IDS, ADF_GEN4_PF2VM_OFFSET(vf_nr)},
+ {ADF_MSTATE_VM2PF_IDS, ADF_GEN4_VM2PF_OFFSET(vf_nr)},
+ };
+ ktime_t time_exp;
+ int i;
+
+ subsec = adf_mstate_sect_add(mstate_mgr, ADF_MSTATE_MISCB_IDS, NULL, NULL);
+ if (!subsec) {
+ dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n",
+ ADF_MSTATE_MISCB_IDS);
+ return -EINVAL;
+ }
+
+ time_exp = ktime_add_us(ktime_get(), ADF_GEN4_PFVF_RSP_TIMEOUT_US);
+ while (!mutex_trylock(&vf_info->pfvf_mig_lock)) {
+ if (ktime_after(ktime_get(), time_exp)) {
+ dev_err(&GET_DEV(accel_dev), "Failed to get pfvf mig lock\n");
+ return -ETIMEDOUT;
+ }
+ usleep_range(500, 1000);
+ }
+
+ adf_mstate_mgr_init_from_parent(&sub_sects_mgr, mstate_mgr);
+ for (i = 0; i < ARRAY_SIZE(misc_states); i++) {
+ struct adf_mstate_vreginfo info;
+ u32 regv;
+
+ info.addr = &regv;
+ info.size = sizeof(regv);
+ regv = ADF_CSR_RD(csr, misc_states[i].offset);
+
+ l2_subsec = adf_mstate_sect_add_vreg(&sub_sects_mgr,
+ misc_states[i].id,
+ &info);
+ if (!l2_subsec) {
+ dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n",
+ misc_states[i].id);
+ mutex_unlock(&vf_info->pfvf_mig_lock);
+ return -EINVAL;
+ }
+ }
+
+ mutex_unlock(&vf_info->pfvf_mig_lock);
+ adf_mstate_sect_update(mstate_mgr, &sub_sects_mgr, subsec);
+
+ return 0;
+}
+
+static int adf_gen4_vfmig_save_generic(struct adf_accel_dev *accel_dev, u32 vf_nr)
+{
+ struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr];
+ struct adf_gen4_vfmig *vfmig = vf_info->mig_priv;
+ struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr;
+ struct adf_mstate_mgr sub_sects_mgr;
+ struct adf_mstate_sect_h *subsec, *l2_subsec;
+ struct mig_user_sla src_slas[RL_RP_CNT_PER_LEAF_MAX] = { };
+ u32 src_sla_cnt;
+ struct {
+ char *id;
+ struct adf_mstate_vreginfo info;
+ } gen_states[] = {
+ {ADF_MSTATE_IOV_INIT_IDS,
+ {&vf_info->init, sizeof(vf_info->init)}},
+ {ADF_MSTATE_COMPAT_VER_IDS,
+ {&vf_info->vf_compat_ver, sizeof(vf_info->vf_compat_ver)}},
+ {ADF_MSTATE_SLA_IDS, {src_slas, 0}},
+ };
+ int i;
+
+ subsec = adf_mstate_sect_add(mstate_mgr, ADF_MSTATE_GEN_IDS, NULL, NULL);
+ if (!subsec) {
+ dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n",
+ ADF_MSTATE_GEN_IDS);
+ return -EINVAL;
+ }
+
+ adf_mstate_mgr_init_from_parent(&sub_sects_mgr, mstate_mgr);
+ for (i = 0; i < ARRAY_SIZE(gen_states); i++) {
+ if (gen_states[i].info.addr == src_slas) {
+ src_sla_cnt = adf_gen4_vfmig_get_slas(accel_dev, vf_nr, src_slas);
+ gen_states[i].info.size = src_sla_cnt * sizeof(struct mig_user_sla);
+ }
+
+ l2_subsec = adf_mstate_sect_add_vreg(&sub_sects_mgr,
+ gen_states[i].id,
+ &gen_states[i].info);
+ if (!l2_subsec) {
+ dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n",
+ gen_states[i].id);
+ return -EINVAL;
+ }
+ }
+ adf_mstate_sect_update(mstate_mgr, &sub_sects_mgr, subsec);
+
+ return 0;
+}
+
+static int adf_gen4_vfmig_save_config(struct adf_accel_dev *accel_dev, u32 vf_nr)
+{
+ struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr];
+ struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+ struct adf_gen4_vfmig *vfmig = vf_info->mig_priv;
+ struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr;
+ struct adf_mstate_mgr sub_sects_mgr;
+ struct adf_mstate_sect_h *subsec, *l2_subsec;
+ struct {
+ char *id;
+ struct adf_mstate_vreginfo info;
+ } setups[] = {
+ {ADF_MSTATE_GEN_CAP_IDS,
+ {&hw_data->accel_capabilities_mask, sizeof(hw_data->accel_capabilities_mask)}},
+ {ADF_MSTATE_GEN_SVCMAP_IDS,
+ {&hw_data->ring_to_svc_map, sizeof(hw_data->ring_to_svc_map)}},
+ {ADF_MSTATE_GEN_EXTDC_IDS,
+ {&hw_data->extended_dc_capabilities, sizeof(hw_data->extended_dc_capabilities)}},
+ };
+ int i;
+
+ subsec = adf_mstate_sect_add(mstate_mgr, ADF_MSTATE_CONFIG_IDS, NULL, NULL);
+ if (!subsec) {
+ dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n",
+ ADF_MSTATE_CONFIG_IDS);
+ return -EINVAL;
+ }
+
+ adf_mstate_mgr_init_from_parent(&sub_sects_mgr, mstate_mgr);
+ for (i = 0; i < ARRAY_SIZE(setups); i++) {
+ l2_subsec = adf_mstate_sect_add_vreg(&sub_sects_mgr, setups[i].id,
+ &setups[i].info);
+ if (!l2_subsec) {
+ dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n",
+ setups[i].id);
+ return -EINVAL;
+ }
+ }
+ adf_mstate_sect_update(mstate_mgr, &sub_sects_mgr, subsec);
+
+ return 0;
+}
+
+static int adf_gen4_vfmig_save_state(struct qat_mig_dev *mdev)
+{
+ struct adf_accel_dev *accel_dev = mdev->parent_accel_dev;
+ struct adf_accel_vf_info *vf_info;
+ struct adf_gen4_vfmig *vfmig;
+ u32 vf_nr = mdev->vf_id;
+ int ret;
+
+ vf_info = &accel_dev->pf.vf_info[vf_nr];
+ vfmig = vf_info->mig_priv;
+
+ ret = adf_gen4_vfmig_save_setup(mdev);
+ if (ret) {
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to save setup for vf_nr %d\n", vf_nr);
+ return ret;
+ }
+
+ adf_mstate_mgr_init(vfmig->mstate_mgr, mdev->state + mdev->setup_size,
+ mdev->state_size - mdev->setup_size);
+ if (!adf_mstate_preamble_add(vfmig->mstate_mgr))
+ return -EINVAL;
+
+ ret = adf_gen4_vfmig_save_generic(accel_dev, vf_nr);
+ if (ret) {
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to save generic state for vf_nr %d\n", vf_nr);
+ return ret;
+ }
+
+ ret = adf_gen4_vfmig_save_misc(accel_dev, vf_nr);
+ if (ret) {
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to save misc bar state for vf_nr %d\n", vf_nr);
+ return ret;
+ }
+
+ ret = adf_gen4_vfmig_save_etr(accel_dev, vf_nr);
+ if (ret) {
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to save etr bar state for vf_nr %d\n", vf_nr);
+ return ret;
+ }
+
+ adf_mstate_preamble_update(vfmig->mstate_mgr);
+
+ return 0;
+}
+
+static int adf_gen4_vfmig_load_state(struct qat_mig_dev *mdev)
+{
+ struct adf_accel_dev *accel_dev = mdev->parent_accel_dev;
+ struct adf_accel_vf_info *vf_info;
+ struct adf_gen4_vfmig *vfmig;
+ u32 vf_nr = mdev->vf_id;
+ int ret;
+
+ vf_info = &accel_dev->pf.vf_info[vf_nr];
+ vfmig = vf_info->mig_priv;
+
+ ret = adf_gen4_vfmig_load_setup(mdev, mdev->state_size);
+ if (ret) {
+ dev_err(&GET_DEV(accel_dev), "Failed to load setup for vf_nr %d\n",
+ vf_nr);
+ return ret;
+ }
+
+ ret = adf_mstate_mgr_init_from_remote(vfmig->mstate_mgr,
+ mdev->state + mdev->remote_setup_size,
+ mdev->state_size - mdev->remote_setup_size,
+ NULL, NULL);
+ if (ret) {
+ dev_err(&GET_DEV(accel_dev), "Invalid state for vf_nr %d\n",
+ vf_nr);
+ return ret;
+ }
+
+ ret = adf_gen4_vfmig_load_generic(accel_dev, vf_nr);
+ if (ret) {
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to load general state for vf_nr %d\n", vf_nr);
+ return ret;
+ }
+
+ ret = adf_gen4_vfmig_load_misc(accel_dev, vf_nr);
+ if (ret) {
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to load misc bar state for vf_nr %d\n", vf_nr);
+ return ret;
+ }
+
+ ret = adf_gen4_vfmig_load_etr(accel_dev, vf_nr);
+ if (ret) {
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to load etr bar state for vf_nr %d\n", vf_nr);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int adf_gen4_vfmig_save_setup(struct qat_mig_dev *mdev)
+{
+ struct adf_accel_dev *accel_dev = mdev->parent_accel_dev;
+ struct adf_accel_vf_info *vf_info;
+ struct adf_gen4_vfmig *vfmig;
+ u32 vf_nr = mdev->vf_id;
+ int ret;
+
+ vf_info = &accel_dev->pf.vf_info[vf_nr];
+ vfmig = vf_info->mig_priv;
+
+ if (mdev->setup_size)
+ return 0;
+
+ adf_mstate_mgr_init(vfmig->mstate_mgr, mdev->state, mdev->state_size);
+ if (!adf_mstate_preamble_add(vfmig->mstate_mgr))
+ return -EINVAL;
+
+ ret = adf_gen4_vfmig_save_config(accel_dev, mdev->vf_id);
+ if (ret)
+ return ret;
+
+ adf_mstate_preamble_update(vfmig->mstate_mgr);
+ mdev->setup_size = adf_mstate_state_size(vfmig->mstate_mgr);
+
+ return 0;
+}
+
+static int adf_gen4_vfmig_load_setup(struct qat_mig_dev *mdev, int len)
+{
+ struct adf_accel_dev *accel_dev = mdev->parent_accel_dev;
+ struct adf_accel_vf_info *vf_info;
+ struct adf_gen4_vfmig *vfmig;
+ u32 vf_nr = mdev->vf_id;
+ u32 setup_size;
+ int ret;
+
+ vf_info = &accel_dev->pf.vf_info[vf_nr];
+ vfmig = vf_info->mig_priv;
+
+ if (mdev->remote_setup_size)
+ return 0;
+
+ if (len < sizeof(struct adf_mstate_preh))
+ return -EAGAIN;
+
+ adf_mstate_mgr_init(vfmig->mstate_mgr, mdev->state, mdev->state_size);
+ setup_size = adf_mstate_state_size_from_remote(vfmig->mstate_mgr);
+ if (setup_size > mdev->state_size)
+ return -EINVAL;
+
+ if (len < setup_size)
+ return -EAGAIN;
+
+ ret = adf_mstate_mgr_init_from_remote(vfmig->mstate_mgr, mdev->state,
+ setup_size, NULL, NULL);
+ if (ret) {
+ dev_err(&GET_DEV(accel_dev), "Invalid setup for vf_nr %d\n",
+ vf_nr);
+ return ret;
+ }
+
+ mdev->remote_setup_size = setup_size;
+
+ ret = adf_gen4_vfmig_load_config(accel_dev, vf_nr);
+ if (ret) {
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to load config for vf_nr %d\n", vf_nr);
+ return ret;
+ }
+
+ return 0;
+}
+
+void adf_gen4_init_vf_mig_ops(struct qat_migdev_ops *vfmig_ops)
+{
+ vfmig_ops->init = adf_gen4_vfmig_init_device;
+ vfmig_ops->cleanup = adf_gen4_vfmig_cleanup_device;
+ vfmig_ops->reset = adf_gen4_vfmig_reset_device;
+ vfmig_ops->open = adf_gen4_vfmig_open_device;
+ vfmig_ops->close = adf_gen4_vfmig_close_device;
+ vfmig_ops->suspend = adf_gen4_vfmig_suspend_device;
+ vfmig_ops->resume = adf_gen4_vfmig_resume_device;
+ vfmig_ops->save_state = adf_gen4_vfmig_save_state;
+ vfmig_ops->load_state = adf_gen4_vfmig_load_state;
+ vfmig_ops->load_setup = adf_gen4_vfmig_load_setup;
+ vfmig_ops->save_setup = adf_gen4_vfmig_save_setup;
+}
+EXPORT_SYMBOL_GPL(adf_gen4_init_vf_mig_ops);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.h
new file mode 100644
index 000000000000..72216d078ee1
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2024 Intel Corporation */
+#ifndef ADF_GEN4_VF_MIG_H_
+#define ADF_GEN4_VF_MIG_H_
+
+#include "adf_accel_devices.h"
+
+void adf_gen4_init_vf_mig_ops(struct qat_migdev_ops *vfmig_ops);
+
+#endif
diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c
index 13f48d2f6da8..b19aa1ef8eee 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c
@@ -23,12 +23,6 @@
#define ADF_HB_EMPTY_SIG 0xA5A5A5A5
-/* Heartbeat counter pair */
-struct hb_cnt_pair {
- __u16 resp_heartbeat_cnt;
- __u16 req_heartbeat_cnt;
-};
-
static int adf_hb_check_polling_freq(struct adf_accel_dev *accel_dev)
{
u64 curr_time = adf_clock_get_current_time();
@@ -211,6 +205,19 @@ static int adf_hb_get_status(struct adf_accel_dev *accel_dev)
return ret;
}
+static void adf_heartbeat_reset(struct adf_accel_dev *accel_dev)
+{
+ u64 curr_time = adf_clock_get_current_time();
+ u64 time_since_reset = curr_time - accel_dev->heartbeat->last_hb_reset_time;
+
+ if (time_since_reset < ADF_CFG_HB_RESET_MS)
+ return;
+
+ accel_dev->heartbeat->last_hb_reset_time = curr_time;
+ if (adf_notify_fatal_error(accel_dev))
+ dev_err(&GET_DEV(accel_dev), "Failed to notify fatal error\n");
+}
+
void adf_heartbeat_status(struct adf_accel_dev *accel_dev,
enum adf_device_heartbeat_status *hb_status)
{
@@ -235,6 +242,7 @@ void adf_heartbeat_status(struct adf_accel_dev *accel_dev,
"Heartbeat ERROR: QAT is not responding.\n");
*hb_status = HB_DEV_UNRESPONSIVE;
hb->hb_failed_counter++;
+ adf_heartbeat_reset(accel_dev);
return;
}
diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h
index b22e3cb29798..16fdfb48b196 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h
@@ -13,17 +13,26 @@ struct dentry;
#define ADF_CFG_HB_TIMER_DEFAULT_MS 500
#define ADF_CFG_HB_COUNT_THRESHOLD 3
+#define ADF_CFG_HB_RESET_MS 5000
+
enum adf_device_heartbeat_status {
HB_DEV_UNRESPONSIVE = 0,
HB_DEV_ALIVE,
HB_DEV_UNSUPPORTED,
};
+/* Heartbeat counter pair */
+struct hb_cnt_pair {
+ __u16 resp_heartbeat_cnt;
+ __u16 req_heartbeat_cnt;
+};
+
struct adf_heartbeat {
unsigned int hb_sent_counter;
unsigned int hb_failed_counter;
unsigned int hb_timer;
u64 last_hb_check_time;
+ u64 last_hb_reset_time;
bool ctrs_cnt_checked;
struct hb_dma_addr {
dma_addr_t phy_addr;
@@ -35,6 +44,9 @@ struct adf_heartbeat {
struct dentry *cfg;
struct dentry *sent;
struct dentry *failed;
+#ifdef CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION
+ struct dentry *inject_error;
+#endif
} dbgfs;
};
@@ -51,6 +63,15 @@ void adf_heartbeat_status(struct adf_accel_dev *accel_dev,
enum adf_device_heartbeat_status *hb_status);
void adf_heartbeat_check_ctrs(struct adf_accel_dev *accel_dev);
+#ifdef CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION
+int adf_heartbeat_inject_error(struct adf_accel_dev *accel_dev);
+#else
+static inline int adf_heartbeat_inject_error(struct adf_accel_dev *accel_dev)
+{
+ return -EPERM;
+}
+#endif
+
#else
static inline int adf_heartbeat_init(struct adf_accel_dev *accel_dev)
{
diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.c b/drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.c
index 2661af6a2ef6..cccdff24b48d 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.c
@@ -155,6 +155,44 @@ static const struct file_operations adf_hb_cfg_fops = {
.write = adf_hb_cfg_write,
};
+static ssize_t adf_hb_error_inject_write(struct file *file,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct adf_accel_dev *accel_dev = file->private_data;
+ char buf[3];
+ int ret;
+
+ /* last byte left as string termination */
+ if (*ppos != 0 || count != 2)
+ return -EINVAL;
+
+ if (copy_from_user(buf, user_buf, count))
+ return -EFAULT;
+ buf[count] = '\0';
+
+ if (buf[0] != '1')
+ return -EINVAL;
+
+ ret = adf_heartbeat_inject_error(accel_dev);
+ if (ret) {
+ dev_err(&GET_DEV(accel_dev),
+ "Heartbeat error injection failed with status %d\n",
+ ret);
+ return ret;
+ }
+
+ dev_info(&GET_DEV(accel_dev), "Heartbeat error injection enabled\n");
+
+ return count;
+}
+
+static const struct file_operations adf_hb_error_inject_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = adf_hb_error_inject_write,
+};
+
void adf_heartbeat_dbgfs_add(struct adf_accel_dev *accel_dev)
{
struct adf_heartbeat *hb = accel_dev->heartbeat;
@@ -171,6 +209,17 @@ void adf_heartbeat_dbgfs_add(struct adf_accel_dev *accel_dev)
&hb->hb_failed_counter, &adf_hb_stats_fops);
hb->dbgfs.cfg = debugfs_create_file("config", 0600, hb->dbgfs.base_dir,
accel_dev, &adf_hb_cfg_fops);
+
+ if (IS_ENABLED(CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION)) {
+ struct dentry *inject_error __maybe_unused;
+
+ inject_error = debugfs_create_file("inject_error", 0200,
+ hb->dbgfs.base_dir, accel_dev,
+ &adf_hb_error_inject_fops);
+#ifdef CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION
+ hb->dbgfs.inject_error = inject_error;
+#endif
+ }
}
EXPORT_SYMBOL_GPL(adf_heartbeat_dbgfs_add);
@@ -189,6 +238,10 @@ void adf_heartbeat_dbgfs_rm(struct adf_accel_dev *accel_dev)
hb->dbgfs.failed = NULL;
debugfs_remove(hb->dbgfs.cfg);
hb->dbgfs.cfg = NULL;
+#ifdef CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION
+ debugfs_remove(hb->dbgfs.inject_error);
+ hb->dbgfs.inject_error = NULL;
+#endif
debugfs_remove(hb->dbgfs.base_dir);
hb->dbgfs.base_dir = NULL;
}
diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat_inject.c b/drivers/crypto/intel/qat/qat_common/adf_heartbeat_inject.c
new file mode 100644
index 000000000000..a3b474bdef6c
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat_inject.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2023 Intel Corporation */
+#include <linux/random.h>
+
+#include "adf_admin.h"
+#include "adf_common_drv.h"
+#include "adf_heartbeat.h"
+
+#define MAX_HB_TICKS 0xFFFFFFFF
+
+static int adf_hb_set_timer_to_max(struct adf_accel_dev *accel_dev)
+{
+ struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+
+ accel_dev->heartbeat->hb_timer = 0;
+
+ if (hw_data->stop_timer)
+ hw_data->stop_timer(accel_dev);
+
+ return adf_send_admin_hb_timer(accel_dev, MAX_HB_TICKS);
+}
+
+static void adf_set_hb_counters_fail(struct adf_accel_dev *accel_dev, u32 ae,
+ u32 thr)
+{
+ struct hb_cnt_pair *stats = accel_dev->heartbeat->dma.virt_addr;
+ struct adf_hw_device_data *hw_device = accel_dev->hw_device;
+ const size_t max_aes = hw_device->get_num_aes(hw_device);
+ const size_t hb_ctrs = hw_device->num_hb_ctrs;
+ size_t thr_id = ae * hb_ctrs + thr;
+ u16 num_rsp = stats[thr_id].resp_heartbeat_cnt;
+
+ /*
+ * Inject live.req != live.rsp and live.rsp == last.rsp
+ * to trigger the heartbeat error detection
+ */
+ stats[thr_id].req_heartbeat_cnt++;
+ stats += (max_aes * hb_ctrs);
+ stats[thr_id].resp_heartbeat_cnt = num_rsp;
+}
+
+int adf_heartbeat_inject_error(struct adf_accel_dev *accel_dev)
+{
+ struct adf_hw_device_data *hw_device = accel_dev->hw_device;
+ const size_t max_aes = hw_device->get_num_aes(hw_device);
+ const size_t hb_ctrs = hw_device->num_hb_ctrs;
+ u32 rand, rand_ae, rand_thr;
+ unsigned long ae_mask;
+ int ret;
+
+ ae_mask = hw_device->ae_mask;
+
+ do {
+ /* Ensure we have a valid ae */
+ get_random_bytes(&rand, sizeof(rand));
+ rand_ae = rand % max_aes;
+ } while (!test_bit(rand_ae, &ae_mask));
+
+ get_random_bytes(&rand, sizeof(rand));
+ rand_thr = rand % hb_ctrs;
+
+ /* Increase the heartbeat timer to prevent FW updating HB counters */
+ ret = adf_hb_set_timer_to_max(accel_dev);
+ if (ret)
+ return ret;
+
+ /* Configure worker threads to stop processing any packet */
+ ret = adf_disable_arb_thd(accel_dev, rand_ae, rand_thr);
+ if (ret)
+ return ret;
+
+ /* Change HB counters memory to simulate a hang */
+ adf_set_hb_counters_fail(accel_dev, rand_ae, rand_thr);
+
+ return 0;
+}
diff --git a/drivers/crypto/intel/qat/qat_common/adf_hw_arbiter.c b/drivers/crypto/intel/qat/qat_common/adf_hw_arbiter.c
index da6956699246..65bd26b25abc 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_hw_arbiter.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_hw_arbiter.c
@@ -103,3 +103,28 @@ void adf_exit_arb(struct adf_accel_dev *accel_dev)
csr_ops->write_csr_ring_srv_arb_en(csr, i, 0);
}
EXPORT_SYMBOL_GPL(adf_exit_arb);
+
+int adf_disable_arb_thd(struct adf_accel_dev *accel_dev, u32 ae, u32 thr)
+{
+ void __iomem *csr = accel_dev->transport->banks[0].csr_addr;
+ struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+ const u32 *thd_2_arb_cfg;
+ struct arb_info info;
+ u32 ae_thr_map;
+
+ if (ADF_AE_STRAND0_THREAD == thr || ADF_AE_STRAND1_THREAD == thr)
+ thr = ADF_AE_ADMIN_THREAD;
+
+ hw_data->get_arb_info(&info);
+ thd_2_arb_cfg = hw_data->get_arb_mapping(accel_dev);
+ if (!thd_2_arb_cfg)
+ return -EFAULT;
+
+ /* Disable scheduling for this particular AE and thread */
+ ae_thr_map = *(thd_2_arb_cfg + ae);
+ ae_thr_map &= ~(GENMASK(3, 0) << (thr * BIT(2)));
+
+ WRITE_CSR_ARB_WT2SAM(csr, info.arb_offset, info.wt2sam_offset, ae,
+ ae_thr_map);
+ return 0;
+}
diff --git a/drivers/crypto/intel/qat/qat_common/adf_init.c b/drivers/crypto/intel/qat/qat_common/adf_init.c
index f43ae9111553..74f0818c0703 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_init.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_init.c
@@ -433,6 +433,18 @@ int adf_dev_restarted_notify(struct adf_accel_dev *accel_dev)
return 0;
}
+void adf_error_notifier(struct adf_accel_dev *accel_dev)
+{
+ struct service_hndl *service;
+
+ list_for_each_entry(service, &service_table, list) {
+ if (service->event_hld(accel_dev, ADF_EVENT_FATAL_ERROR))
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to send error event to %s.\n",
+ service->name);
+ }
+}
+
static int adf_dev_shutdown_cache_cfg(struct adf_accel_dev *accel_dev)
{
char services[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0};
diff --git a/drivers/crypto/intel/qat/qat_common/adf_isr.c b/drivers/crypto/intel/qat/qat_common/adf_isr.c
index 3557a0d6dea2..cae1aee5479a 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_isr.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_isr.c
@@ -139,8 +139,13 @@ static bool adf_handle_ras_int(struct adf_accel_dev *accel_dev)
if (ras_ops->handle_interrupt &&
ras_ops->handle_interrupt(accel_dev, &reset_required)) {
- if (reset_required)
+ if (reset_required) {
dev_err(&GET_DEV(accel_dev), "Fatal error, reset required\n");
+ if (adf_notify_fatal_error(accel_dev))
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to notify fatal error\n");
+ }
+
return true;
}
@@ -272,7 +277,7 @@ static int adf_isr_alloc_msix_vectors_data(struct adf_accel_dev *accel_dev)
if (!accel_dev->pf.vf_info)
msix_num_entries += hw_data->num_banks;
- irqs = kzalloc_node(msix_num_entries * sizeof(*irqs),
+ irqs = kcalloc_node(msix_num_entries, sizeof(*irqs),
GFP_KERNEL, dev_to_node(&GET_DEV(accel_dev)));
if (!irqs)
return -ENOMEM;
@@ -375,8 +380,6 @@ EXPORT_SYMBOL_GPL(adf_isr_resource_alloc);
/**
* adf_init_misc_wq() - Init misc workqueue
*
- * Function init workqueue 'qat_misc_wq' for general purpose.
- *
* Return: 0 on success, error code otherwise.
*/
int __init adf_init_misc_wq(void)
diff --git a/drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.c b/drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.c
new file mode 100644
index 000000000000..41cc763a74aa
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.c
@@ -0,0 +1,318 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2024 Intel Corporation */
+
+#include <linux/slab.h>
+#include <linux/types.h>
+#include "adf_mstate_mgr.h"
+
+#define ADF_MSTATE_MAGIC 0xADF5CAEA
+#define ADF_MSTATE_VERSION 0x1
+
+struct adf_mstate_sect_h {
+ u8 id[ADF_MSTATE_ID_LEN];
+ u32 size;
+ u32 sub_sects;
+ u8 state[];
+};
+
+u32 adf_mstate_state_size(struct adf_mstate_mgr *mgr)
+{
+ return mgr->state - mgr->buf;
+}
+
+static inline u32 adf_mstate_avail_room(struct adf_mstate_mgr *mgr)
+{
+ return mgr->buf + mgr->size - mgr->state;
+}
+
+void adf_mstate_mgr_init(struct adf_mstate_mgr *mgr, u8 *buf, u32 size)
+{
+ mgr->buf = buf;
+ mgr->state = buf;
+ mgr->size = size;
+ mgr->n_sects = 0;
+};
+
+struct adf_mstate_mgr *adf_mstate_mgr_new(u8 *buf, u32 size)
+{
+ struct adf_mstate_mgr *mgr;
+
+ mgr = kzalloc(sizeof(*mgr), GFP_KERNEL);
+ if (!mgr)
+ return NULL;
+
+ adf_mstate_mgr_init(mgr, buf, size);
+
+ return mgr;
+}
+
+void adf_mstate_mgr_destroy(struct adf_mstate_mgr *mgr)
+{
+ kfree(mgr);
+}
+
+void adf_mstate_mgr_init_from_parent(struct adf_mstate_mgr *mgr,
+ struct adf_mstate_mgr *p_mgr)
+{
+ adf_mstate_mgr_init(mgr, p_mgr->state,
+ p_mgr->size - adf_mstate_state_size(p_mgr));
+}
+
+void adf_mstate_mgr_init_from_psect(struct adf_mstate_mgr *mgr,
+ struct adf_mstate_sect_h *p_sect)
+{
+ adf_mstate_mgr_init(mgr, p_sect->state, p_sect->size);
+ mgr->n_sects = p_sect->sub_sects;
+}
+
+static void adf_mstate_preamble_init(struct adf_mstate_preh *preamble)
+{
+ preamble->magic = ADF_MSTATE_MAGIC;
+ preamble->version = ADF_MSTATE_VERSION;
+ preamble->preh_len = sizeof(*preamble);
+ preamble->size = 0;
+ preamble->n_sects = 0;
+}
+
+/* default preambles checker */
+static int adf_mstate_preamble_def_checker(struct adf_mstate_preh *preamble,
+ void *opaque)
+{
+ struct adf_mstate_mgr *mgr = opaque;
+
+ if (preamble->magic != ADF_MSTATE_MAGIC ||
+ preamble->version > ADF_MSTATE_VERSION ||
+ preamble->preh_len > mgr->size) {
+ pr_debug("QAT: LM - Invalid state (magic=%#x, version=%#x, hlen=%u), state_size=%u\n",
+ preamble->magic, preamble->version, preamble->preh_len,
+ mgr->size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+struct adf_mstate_preh *adf_mstate_preamble_add(struct adf_mstate_mgr *mgr)
+{
+ struct adf_mstate_preh *pre = (struct adf_mstate_preh *)mgr->buf;
+
+ if (adf_mstate_avail_room(mgr) < sizeof(*pre)) {
+ pr_err("QAT: LM - Not enough space for preamble\n");
+ return NULL;
+ }
+
+ adf_mstate_preamble_init(pre);
+ mgr->state += pre->preh_len;
+
+ return pre;
+}
+
+int adf_mstate_preamble_update(struct adf_mstate_mgr *mgr)
+{
+ struct adf_mstate_preh *preamble = (struct adf_mstate_preh *)mgr->buf;
+
+ preamble->size = adf_mstate_state_size(mgr) - preamble->preh_len;
+ preamble->n_sects = mgr->n_sects;
+
+ return 0;
+}
+
+static void adf_mstate_dump_sect(struct adf_mstate_sect_h *sect,
+ const char *prefix)
+{
+ pr_debug("QAT: LM - %s QAT state section %s\n", prefix, sect->id);
+ print_hex_dump_debug("h-", DUMP_PREFIX_OFFSET, 16, 2, sect,
+ sizeof(*sect), true);
+ print_hex_dump_debug("s-", DUMP_PREFIX_OFFSET, 16, 2, sect->state,
+ sect->size, true);
+}
+
+static inline void __adf_mstate_sect_update(struct adf_mstate_mgr *mgr,
+ struct adf_mstate_sect_h *sect,
+ u32 size,
+ u32 n_subsects)
+{
+ sect->size += size;
+ sect->sub_sects += n_subsects;
+ mgr->n_sects++;
+ mgr->state += sect->size;
+
+ adf_mstate_dump_sect(sect, "Add");
+}
+
+void adf_mstate_sect_update(struct adf_mstate_mgr *p_mgr,
+ struct adf_mstate_mgr *curr_mgr,
+ struct adf_mstate_sect_h *sect)
+{
+ __adf_mstate_sect_update(p_mgr, sect, adf_mstate_state_size(curr_mgr),
+ curr_mgr->n_sects);
+}
+
+static struct adf_mstate_sect_h *adf_mstate_sect_add_header(struct adf_mstate_mgr *mgr,
+ const char *id)
+{
+ struct adf_mstate_sect_h *sect = (struct adf_mstate_sect_h *)(mgr->state);
+
+ if (adf_mstate_avail_room(mgr) < sizeof(*sect)) {
+ pr_debug("QAT: LM - Not enough space for header of QAT state sect %s\n", id);
+ return NULL;
+ }
+
+ strscpy(sect->id, id, sizeof(sect->id));
+ sect->size = 0;
+ sect->sub_sects = 0;
+ mgr->state += sizeof(*sect);
+
+ return sect;
+}
+
+struct adf_mstate_sect_h *adf_mstate_sect_add_vreg(struct adf_mstate_mgr *mgr,
+ const char *id,
+ struct adf_mstate_vreginfo *info)
+{
+ struct adf_mstate_sect_h *sect;
+
+ sect = adf_mstate_sect_add_header(mgr, id);
+ if (!sect)
+ return NULL;
+
+ if (adf_mstate_avail_room(mgr) < info->size) {
+ pr_debug("QAT: LM - Not enough space for QAT state sect %s, requires %u\n",
+ id, info->size);
+ return NULL;
+ }
+
+ memcpy(sect->state, info->addr, info->size);
+ __adf_mstate_sect_update(mgr, sect, info->size, 0);
+
+ return sect;
+}
+
+struct adf_mstate_sect_h *adf_mstate_sect_add(struct adf_mstate_mgr *mgr,
+ const char *id,
+ adf_mstate_populate populate,
+ void *opaque)
+{
+ struct adf_mstate_mgr sub_sects_mgr;
+ struct adf_mstate_sect_h *sect;
+ int avail_room, size;
+
+ sect = adf_mstate_sect_add_header(mgr, id);
+ if (!sect)
+ return NULL;
+
+ if (!populate)
+ return sect;
+
+ avail_room = adf_mstate_avail_room(mgr);
+ adf_mstate_mgr_init_from_parent(&sub_sects_mgr, mgr);
+
+ size = (*populate)(&sub_sects_mgr, sect->state, avail_room, opaque);
+ if (size < 0)
+ return NULL;
+
+ size += adf_mstate_state_size(&sub_sects_mgr);
+ if (avail_room < size) {
+ pr_debug("QAT: LM - Not enough space for QAT state sect %s, requires %u\n",
+ id, size);
+ return NULL;
+ }
+ __adf_mstate_sect_update(mgr, sect, size, sub_sects_mgr.n_sects);
+
+ return sect;
+}
+
+static int adf_mstate_sect_validate(struct adf_mstate_mgr *mgr)
+{
+ struct adf_mstate_sect_h *start = (struct adf_mstate_sect_h *)mgr->state;
+ struct adf_mstate_sect_h *sect = start;
+ u64 end;
+ int i;
+
+ end = (uintptr_t)mgr->buf + mgr->size;
+ for (i = 0; i < mgr->n_sects; i++) {
+ uintptr_t s_start = (uintptr_t)sect->state;
+ uintptr_t s_end = s_start + sect->size;
+
+ if (s_end < s_start || s_end > end) {
+ pr_debug("QAT: LM - Corrupted state section (index=%u, size=%u) in state_mgr (size=%u, secs=%u)\n",
+ i, sect->size, mgr->size, mgr->n_sects);
+ return -EINVAL;
+ }
+ sect = (struct adf_mstate_sect_h *)s_end;
+ }
+
+ pr_debug("QAT: LM - Scanned section (last child=%s, size=%lu) in state_mgr (size=%u, secs=%u)\n",
+ start->id, sizeof(struct adf_mstate_sect_h) * (ulong)(sect - start),
+ mgr->size, mgr->n_sects);
+
+ return 0;
+}
+
+u32 adf_mstate_state_size_from_remote(struct adf_mstate_mgr *mgr)
+{
+ struct adf_mstate_preh *preh = (struct adf_mstate_preh *)mgr->buf;
+
+ return preh->preh_len + preh->size;
+}
+
+int adf_mstate_mgr_init_from_remote(struct adf_mstate_mgr *mgr, u8 *buf, u32 size,
+ adf_mstate_preamble_checker pre_checker,
+ void *opaque)
+{
+ struct adf_mstate_preh *pre;
+ int ret;
+
+ adf_mstate_mgr_init(mgr, buf, size);
+ pre = (struct adf_mstate_preh *)(mgr->buf);
+
+ pr_debug("QAT: LM - Dump state preambles\n");
+ print_hex_dump_debug("", DUMP_PREFIX_OFFSET, 16, 2, pre, pre->preh_len, 0);
+
+ if (pre_checker)
+ ret = (*pre_checker)(pre, opaque);
+ else
+ ret = adf_mstate_preamble_def_checker(pre, mgr);
+ if (ret)
+ return ret;
+
+ mgr->state = mgr->buf + pre->preh_len;
+ mgr->n_sects = pre->n_sects;
+
+ return adf_mstate_sect_validate(mgr);
+}
+
+struct adf_mstate_sect_h *adf_mstate_sect_lookup(struct adf_mstate_mgr *mgr,
+ const char *id,
+ adf_mstate_action action,
+ void *opaque)
+{
+ struct adf_mstate_sect_h *sect = (struct adf_mstate_sect_h *)mgr->state;
+ struct adf_mstate_mgr sub_sects_mgr;
+ int i, ret;
+
+ for (i = 0; i < mgr->n_sects; i++) {
+ if (!strncmp(sect->id, id, sizeof(sect->id)))
+ goto found;
+
+ sect = (struct adf_mstate_sect_h *)(sect->state + sect->size);
+ }
+
+ return NULL;
+
+found:
+ adf_mstate_dump_sect(sect, "Found");
+
+ adf_mstate_mgr_init_from_psect(&sub_sects_mgr, sect);
+ if (sect->sub_sects && adf_mstate_sect_validate(&sub_sects_mgr))
+ return NULL;
+
+ if (!action)
+ return sect;
+
+ ret = (*action)(&sub_sects_mgr, sect->state, sect->size, opaque);
+ if (ret)
+ return NULL;
+
+ return sect;
+}
diff --git a/drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.h b/drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.h
new file mode 100644
index 000000000000..81d263a596c5
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2024 Intel Corporation */
+
+#ifndef ADF_MSTATE_MGR_H
+#define ADF_MSTATE_MGR_H
+
+#define ADF_MSTATE_ID_LEN 8
+
+#define ADF_MSTATE_ETRB_IDS "ETRBAR"
+#define ADF_MSTATE_MISCB_IDS "MISCBAR"
+#define ADF_MSTATE_EXTB_IDS "EXTBAR"
+#define ADF_MSTATE_GEN_IDS "GENER"
+#define ADF_MSTATE_CONFIG_IDS "CONFIG"
+#define ADF_MSTATE_SECTION_NUM 5
+
+#define ADF_MSTATE_BANK_IDX_IDS "bnk"
+
+#define ADF_MSTATE_ETR_REGS_IDS "mregs"
+#define ADF_MSTATE_VINTSRC_IDS "visrc"
+#define ADF_MSTATE_VINTMSK_IDS "vimsk"
+#define ADF_MSTATE_SLA_IDS "sla"
+#define ADF_MSTATE_IOV_INIT_IDS "iovinit"
+#define ADF_MSTATE_COMPAT_VER_IDS "compver"
+#define ADF_MSTATE_GEN_CAP_IDS "gencap"
+#define ADF_MSTATE_GEN_SVCMAP_IDS "svcmap"
+#define ADF_MSTATE_GEN_EXTDC_IDS "extdc"
+#define ADF_MSTATE_VINTSRC_PF2VM_IDS "vispv"
+#define ADF_MSTATE_VINTMSK_PF2VM_IDS "vimpv"
+#define ADF_MSTATE_VM2PF_IDS "vm2pf"
+#define ADF_MSTATE_PF2VM_IDS "pf2vm"
+
+struct adf_mstate_mgr {
+ u8 *buf;
+ u8 *state;
+ u32 size;
+ u32 n_sects;
+};
+
+struct adf_mstate_preh {
+ u32 magic;
+ u32 version;
+ u16 preh_len;
+ u16 n_sects;
+ u32 size;
+};
+
+struct adf_mstate_vreginfo {
+ void *addr;
+ u32 size;
+};
+
+struct adf_mstate_sect_h;
+
+typedef int (*adf_mstate_preamble_checker)(struct adf_mstate_preh *preamble, void *opa);
+typedef int (*adf_mstate_populate)(struct adf_mstate_mgr *sub_mgr, u8 *buf,
+ u32 size, void *opa);
+typedef int (*adf_mstate_action)(struct adf_mstate_mgr *sub_mgr, u8 *buf, u32 size,
+ void *opa);
+
+struct adf_mstate_mgr *adf_mstate_mgr_new(u8 *buf, u32 size);
+void adf_mstate_mgr_destroy(struct adf_mstate_mgr *mgr);
+void adf_mstate_mgr_init(struct adf_mstate_mgr *mgr, u8 *buf, u32 size);
+void adf_mstate_mgr_init_from_parent(struct adf_mstate_mgr *mgr,
+ struct adf_mstate_mgr *p_mgr);
+void adf_mstate_mgr_init_from_psect(struct adf_mstate_mgr *mgr,
+ struct adf_mstate_sect_h *p_sect);
+int adf_mstate_mgr_init_from_remote(struct adf_mstate_mgr *mgr,
+ u8 *buf, u32 size,
+ adf_mstate_preamble_checker checker,
+ void *opaque);
+struct adf_mstate_preh *adf_mstate_preamble_add(struct adf_mstate_mgr *mgr);
+int adf_mstate_preamble_update(struct adf_mstate_mgr *mgr);
+u32 adf_mstate_state_size(struct adf_mstate_mgr *mgr);
+u32 adf_mstate_state_size_from_remote(struct adf_mstate_mgr *mgr);
+void adf_mstate_sect_update(struct adf_mstate_mgr *p_mgr,
+ struct adf_mstate_mgr *curr_mgr,
+ struct adf_mstate_sect_h *sect);
+struct adf_mstate_sect_h *adf_mstate_sect_add_vreg(struct adf_mstate_mgr *mgr,
+ const char *id,
+ struct adf_mstate_vreginfo *info);
+struct adf_mstate_sect_h *adf_mstate_sect_add(struct adf_mstate_mgr *mgr,
+ const char *id,
+ adf_mstate_populate populate,
+ void *opaque);
+struct adf_mstate_sect_h *adf_mstate_sect_lookup(struct adf_mstate_mgr *mgr,
+ const char *id,
+ adf_mstate_action action,
+ void *opaque);
+#endif
diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_msg.h b/drivers/crypto/intel/qat/qat_common/adf_pfvf_msg.h
index 204a42438992..d1b3ef9cadac 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_msg.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_msg.h
@@ -99,6 +99,8 @@ enum pf2vf_msgtype {
ADF_PF2VF_MSGTYPE_RESTARTING = 0x01,
ADF_PF2VF_MSGTYPE_VERSION_RESP = 0x02,
ADF_PF2VF_MSGTYPE_BLKMSG_RESP = 0x03,
+ ADF_PF2VF_MSGTYPE_FATAL_ERROR = 0x04,
+ ADF_PF2VF_MSGTYPE_RESTARTED = 0x05,
/* Values from 0x10 are Gen4 specific, message type is only 4 bits in Gen2 devices. */
ADF_PF2VF_MSGTYPE_RP_RESET_RESP = 0x10,
};
@@ -112,6 +114,7 @@ enum vf2pf_msgtype {
ADF_VF2PF_MSGTYPE_LARGE_BLOCK_REQ = 0x07,
ADF_VF2PF_MSGTYPE_MEDIUM_BLOCK_REQ = 0x08,
ADF_VF2PF_MSGTYPE_SMALL_BLOCK_REQ = 0x09,
+ ADF_VF2PF_MSGTYPE_RESTARTING_COMPLETE = 0x0a,
/* Values from 0x10 are Gen4 specific, message type is only 4 bits in Gen2 devices. */
ADF_VF2PF_MSGTYPE_RP_RESET = 0x10,
};
@@ -124,8 +127,10 @@ enum pfvf_compatibility_version {
ADF_PFVF_COMPAT_FAST_ACK = 0x03,
/* Ring to service mapping support for non-standard mappings */
ADF_PFVF_COMPAT_RING_TO_SVC_MAP = 0x04,
+ /* Fallback compat */
+ ADF_PFVF_COMPAT_FALLBACK = 0x05,
/* Reference to the latest version */
- ADF_PFVF_COMPAT_THIS_VERSION = 0x04,
+ ADF_PFVF_COMPAT_THIS_VERSION = 0x05,
};
/* PF->VF Version Response */
diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.c b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.c
index 14c069f0d71a..0e31f4b41844 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.c
@@ -1,21 +1,83 @@
// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
/* Copyright(c) 2015 - 2021 Intel Corporation */
+#include <linux/delay.h>
#include <linux/pci.h>
#include "adf_accel_devices.h"
#include "adf_pfvf_msg.h"
#include "adf_pfvf_pf_msg.h"
#include "adf_pfvf_pf_proto.h"
+#define ADF_PF_WAIT_RESTARTING_COMPLETE_DELAY 100
+#define ADF_VF_SHUTDOWN_RETRY 100
+
void adf_pf2vf_notify_restarting(struct adf_accel_dev *accel_dev)
{
struct adf_accel_vf_info *vf;
struct pfvf_message msg = { .type = ADF_PF2VF_MSGTYPE_RESTARTING };
int i, num_vfs = pci_num_vf(accel_to_pci_dev(accel_dev));
+ dev_dbg(&GET_DEV(accel_dev), "pf2vf notify restarting\n");
for (i = 0, vf = accel_dev->pf.vf_info; i < num_vfs; i++, vf++) {
- if (vf->init && adf_send_pf2vf_msg(accel_dev, i, msg))
+ vf->restarting = false;
+ if (!vf->init)
+ continue;
+ if (adf_send_pf2vf_msg(accel_dev, i, msg))
dev_err(&GET_DEV(accel_dev),
"Failed to send restarting msg to VF%d\n", i);
+ else if (vf->vf_compat_ver >= ADF_PFVF_COMPAT_FALLBACK)
+ vf->restarting = true;
+ }
+}
+
+void adf_pf2vf_wait_for_restarting_complete(struct adf_accel_dev *accel_dev)
+{
+ int num_vfs = pci_num_vf(accel_to_pci_dev(accel_dev));
+ int i, retries = ADF_VF_SHUTDOWN_RETRY;
+ struct adf_accel_vf_info *vf;
+ bool vf_running;
+
+ dev_dbg(&GET_DEV(accel_dev), "pf2vf wait for restarting complete\n");
+ do {
+ vf_running = false;
+ for (i = 0, vf = accel_dev->pf.vf_info; i < num_vfs; i++, vf++)
+ if (vf->restarting)
+ vf_running = true;
+ if (!vf_running)
+ break;
+ msleep(ADF_PF_WAIT_RESTARTING_COMPLETE_DELAY);
+ } while (--retries);
+
+ if (vf_running)
+ dev_warn(&GET_DEV(accel_dev), "Some VFs are still running\n");
+}
+
+void adf_pf2vf_notify_restarted(struct adf_accel_dev *accel_dev)
+{
+ struct pfvf_message msg = { .type = ADF_PF2VF_MSGTYPE_RESTARTED };
+ int i, num_vfs = pci_num_vf(accel_to_pci_dev(accel_dev));
+ struct adf_accel_vf_info *vf;
+
+ dev_dbg(&GET_DEV(accel_dev), "pf2vf notify restarted\n");
+ for (i = 0, vf = accel_dev->pf.vf_info; i < num_vfs; i++, vf++) {
+ if (vf->init && vf->vf_compat_ver >= ADF_PFVF_COMPAT_FALLBACK &&
+ adf_send_pf2vf_msg(accel_dev, i, msg))
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to send restarted msg to VF%d\n", i);
+ }
+}
+
+void adf_pf2vf_notify_fatal_error(struct adf_accel_dev *accel_dev)
+{
+ struct pfvf_message msg = { .type = ADF_PF2VF_MSGTYPE_FATAL_ERROR };
+ int i, num_vfs = pci_num_vf(accel_to_pci_dev(accel_dev));
+ struct adf_accel_vf_info *vf;
+
+ dev_dbg(&GET_DEV(accel_dev), "pf2vf notify fatal error\n");
+ for (i = 0, vf = accel_dev->pf.vf_info; i < num_vfs; i++, vf++) {
+ if (vf->init && vf->vf_compat_ver >= ADF_PFVF_COMPAT_FALLBACK &&
+ adf_send_pf2vf_msg(accel_dev, i, msg))
+ dev_err(&GET_DEV(accel_dev),
+ "Failed to send fatal error msg to VF%d\n", i);
}
}
diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.h b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.h
index e8982d1ac896..f203d88c919c 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.h
@@ -5,7 +5,28 @@
#include "adf_accel_devices.h"
+#if defined(CONFIG_PCI_IOV)
void adf_pf2vf_notify_restarting(struct adf_accel_dev *accel_dev);
+void adf_pf2vf_wait_for_restarting_complete(struct adf_accel_dev *accel_dev);
+void adf_pf2vf_notify_restarted(struct adf_accel_dev *accel_dev);
+void adf_pf2vf_notify_fatal_error(struct adf_accel_dev *accel_dev);
+#else
+static inline void adf_pf2vf_notify_restarting(struct adf_accel_dev *accel_dev)
+{
+}
+
+static inline void adf_pf2vf_wait_for_restarting_complete(struct adf_accel_dev *accel_dev)
+{
+}
+
+static inline void adf_pf2vf_notify_restarted(struct adf_accel_dev *accel_dev)
+{
+}
+
+static inline void adf_pf2vf_notify_fatal_error(struct adf_accel_dev *accel_dev)
+{
+}
+#endif
typedef int (*adf_pf2vf_blkmsg_provider)(struct adf_accel_dev *accel_dev,
u8 *buffer, u8 compat);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c
index 388e58bcbcaf..b9b5e744a3f1 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c
@@ -242,13 +242,7 @@ static int adf_handle_vf2pf_msg(struct adf_accel_dev *accel_dev, u8 vf_nr,
"VersionRequest received from VF%d (vers %d) to PF (vers %d)\n",
vf_nr, vf_compat_ver, ADF_PFVF_COMPAT_THIS_VERSION);
- if (vf_compat_ver == 0)
- compat = ADF_PF2VF_VF_INCOMPATIBLE;
- else if (vf_compat_ver <= ADF_PFVF_COMPAT_THIS_VERSION)
- compat = ADF_PF2VF_VF_COMPATIBLE;
- else
- compat = ADF_PF2VF_VF_COMPAT_UNKNOWN;
-
+ compat = adf_vf_compat_checker(vf_compat_ver);
vf_info->vf_compat_ver = vf_compat_ver;
resp->type = ADF_PF2VF_MSGTYPE_VERSION_RESP;
@@ -291,6 +285,14 @@ static int adf_handle_vf2pf_msg(struct adf_accel_dev *accel_dev, u8 vf_nr,
vf_info->init = false;
}
break;
+ case ADF_VF2PF_MSGTYPE_RESTARTING_COMPLETE:
+ {
+ dev_dbg(&GET_DEV(accel_dev),
+ "Restarting Complete received from VF%d\n", vf_nr);
+ vf_info->restarting = false;
+ vf_info->init = false;
+ }
+ break;
case ADF_VF2PF_MSGTYPE_LARGE_BLOCK_REQ:
case ADF_VF2PF_MSGTYPE_MEDIUM_BLOCK_REQ:
case ADF_VF2PF_MSGTYPE_SMALL_BLOCK_REQ:
diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_utils.h b/drivers/crypto/intel/qat/qat_common/adf_pfvf_utils.h
index 2be048e2287b..1a044297d873 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_utils.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_utils.h
@@ -28,4 +28,15 @@ u32 adf_pfvf_csr_msg_of(struct adf_accel_dev *accel_dev, struct pfvf_message msg
struct pfvf_message adf_pfvf_message_of(struct adf_accel_dev *accel_dev, u32 raw_msg,
const struct pfvf_csr_format *fmt);
+static inline u8 adf_vf_compat_checker(u8 vf_compat_ver)
+{
+ if (vf_compat_ver == 0)
+ return ADF_PF2VF_VF_INCOMPATIBLE;
+
+ if (vf_compat_ver <= ADF_PFVF_COMPAT_THIS_VERSION)
+ return ADF_PF2VF_VF_COMPATIBLE;
+
+ return ADF_PF2VF_VF_COMPAT_UNKNOWN;
+}
+
#endif /* ADF_PFVF_UTILS_H */
diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_vf_proto.c b/drivers/crypto/intel/qat/qat_common/adf_pfvf_vf_proto.c
index 1015155b6374..dc284a089c88 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_vf_proto.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_vf_proto.c
@@ -308,6 +308,12 @@ static bool adf_handle_pf2vf_msg(struct adf_accel_dev *accel_dev,
adf_pf2vf_handle_pf_restarting(accel_dev);
return false;
+ case ADF_PF2VF_MSGTYPE_RESTARTED:
+ dev_dbg(&GET_DEV(accel_dev), "Restarted message received from PF\n");
+ return true;
+ case ADF_PF2VF_MSGTYPE_FATAL_ERROR:
+ dev_err(&GET_DEV(accel_dev), "Fatal error received from PF\n");
+ return true;
case ADF_PF2VF_MSGTYPE_VERSION_RESP:
case ADF_PF2VF_MSGTYPE_BLKMSG_RESP:
case ADF_PF2VF_MSGTYPE_RP_RESET_RESP:
diff --git a/drivers/crypto/intel/qat/qat_common/adf_rl.c b/drivers/crypto/intel/qat/qat_common/adf_rl.c
index de1b214dba1f..346ef8bee99d 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_rl.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_rl.c
@@ -183,14 +183,14 @@ static enum adf_cfg_service_type srv_to_cfg_svc_type(enum adf_base_services rl_s
}
/**
- * get_sla_arr_of_type() - Returns a pointer to SLA type specific array
+ * adf_rl_get_sla_arr_of_type() - Returns a pointer to SLA type specific array
* @rl_data: pointer to ratelimiting data
* @type: SLA type
* @sla_arr: pointer to variable where requested pointer will be stored
*
* Return: Max number of elements allowed for the returned array
*/
-static u32 get_sla_arr_of_type(struct adf_rl *rl_data, enum rl_node_type type,
+u32 adf_rl_get_sla_arr_of_type(struct adf_rl *rl_data, enum rl_node_type type,
struct rl_sla ***sla_arr)
{
switch (type) {
@@ -778,7 +778,7 @@ static void clear_sla(struct adf_rl *rl_data, struct rl_sla *sla)
rp_in_use[sla->ring_pairs_ids[i]] = false;
update_budget(sla, old_cir, true);
- get_sla_arr_of_type(rl_data, sla->type, &sla_type_arr);
+ adf_rl_get_sla_arr_of_type(rl_data, sla->type, &sla_type_arr);
assign_node_to_parent(rl_data->accel_dev, sla, true);
adf_rl_send_admin_delete_msg(rl_data->accel_dev, node_id, sla->type);
mark_rps_usage(sla, rl_data->rp_in_use, false);
@@ -788,6 +788,24 @@ static void clear_sla(struct adf_rl *rl_data, struct rl_sla *sla)
sla_type_arr[node_id] = NULL;
}
+static void free_all_sla(struct adf_accel_dev *accel_dev)
+{
+ struct adf_rl *rl_data = accel_dev->rate_limiting;
+ int sla_id;
+
+ mutex_lock(&rl_data->rl_lock);
+
+ for (sla_id = 0; sla_id < RL_NODES_CNT_MAX; sla_id++) {
+ if (!rl_data->sla[sla_id])
+ continue;
+
+ kfree(rl_data->sla[sla_id]);
+ rl_data->sla[sla_id] = NULL;
+ }
+
+ mutex_unlock(&rl_data->rl_lock);
+}
+
/**
* add_update_sla() - handles the creation and the update of an SLA
* @accel_dev: pointer to acceleration device structure
@@ -857,7 +875,7 @@ static int add_update_sla(struct adf_accel_dev *accel_dev,
if (!is_update) {
mark_rps_usage(sla, rl_data->rp_in_use, true);
- get_sla_arr_of_type(rl_data, sla->type, &sla_type_arr);
+ adf_rl_get_sla_arr_of_type(rl_data, sla->type, &sla_type_arr);
sla_type_arr[sla->node_id] = sla;
rl_data->sla[sla->sla_id] = sla;
}
@@ -1047,7 +1065,7 @@ void adf_rl_remove_sla_all(struct adf_accel_dev *accel_dev, bool incl_default)
/* Unregister and remove all SLAs */
for (j = RL_LEAF; j >= end_type; j--) {
- max_id = get_sla_arr_of_type(rl_data, j, &sla_type_arr);
+ max_id = adf_rl_get_sla_arr_of_type(rl_data, j, &sla_type_arr);
for (i = 0; i < max_id; i++) {
if (!sla_type_arr[i])
@@ -1107,7 +1125,7 @@ int adf_rl_start(struct adf_accel_dev *accel_dev)
}
if ((fw_caps & RL_CAPABILITY_MASK) != RL_CAPABILITY_VALUE) {
- dev_info(&GET_DEV(accel_dev), "not supported\n");
+ dev_info(&GET_DEV(accel_dev), "feature not supported by FW\n");
ret = -EOPNOTSUPP;
goto ret_free;
}
@@ -1155,7 +1173,7 @@ void adf_rl_stop(struct adf_accel_dev *accel_dev)
return;
adf_sysfs_rl_rm(accel_dev);
- adf_rl_remove_sla_all(accel_dev, true);
+ free_all_sla(accel_dev);
}
void adf_rl_exit(struct adf_accel_dev *accel_dev)
diff --git a/drivers/crypto/intel/qat/qat_common/adf_rl.h b/drivers/crypto/intel/qat/qat_common/adf_rl.h
index 269c6656fb90..bfe750ea0e83 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_rl.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_rl.h
@@ -151,6 +151,8 @@ struct rl_sla {
u16 ring_pairs_cnt;
};
+u32 adf_rl_get_sla_arr_of_type(struct adf_rl *rl_data, enum rl_node_type type,
+ struct rl_sla ***sla_arr);
int adf_rl_add_sla(struct adf_accel_dev *accel_dev,
struct adf_rl_sla_input_data *sla_in);
int adf_rl_update_sla(struct adf_accel_dev *accel_dev,
diff --git a/drivers/crypto/intel/qat/qat_common/adf_sriov.c b/drivers/crypto/intel/qat/qat_common/adf_sriov.c
index f44025bb6f99..8d645e7e04aa 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_sriov.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_sriov.c
@@ -26,10 +26,12 @@ static void adf_iov_send_resp(struct work_struct *work)
u32 vf_nr = vf_info->vf_nr;
bool ret;
+ mutex_lock(&vf_info->pfvf_mig_lock);
ret = adf_recv_and_handle_vf2pf_msg(accel_dev, vf_nr);
if (ret)
/* re-enable interrupt on PF from this VF */
adf_enable_vf2pf_interrupts(accel_dev, 1 << vf_nr);
+ mutex_unlock(&vf_info->pfvf_mig_lock);
kfree(pf2vf_resp);
}
@@ -60,9 +62,9 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev)
/* This ptr will be populated when VFs will be created */
vf_info->accel_dev = accel_dev;
vf_info->vf_nr = i;
- vf_info->vf_compat_ver = 0;
mutex_init(&vf_info->pf2vf_lock);
+ mutex_init(&vf_info->pfvf_mig_lock);
ratelimit_state_init(&vf_info->vf2pf_ratelimit,
ADF_VF2PF_RATELIMIT_INTERVAL,
ADF_VF2PF_RATELIMIT_BURST);
@@ -84,6 +86,32 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev)
return pci_enable_sriov(pdev, totalvfs);
}
+void adf_reenable_sriov(struct adf_accel_dev *accel_dev)
+{
+ struct pci_dev *pdev = accel_to_pci_dev(accel_dev);
+ char cfg[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0};
+ unsigned long val = 0;
+
+ if (adf_cfg_get_param_value(accel_dev, ADF_GENERAL_SEC,
+ ADF_SRIOV_ENABLED, cfg))
+ return;
+
+ if (!accel_dev->pf.vf_info)
+ return;
+
+ if (adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_CY,
+ &val, ADF_DEC))
+ return;
+
+ if (adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_DC,
+ &val, ADF_DEC))
+ return;
+
+ set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status);
+ dev_dbg(&pdev->dev, "Re-enabling SRIOV\n");
+ adf_enable_sriov(accel_dev);
+}
+
/**
* adf_disable_sriov() - Disable SRIOV for the device
* @accel_dev: Pointer to accel device.
@@ -103,6 +131,7 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev)
return;
adf_pf2vf_notify_restarting(accel_dev);
+ adf_pf2vf_wait_for_restarting_complete(accel_dev);
pci_disable_sriov(accel_to_pci_dev(accel_dev));
/* Disable VF to PF interrupts */
@@ -112,11 +141,15 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev)
if (hw_data->configure_iov_threads)
hw_data->configure_iov_threads(accel_dev, false);
- for (i = 0, vf = accel_dev->pf.vf_info; i < totalvfs; i++, vf++)
+ for (i = 0, vf = accel_dev->pf.vf_info; i < totalvfs; i++, vf++) {
mutex_destroy(&vf->pf2vf_lock);
+ mutex_destroy(&vf->pfvf_mig_lock);
+ }
- kfree(accel_dev->pf.vf_info);
- accel_dev->pf.vf_info = NULL;
+ if (!test_bit(ADF_STATUS_RESTARTING, &accel_dev->status)) {
+ kfree(accel_dev->pf.vf_info);
+ accel_dev->pf.vf_info = NULL;
+ }
}
EXPORT_SYMBOL_GPL(adf_disable_sriov);
@@ -194,6 +227,10 @@ int adf_sriov_configure(struct pci_dev *pdev, int numvfs)
if (ret)
return ret;
+ val = 1;
+ adf_cfg_add_key_value_param(accel_dev, ADF_GENERAL_SEC, ADF_SRIOV_ENABLED,
+ &val, ADF_DEC);
+
return numvfs;
}
EXPORT_SYMBOL_GPL(adf_sriov_configure);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_sysfs.c b/drivers/crypto/intel/qat/qat_common/adf_sysfs.c
index d450dad32c9e..4e7f70d4049d 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_sysfs.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_sysfs.c
@@ -204,6 +204,42 @@ static ssize_t pm_idle_enabled_store(struct device *dev, struct device_attribute
}
static DEVICE_ATTR_RW(pm_idle_enabled);
+static ssize_t auto_reset_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ char *auto_reset;
+ struct adf_accel_dev *accel_dev;
+
+ accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev));
+ if (!accel_dev)
+ return -EINVAL;
+
+ auto_reset = accel_dev->autoreset_on_error ? "on" : "off";
+
+ return sysfs_emit(buf, "%s\n", auto_reset);
+}
+
+static ssize_t auto_reset_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct adf_accel_dev *accel_dev;
+ bool enabled = false;
+ int ret;
+
+ ret = kstrtobool(buf, &enabled);
+ if (ret)
+ return ret;
+
+ accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev));
+ if (!accel_dev)
+ return -EINVAL;
+
+ accel_dev->autoreset_on_error = enabled;
+
+ return count;
+}
+static DEVICE_ATTR_RW(auto_reset);
+
static DEVICE_ATTR_RW(state);
static DEVICE_ATTR_RW(cfg_services);
@@ -291,6 +327,7 @@ static struct attribute *qat_attrs[] = {
&dev_attr_pm_idle_enabled.attr,
&dev_attr_rp2srv.attr,
&dev_attr_num_rps.attr,
+ &dev_attr_auto_reset.attr,
NULL,
};
diff --git a/drivers/crypto/intel/qat/qat_common/adf_telemetry.c b/drivers/crypto/intel/qat/qat_common/adf_telemetry.c
index 2ff714d11bd2..74fb0c2ed241 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_telemetry.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_telemetry.c
@@ -41,6 +41,20 @@ static int validate_tl_data(struct adf_tl_hw_data *tl_data)
return 0;
}
+static int validate_tl_slice_counters(struct icp_qat_fw_init_admin_slice_cnt *slice_count,
+ u8 max_slices_per_type)
+{
+ u8 *sl_counter = (u8 *)slice_count;
+ int i;
+
+ for (i = 0; i < ADF_TL_SL_CNT_COUNT; i++) {
+ if (sl_counter[i] > max_slices_per_type)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int adf_tl_alloc_mem(struct adf_accel_dev *accel_dev)
{
struct adf_tl_hw_data *tl_data = &GET_TL_DATA(accel_dev);
@@ -214,6 +228,13 @@ int adf_tl_run(struct adf_accel_dev *accel_dev, int state)
return ret;
}
+ ret = validate_tl_slice_counters(&telemetry->slice_cnt, tl_data->max_sl_cnt);
+ if (ret) {
+ dev_err(dev, "invalid value returned by FW\n");
+ adf_send_admin_tl_stop(accel_dev);
+ return ret;
+ }
+
telemetry->hbuffs = state;
atomic_set(&telemetry->state, state);
diff --git a/drivers/crypto/intel/qat/qat_common/adf_telemetry.h b/drivers/crypto/intel/qat/qat_common/adf_telemetry.h
index 9be81cd3b886..e54a406cc1b4 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_telemetry.h
+++ b/drivers/crypto/intel/qat/qat_common/adf_telemetry.h
@@ -40,6 +40,7 @@ struct adf_tl_hw_data {
u8 num_dev_counters;
u8 num_rp_counters;
u8 max_rp;
+ u8 max_sl_cnt;
};
struct adf_telemetry {
diff --git a/drivers/crypto/intel/qat/qat_common/adf_transport.c b/drivers/crypto/intel/qat/qat_common/adf_transport.c
index 630d0483c4e0..1efdf46490f1 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_transport.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_transport.c
@@ -474,7 +474,6 @@ err:
int adf_init_etr_data(struct adf_accel_dev *accel_dev)
{
struct adf_etr_data *etr_data;
- struct adf_hw_device_data *hw_data = accel_dev->hw_device;
void __iomem *csr_addr;
u32 size;
u32 num_banks = 0;
@@ -495,8 +494,7 @@ int adf_init_etr_data(struct adf_accel_dev *accel_dev)
}
accel_dev->transport = etr_data;
- i = hw_data->get_etr_bar_id(hw_data);
- csr_addr = accel_dev->accel_pci_dev.pci_bars[i].virt_addr;
+ csr_addr = adf_get_etr_base(accel_dev);
/* accel_dev->debugfs_dir should always be non-NULL here */
etr_data->debug = debugfs_create_dir("transport",
diff --git a/drivers/crypto/intel/qat/qat_common/adf_vf_isr.c b/drivers/crypto/intel/qat/qat_common/adf_vf_isr.c
index b05c3957a160..cdbb2d687b1b 100644
--- a/drivers/crypto/intel/qat/qat_common/adf_vf_isr.c
+++ b/drivers/crypto/intel/qat/qat_common/adf_vf_isr.c
@@ -293,8 +293,6 @@ EXPORT_SYMBOL_GPL(adf_flush_vf_wq);
/**
* adf_init_vf_wq() - Init workqueue for VF
*
- * Function init workqueue 'adf_vf_stop_wq' for VF.
- *
* Return: 0 on success, error code otherwise.
*/
int __init adf_init_vf_wq(void)
diff --git a/drivers/crypto/intel/qat/qat_common/qat_asym_algs.c b/drivers/crypto/intel/qat/qat_common/qat_asym_algs.c
index 4128200a9032..85c682e248fb 100644
--- a/drivers/crypto/intel/qat/qat_common/qat_asym_algs.c
+++ b/drivers/crypto/intel/qat/qat_common/qat_asym_algs.c
@@ -110,6 +110,8 @@ struct qat_dh_ctx {
unsigned int p_size;
bool g2;
struct qat_crypto_instance *inst;
+ struct crypto_kpp *ftfm;
+ bool fallback;
} __packed __aligned(64);
struct qat_asym_request {
@@ -381,6 +383,36 @@ unmap_src:
return ret;
}
+static int qat_dh_generate_public_key(struct kpp_request *req)
+{
+ struct kpp_request *nreq = kpp_request_ctx(req);
+ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+ struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm);
+
+ if (ctx->fallback) {
+ memcpy(nreq, req, sizeof(*req));
+ kpp_request_set_tfm(nreq, ctx->ftfm);
+ return crypto_kpp_generate_public_key(nreq);
+ }
+
+ return qat_dh_compute_value(req);
+}
+
+static int qat_dh_compute_shared_secret(struct kpp_request *req)
+{
+ struct kpp_request *nreq = kpp_request_ctx(req);
+ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+ struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm);
+
+ if (ctx->fallback) {
+ memcpy(nreq, req, sizeof(*req));
+ kpp_request_set_tfm(nreq, ctx->ftfm);
+ return crypto_kpp_compute_shared_secret(nreq);
+ }
+
+ return qat_dh_compute_value(req);
+}
+
static int qat_dh_check_params_length(unsigned int p_len)
{
switch (p_len) {
@@ -398,9 +430,6 @@ static int qat_dh_set_params(struct qat_dh_ctx *ctx, struct dh *params)
struct qat_crypto_instance *inst = ctx->inst;
struct device *dev = &GET_DEV(inst->accel_dev);
- if (qat_dh_check_params_length(params->p_size << 3))
- return -EINVAL;
-
ctx->p_size = params->p_size;
ctx->p = dma_alloc_coherent(dev, ctx->p_size, &ctx->dma_p, GFP_KERNEL);
if (!ctx->p)
@@ -454,6 +483,13 @@ static int qat_dh_set_secret(struct crypto_kpp *tfm, const void *buf,
if (crypto_dh_decode_key(buf, len, &params) < 0)
return -EINVAL;
+ if (qat_dh_check_params_length(params.p_size << 3)) {
+ ctx->fallback = true;
+ return crypto_kpp_set_secret(ctx->ftfm, buf, len);
+ }
+
+ ctx->fallback = false;
+
/* Free old secret if any */
qat_dh_clear_ctx(dev, ctx);
@@ -481,6 +517,9 @@ static unsigned int qat_dh_max_size(struct crypto_kpp *tfm)
{
struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm);
+ if (ctx->fallback)
+ return crypto_kpp_maxsize(ctx->ftfm);
+
return ctx->p_size;
}
@@ -489,11 +528,22 @@ static int qat_dh_init_tfm(struct crypto_kpp *tfm)
struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm);
struct qat_crypto_instance *inst =
qat_crypto_get_instance_node(numa_node_id());
+ const char *alg = kpp_alg_name(tfm);
+ unsigned int reqsize;
if (!inst)
return -EINVAL;
- kpp_set_reqsize(tfm, sizeof(struct qat_asym_request) + 64);
+ ctx->ftfm = crypto_alloc_kpp(alg, 0, CRYPTO_ALG_NEED_FALLBACK);
+ if (IS_ERR(ctx->ftfm))
+ return PTR_ERR(ctx->ftfm);
+
+ crypto_kpp_set_flags(ctx->ftfm, crypto_kpp_get_flags(tfm));
+
+ reqsize = max(sizeof(struct qat_asym_request) + 64,
+ sizeof(struct kpp_request) + crypto_kpp_reqsize(ctx->ftfm));
+
+ kpp_set_reqsize(tfm, reqsize);
ctx->p_size = 0;
ctx->g2 = false;
@@ -506,6 +556,9 @@ static void qat_dh_exit_tfm(struct crypto_kpp *tfm)
struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm);
struct device *dev = &GET_DEV(ctx->inst->accel_dev);
+ if (ctx->ftfm)
+ crypto_free_kpp(ctx->ftfm);
+
qat_dh_clear_ctx(dev, ctx);
qat_crypto_put_instance(ctx->inst);
}
@@ -1265,8 +1318,8 @@ static struct akcipher_alg rsa = {
static struct kpp_alg dh = {
.set_secret = qat_dh_set_secret,
- .generate_public_key = qat_dh_compute_value,
- .compute_shared_secret = qat_dh_compute_value,
+ .generate_public_key = qat_dh_generate_public_key,
+ .compute_shared_secret = qat_dh_compute_shared_secret,
.max_size = qat_dh_max_size,
.init = qat_dh_init_tfm,
.exit = qat_dh_exit_tfm,
@@ -1276,6 +1329,7 @@ static struct kpp_alg dh = {
.cra_priority = 1000,
.cra_module = THIS_MODULE,
.cra_ctxsize = sizeof(struct qat_dh_ctx),
+ .cra_flags = CRYPTO_ALG_NEED_FALLBACK,
},
};
diff --git a/drivers/crypto/intel/qat/qat_common/qat_bl.c b/drivers/crypto/intel/qat/qat_common/qat_bl.c
index 76baed0a76c0..338acf29c487 100644
--- a/drivers/crypto/intel/qat/qat_common/qat_bl.c
+++ b/drivers/crypto/intel/qat/qat_common/qat_bl.c
@@ -81,7 +81,8 @@ static int __qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev,
if (unlikely(!bufl))
return -ENOMEM;
} else {
- bufl = &buf->sgl_src.sgl_hdr;
+ bufl = container_of(&buf->sgl_src.sgl_hdr,
+ struct qat_alg_buf_list, hdr);
memset(bufl, 0, sizeof(struct qat_alg_buf_list));
buf->sgl_src_valid = true;
}
@@ -139,7 +140,8 @@ static int __qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev,
if (unlikely(!buflout))
goto err_in;
} else {
- buflout = &buf->sgl_dst.sgl_hdr;
+ buflout = container_of(&buf->sgl_dst.sgl_hdr,
+ struct qat_alg_buf_list, hdr);
memset(buflout, 0, sizeof(struct qat_alg_buf_list));
buf->sgl_dst_valid = true;
}
diff --git a/drivers/crypto/intel/qat/qat_common/qat_bl.h b/drivers/crypto/intel/qat/qat_common/qat_bl.h
index d87e4f35ac39..85bc32a9ec0e 100644
--- a/drivers/crypto/intel/qat/qat_common/qat_bl.h
+++ b/drivers/crypto/intel/qat/qat_common/qat_bl.h
@@ -15,14 +15,17 @@ struct qat_alg_buf {
} __packed;
struct qat_alg_buf_list {
- u64 resrvd;
- u32 num_bufs;
- u32 num_mapped_bufs;
+ /* New members must be added within the __struct_group() macro below. */
+ __struct_group(qat_alg_buf_list_hdr, hdr, __packed,
+ u64 resrvd;
+ u32 num_bufs;
+ u32 num_mapped_bufs;
+ );
struct qat_alg_buf buffers[];
} __packed;
struct qat_alg_fixed_buf_list {
- struct qat_alg_buf_list sgl_hdr;
+ struct qat_alg_buf_list_hdr sgl_hdr;
struct qat_alg_buf descriptors[QAT_MAX_BUFF_DESC];
} __packed __aligned(64);
diff --git a/drivers/crypto/intel/qat/qat_common/qat_comp_algs.c b/drivers/crypto/intel/qat/qat_common/qat_comp_algs.c
index bf8c0ee62917..2ba4aa22e092 100644
--- a/drivers/crypto/intel/qat/qat_common/qat_comp_algs.c
+++ b/drivers/crypto/intel/qat/qat_common/qat_comp_algs.c
@@ -13,15 +13,6 @@
#include "qat_compression.h"
#include "qat_algs_send.h"
-#define QAT_RFC_1950_HDR_SIZE 2
-#define QAT_RFC_1950_FOOTER_SIZE 4
-#define QAT_RFC_1950_CM_DEFLATE 8
-#define QAT_RFC_1950_CM_DEFLATE_CINFO_32K 7
-#define QAT_RFC_1950_CM_MASK 0x0f
-#define QAT_RFC_1950_CM_OFFSET 4
-#define QAT_RFC_1950_DICT_MASK 0x20
-#define QAT_RFC_1950_COMP_HDR 0x785e
-
static DEFINE_MUTEX(algs_lock);
static unsigned int active_devs;
diff --git a/drivers/crypto/intel/qat/qat_common/qat_crypto.c b/drivers/crypto/intel/qat/qat_common/qat_crypto.c
index 40c8e74d1cf9..101c6ea41673 100644
--- a/drivers/crypto/intel/qat/qat_common/qat_crypto.c
+++ b/drivers/crypto/intel/qat/qat_common/qat_crypto.c
@@ -105,8 +105,8 @@ struct qat_crypto_instance *qat_crypto_get_instance_node(int node)
}
/**
- * qat_crypto_vf_dev_config()
- * create dev config required to create crypto inst.
+ * qat_crypto_vf_dev_config() - create dev config required to create
+ * crypto inst.
*
* @accel_dev: Pointer to acceleration device.
*
diff --git a/drivers/crypto/intel/qat/qat_common/qat_mig_dev.c b/drivers/crypto/intel/qat/qat_common/qat_mig_dev.c
new file mode 100644
index 000000000000..892c2283a50e
--- /dev/null
+++ b/drivers/crypto/intel/qat/qat_common/qat_mig_dev.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2024 Intel Corporation */
+#include <linux/dev_printk.h>
+#include <linux/export.h>
+#include <linux/pci.h>
+#include <linux/types.h>
+#include <linux/qat/qat_mig_dev.h>
+#include "adf_accel_devices.h"
+#include "adf_common_drv.h"
+
+struct qat_mig_dev *qat_vfmig_create(struct pci_dev *pdev, int vf_id)
+{
+ struct adf_accel_dev *accel_dev;
+ struct qat_migdev_ops *ops;
+ struct qat_mig_dev *mdev;
+
+ accel_dev = adf_devmgr_pci_to_accel_dev(pdev);
+ if (!accel_dev)
+ return ERR_PTR(-ENODEV);
+
+ ops = GET_VFMIG_OPS(accel_dev);
+ if (!ops || !ops->init || !ops->cleanup || !ops->reset || !ops->open ||
+ !ops->close || !ops->suspend || !ops->resume || !ops->save_state ||
+ !ops->load_state || !ops->save_setup || !ops->load_setup)
+ return ERR_PTR(-EINVAL);
+
+ mdev = kmalloc(sizeof(*mdev), GFP_KERNEL);
+ if (!mdev)
+ return ERR_PTR(-ENOMEM);
+
+ mdev->vf_id = vf_id;
+ mdev->parent_accel_dev = accel_dev;
+
+ return mdev;
+}
+EXPORT_SYMBOL_GPL(qat_vfmig_create);
+
+int qat_vfmig_init(struct qat_mig_dev *mdev)
+{
+ struct adf_accel_dev *accel_dev = mdev->parent_accel_dev;
+
+ return GET_VFMIG_OPS(accel_dev)->init(mdev);
+}
+EXPORT_SYMBOL_GPL(qat_vfmig_init);
+
+void qat_vfmig_cleanup(struct qat_mig_dev *mdev)
+{
+ struct adf_accel_dev *accel_dev = mdev->parent_accel_dev;
+
+ return GET_VFMIG_OPS(accel_dev)->cleanup(mdev);
+}
+EXPORT_SYMBOL_GPL(qat_vfmig_cleanup);
+
+void qat_vfmig_reset(struct qat_mig_dev *mdev)
+{
+ struct adf_accel_dev *accel_dev = mdev->parent_accel_dev;
+
+ return GET_VFMIG_OPS(accel_dev)->reset(mdev);
+}
+EXPORT_SYMBOL_GPL(qat_vfmig_reset);
+
+int qat_vfmig_open(struct qat_mig_dev *mdev)
+{
+ struct adf_accel_dev *accel_dev = mdev->parent_accel_dev;
+
+ return GET_VFMIG_OPS(accel_dev)->open(mdev);
+}
+EXPORT_SYMBOL_GPL(qat_vfmig_open);
+
+void qat_vfmig_close(struct qat_mig_dev *mdev)
+{
+ struct adf_accel_dev *accel_dev = mdev->parent_accel_dev;
+
+ GET_VFMIG_OPS(accel_dev)->close(mdev);
+}
+EXPORT_SYMBOL_GPL(qat_vfmig_close);
+
+int qat_vfmig_suspend(struct qat_mig_dev *mdev)
+{
+ struct adf_accel_dev *accel_dev = mdev->parent_accel_dev;
+
+ return GET_VFMIG_OPS(accel_dev)->suspend(mdev);
+}
+EXPORT_SYMBOL_GPL(qat_vfmig_suspend);
+
+int qat_vfmig_resume(struct qat_mig_dev *mdev)
+{
+ struct adf_accel_dev *accel_dev = mdev->parent_accel_dev;
+
+ return GET_VFMIG_OPS(accel_dev)->resume(mdev);
+}
+EXPORT_SYMBOL_GPL(qat_vfmig_resume);
+
+int qat_vfmig_save_state(struct qat_mig_dev *mdev)
+{
+ struct adf_accel_dev *accel_dev = mdev->parent_accel_dev;
+
+ return GET_VFMIG_OPS(accel_dev)->save_state(mdev);
+}
+EXPORT_SYMBOL_GPL(qat_vfmig_save_state);
+
+int qat_vfmig_save_setup(struct qat_mig_dev *mdev)
+{
+ struct adf_accel_dev *accel_dev = mdev->parent_accel_dev;
+
+ return GET_VFMIG_OPS(accel_dev)->save_setup(mdev);
+}
+EXPORT_SYMBOL_GPL(qat_vfmig_save_setup);
+
+int qat_vfmig_load_state(struct qat_mig_dev *mdev)
+{
+ struct adf_accel_dev *accel_dev = mdev->parent_accel_dev;
+
+ return GET_VFMIG_OPS(accel_dev)->load_state(mdev);
+}
+EXPORT_SYMBOL_GPL(qat_vfmig_load_state);
+
+int qat_vfmig_load_setup(struct qat_mig_dev *mdev, int size)
+{
+ struct adf_accel_dev *accel_dev = mdev->parent_accel_dev;
+
+ return GET_VFMIG_OPS(accel_dev)->load_setup(mdev, size);
+}
+EXPORT_SYMBOL_GPL(qat_vfmig_load_setup);
+
+void qat_vfmig_destroy(struct qat_mig_dev *mdev)
+{
+ kfree(mdev);
+}
+EXPORT_SYMBOL_GPL(qat_vfmig_destroy);
diff --git a/drivers/crypto/intel/qat/qat_dh895xcc/Makefile b/drivers/crypto/intel/qat/qat_dh895xcc/Makefile
index 38d6f8e1624a..cfd3bd757715 100644
--- a/drivers/crypto/intel/qat/qat_dh895xcc/Makefile
+++ b/drivers/crypto/intel/qat/qat_dh895xcc/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-ccflags-y := -I $(srctree)/$(src)/../qat_common
+ccflags-y := -I $(src)/../qat_common
obj-$(CONFIG_CRYPTO_DEV_QAT_DH895xCC) += qat_dh895xcc.o
qat_dh895xcc-objs := adf_drv.o adf_dh895xcc_hw_data.o
diff --git a/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
index af14090cc4be..6e24d57e6b98 100644
--- a/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
@@ -5,6 +5,7 @@
#include <adf_common_drv.h>
#include <adf_gen2_config.h>
#include <adf_gen2_dc.h>
+#include <adf_gen2_hw_csr_data.h>
#include <adf_gen2_hw_data.h>
#include <adf_gen2_pfvf.h>
#include "adf_dh895xcc_hw_data.h"
diff --git a/drivers/crypto/intel/qat/qat_dh895xccvf/Makefile b/drivers/crypto/intel/qat/qat_dh895xccvf/Makefile
index 0153c85ce743..64b54e92b2b4 100644
--- a/drivers/crypto/intel/qat/qat_dh895xccvf/Makefile
+++ b/drivers/crypto/intel/qat/qat_dh895xccvf/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-ccflags-y := -I $(srctree)/$(src)/../qat_common
+ccflags-y := -I $(src)/../qat_common
obj-$(CONFIG_CRYPTO_DEV_QAT_DH895xCCVF) += qat_dh895xccvf.o
qat_dh895xccvf-objs := adf_drv.o adf_dh895xccvf_hw_data.o
diff --git a/drivers/crypto/intel/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c b/drivers/crypto/intel/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c
index 70e56cc16ece..f4ee4c2e00da 100644
--- a/drivers/crypto/intel/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c
@@ -4,6 +4,7 @@
#include <adf_common_drv.h>
#include <adf_gen2_config.h>
#include <adf_gen2_dc.h>
+#include <adf_gen2_hw_csr_data.h>
#include <adf_gen2_hw_data.h>
#include <adf_gen2_pfvf.h>
#include <adf_pfvf_vf_msg.h>
diff --git a/drivers/crypto/marvell/octeontx2/cn10k_cpt.c b/drivers/crypto/marvell/octeontx2/cn10k_cpt.c
index 79b4e74804f6..6bfc59e67747 100644
--- a/drivers/crypto/marvell/octeontx2/cn10k_cpt.c
+++ b/drivers/crypto/marvell/octeontx2/cn10k_cpt.c
@@ -138,6 +138,10 @@ int cn10k_cpt_hw_ctx_init(struct pci_dev *pdev,
return -ENOMEM;
cptr_dma = dma_map_single(&pdev->dev, hctx, CN10K_CPT_HW_CTX_SIZE,
DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(&pdev->dev, cptr_dma)) {
+ kfree(hctx);
+ return -ENOMEM;
+ }
cn10k_cpt_hw_ctx_set(hctx, 1);
er_ctx->hw_ctx = hctx;
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c b/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c
index d2b8d26db968..215a1a8ba7e9 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c
@@ -4,7 +4,8 @@
#include "otx2_cpt_devlink.h"
static int otx2_cpt_dl_egrp_create(struct devlink *dl, u32 id,
- struct devlink_param_gset_ctx *ctx)
+ struct devlink_param_gset_ctx *ctx,
+ struct netlink_ext_ack *extack)
{
struct otx2_cpt_devlink *cpt_dl = devlink_priv(dl);
struct otx2_cptpf_dev *cptpf = cpt_dl->cptpf;
@@ -13,7 +14,8 @@ static int otx2_cpt_dl_egrp_create(struct devlink *dl, u32 id,
}
static int otx2_cpt_dl_egrp_delete(struct devlink *dl, u32 id,
- struct devlink_param_gset_ctx *ctx)
+ struct devlink_param_gset_ctx *ctx,
+ struct netlink_ext_ack *extack)
{
struct otx2_cpt_devlink *cpt_dl = devlink_priv(dl);
struct otx2_cptpf_dev *cptpf = cpt_dl->cptpf;
@@ -45,7 +47,8 @@ static int otx2_cpt_dl_t106_mode_get(struct devlink *dl, u32 id,
}
static int otx2_cpt_dl_t106_mode_set(struct devlink *dl, u32 id,
- struct devlink_param_gset_ctx *ctx)
+ struct devlink_param_gset_ctx *ctx,
+ struct netlink_ext_ack *extack)
{
struct otx2_cpt_devlink *cpt_dl = devlink_priv(dl);
struct otx2_cptpf_dev *cptpf = cpt_dl->cptpf;
diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c
index 2b3ebe0db3a6..057d73c370b7 100644
--- a/drivers/crypto/mxs-dcp.c
+++ b/drivers/crypto/mxs-dcp.c
@@ -15,6 +15,7 @@
#include <linux/platform_device.h>
#include <linux/stmp_device.h>
#include <linux/clk.h>
+#include <soc/fsl/dcp.h>
#include <crypto/aes.h>
#include <crypto/sha1.h>
@@ -101,6 +102,7 @@ struct dcp_async_ctx {
struct crypto_skcipher *fallback;
unsigned int key_len;
uint8_t key[AES_KEYSIZE_128];
+ bool key_referenced;
};
struct dcp_aes_req_ctx {
@@ -155,6 +157,7 @@ static struct dcp *global_sdcp;
#define MXS_DCP_CONTROL0_HASH_TERM (1 << 13)
#define MXS_DCP_CONTROL0_HASH_INIT (1 << 12)
#define MXS_DCP_CONTROL0_PAYLOAD_KEY (1 << 11)
+#define MXS_DCP_CONTROL0_OTP_KEY (1 << 10)
#define MXS_DCP_CONTROL0_CIPHER_ENCRYPT (1 << 8)
#define MXS_DCP_CONTROL0_CIPHER_INIT (1 << 9)
#define MXS_DCP_CONTROL0_ENABLE_HASH (1 << 6)
@@ -168,6 +171,8 @@ static struct dcp *global_sdcp;
#define MXS_DCP_CONTROL1_CIPHER_MODE_ECB (0 << 4)
#define MXS_DCP_CONTROL1_CIPHER_SELECT_AES128 (0 << 0)
+#define MXS_DCP_CONTROL1_KEY_SELECT_SHIFT 8
+
static int mxs_dcp_start_dma(struct dcp_async_ctx *actx)
{
int dma_err;
@@ -224,13 +229,16 @@ static int mxs_dcp_run_aes(struct dcp_async_ctx *actx,
struct dcp *sdcp = global_sdcp;
struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan];
struct dcp_aes_req_ctx *rctx = skcipher_request_ctx(req);
+ bool key_referenced = actx->key_referenced;
int ret;
- key_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_key,
- 2 * AES_KEYSIZE_128, DMA_TO_DEVICE);
- ret = dma_mapping_error(sdcp->dev, key_phys);
- if (ret)
- return ret;
+ if (!key_referenced) {
+ key_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_key,
+ 2 * AES_KEYSIZE_128, DMA_TO_DEVICE);
+ ret = dma_mapping_error(sdcp->dev, key_phys);
+ if (ret)
+ return ret;
+ }
src_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_in_buf,
DCP_BUF_SZ, DMA_TO_DEVICE);
@@ -255,8 +263,12 @@ static int mxs_dcp_run_aes(struct dcp_async_ctx *actx,
MXS_DCP_CONTROL0_INTERRUPT |
MXS_DCP_CONTROL0_ENABLE_CIPHER;
- /* Payload contains the key. */
- desc->control0 |= MXS_DCP_CONTROL0_PAYLOAD_KEY;
+ if (key_referenced)
+ /* Set OTP key bit to select the key via KEY_SELECT. */
+ desc->control0 |= MXS_DCP_CONTROL0_OTP_KEY;
+ else
+ /* Payload contains the key. */
+ desc->control0 |= MXS_DCP_CONTROL0_PAYLOAD_KEY;
if (rctx->enc)
desc->control0 |= MXS_DCP_CONTROL0_CIPHER_ENCRYPT;
@@ -270,6 +282,9 @@ static int mxs_dcp_run_aes(struct dcp_async_ctx *actx,
else
desc->control1 |= MXS_DCP_CONTROL1_CIPHER_MODE_CBC;
+ if (key_referenced)
+ desc->control1 |= sdcp->coh->aes_key[0] << MXS_DCP_CONTROL1_KEY_SELECT_SHIFT;
+
desc->next_cmd_addr = 0;
desc->source = src_phys;
desc->destination = dst_phys;
@@ -284,9 +299,9 @@ aes_done_run:
err_dst:
dma_unmap_single(sdcp->dev, src_phys, DCP_BUF_SZ, DMA_TO_DEVICE);
err_src:
- dma_unmap_single(sdcp->dev, key_phys, 2 * AES_KEYSIZE_128,
- DMA_TO_DEVICE);
-
+ if (!key_referenced)
+ dma_unmap_single(sdcp->dev, key_phys, 2 * AES_KEYSIZE_128,
+ DMA_TO_DEVICE);
return ret;
}
@@ -453,7 +468,7 @@ static int mxs_dcp_aes_enqueue(struct skcipher_request *req, int enc, int ecb)
struct dcp_aes_req_ctx *rctx = skcipher_request_ctx(req);
int ret;
- if (unlikely(actx->key_len != AES_KEYSIZE_128))
+ if (unlikely(actx->key_len != AES_KEYSIZE_128 && !actx->key_referenced))
return mxs_dcp_block_fallback(req, enc);
rctx->enc = enc;
@@ -500,6 +515,7 @@ static int mxs_dcp_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
* there can still be an operation in progress.
*/
actx->key_len = len;
+ actx->key_referenced = false;
if (len == AES_KEYSIZE_128) {
memcpy(actx->key, key, len);
return 0;
@@ -516,6 +532,32 @@ static int mxs_dcp_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
return crypto_skcipher_setkey(actx->fallback, key, len);
}
+static int mxs_dcp_aes_setrefkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int len)
+{
+ struct dcp_async_ctx *actx = crypto_skcipher_ctx(tfm);
+
+ if (len != DCP_PAES_KEYSIZE)
+ return -EINVAL;
+
+ switch (key[0]) {
+ case DCP_PAES_KEY_SLOT0:
+ case DCP_PAES_KEY_SLOT1:
+ case DCP_PAES_KEY_SLOT2:
+ case DCP_PAES_KEY_SLOT3:
+ case DCP_PAES_KEY_UNIQUE:
+ case DCP_PAES_KEY_OTP:
+ memcpy(actx->key, key, len);
+ actx->key_len = len;
+ actx->key_referenced = true;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int mxs_dcp_aes_fallback_init_tfm(struct crypto_skcipher *tfm)
{
const char *name = crypto_tfm_alg_name(crypto_skcipher_tfm(tfm));
@@ -539,6 +581,13 @@ static void mxs_dcp_aes_fallback_exit_tfm(struct crypto_skcipher *tfm)
crypto_free_skcipher(actx->fallback);
}
+static int mxs_dcp_paes_init_tfm(struct crypto_skcipher *tfm)
+{
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct dcp_aes_req_ctx));
+
+ return 0;
+}
+
/*
* Hashing (SHA1/SHA256)
*/
@@ -889,6 +938,39 @@ static struct skcipher_alg dcp_aes_algs[] = {
.ivsize = AES_BLOCK_SIZE,
.init = mxs_dcp_aes_fallback_init_tfm,
.exit = mxs_dcp_aes_fallback_exit_tfm,
+ }, {
+ .base.cra_name = "ecb(paes)",
+ .base.cra_driver_name = "ecb-paes-dcp",
+ .base.cra_priority = 401,
+ .base.cra_alignmask = 15,
+ .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct dcp_async_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = DCP_PAES_KEYSIZE,
+ .max_keysize = DCP_PAES_KEYSIZE,
+ .setkey = mxs_dcp_aes_setrefkey,
+ .encrypt = mxs_dcp_aes_ecb_encrypt,
+ .decrypt = mxs_dcp_aes_ecb_decrypt,
+ .init = mxs_dcp_paes_init_tfm,
+ }, {
+ .base.cra_name = "cbc(paes)",
+ .base.cra_driver_name = "cbc-paes-dcp",
+ .base.cra_priority = 401,
+ .base.cra_alignmask = 15,
+ .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_INTERNAL,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct dcp_async_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = DCP_PAES_KEYSIZE,
+ .max_keysize = DCP_PAES_KEYSIZE,
+ .setkey = mxs_dcp_aes_setrefkey,
+ .encrypt = mxs_dcp_aes_cbc_encrypt,
+ .decrypt = mxs_dcp_aes_cbc_decrypt,
+ .ivsize = AES_BLOCK_SIZE,
+ .init = mxs_dcp_paes_init_tfm,
},
};
diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
index 7a3083debc2b..59d472cb11e7 100644
--- a/drivers/crypto/n2_core.c
+++ b/drivers/crypto/n2_core.c
@@ -41,7 +41,7 @@
static const char version[] =
DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
-MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
+MODULE_AUTHOR("David S. Miller <davem@davemloft.net>");
MODULE_DESCRIPTION("Niagara2 Crypto driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_MODULE_VERSION);
diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c
index 2ab90ec10e61..82214cde2bcd 100644
--- a/drivers/crypto/nx/nx-842.c
+++ b/drivers/crypto/nx/nx-842.c
@@ -251,7 +251,9 @@ int nx842_crypto_compress(struct crypto_tfm *tfm,
u8 *dst, unsigned int *dlen)
{
struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
- struct nx842_crypto_header *hdr = &ctx->header;
+ struct nx842_crypto_header *hdr =
+ container_of(&ctx->header,
+ struct nx842_crypto_header, hdr);
struct nx842_crypto_param p;
struct nx842_constraints c = *ctx->driver->constraints;
unsigned int groups, hdrsize, h;
@@ -490,7 +492,7 @@ int nx842_crypto_decompress(struct crypto_tfm *tfm,
}
memcpy(&ctx->header, src, hdr_len);
- hdr = &ctx->header;
+ hdr = container_of(&ctx->header, struct nx842_crypto_header, hdr);
for (n = 0; n < hdr->groups; n++) {
/* ignore applies to last group */
diff --git a/drivers/crypto/nx/nx-842.h b/drivers/crypto/nx/nx-842.h
index 7590bfb24d79..25fa70b2112c 100644
--- a/drivers/crypto/nx/nx-842.h
+++ b/drivers/crypto/nx/nx-842.h
@@ -157,9 +157,11 @@ struct nx842_crypto_header_group {
} __packed;
struct nx842_crypto_header {
- __be16 magic; /* NX842_CRYPTO_MAGIC */
- __be16 ignore; /* decompressed end bytes to ignore */
- u8 groups; /* total groups in this header */
+ struct_group_tagged(nx842_crypto_header_hdr, hdr,
+ __be16 magic; /* NX842_CRYPTO_MAGIC */
+ __be16 ignore; /* decompressed end bytes to ignore */
+ u8 groups; /* total groups in this header */
+ );
struct nx842_crypto_header_group group[];
} __packed;
@@ -171,7 +173,7 @@ struct nx842_crypto_ctx {
u8 *wmem;
u8 *sbounce, *dbounce;
- struct nx842_crypto_header header;
+ struct nx842_crypto_header_hdr header;
struct nx842_crypto_header_group group[NX842_CRYPTO_GROUP_MAX];
struct nx842_driver *driver;
diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
index 70edf40bc523..f74b3c81ba6d 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.c
+++ b/drivers/crypto/rockchip/rk3288_crypto.c
@@ -371,6 +371,11 @@ static int rk_crypto_probe(struct platform_device *pdev)
}
crypto_info->engine = crypto_engine_alloc_init(&pdev->dev, true);
+ if (!crypto_info->engine) {
+ err = -ENOMEM;
+ goto err_crypto;
+ }
+
crypto_engine_start(crypto_info->engine);
init_completion(&crypto_info->complete);
diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c
index 3423b5cde1c7..96d4af5d48a6 100644
--- a/drivers/crypto/sahara.c
+++ b/drivers/crypto/sahara.c
@@ -559,7 +559,7 @@ static int sahara_aes_process(struct skcipher_request *req)
struct sahara_ctx *ctx;
struct sahara_aes_reqctx *rctx;
int ret;
- unsigned long timeout;
+ unsigned long time_left;
/* Request is ready to be dispatched by the device */
dev_dbg(dev->device,
@@ -597,15 +597,15 @@ static int sahara_aes_process(struct skcipher_request *req)
if (ret)
return -EINVAL;
- timeout = wait_for_completion_timeout(&dev->dma_completion,
- msecs_to_jiffies(SAHARA_TIMEOUT_MS));
+ time_left = wait_for_completion_timeout(&dev->dma_completion,
+ msecs_to_jiffies(SAHARA_TIMEOUT_MS));
dma_unmap_sg(dev->device, dev->out_sg, dev->nb_out_sg,
DMA_FROM_DEVICE);
dma_unmap_sg(dev->device, dev->in_sg, dev->nb_in_sg,
DMA_TO_DEVICE);
- if (!timeout) {
+ if (!time_left) {
dev_err(dev->device, "AES timeout\n");
return -ETIMEDOUT;
}
@@ -931,7 +931,7 @@ static int sahara_sha_process(struct ahash_request *req)
struct sahara_dev *dev = dev_ptr;
struct sahara_sha_reqctx *rctx = ahash_request_ctx(req);
int ret;
- unsigned long timeout;
+ unsigned long time_left;
ret = sahara_sha_prepare_request(req);
if (!ret)
@@ -963,14 +963,14 @@ static int sahara_sha_process(struct ahash_request *req)
sahara_write(dev, dev->hw_phys_desc[0], SAHARA_REG_DAR);
- timeout = wait_for_completion_timeout(&dev->dma_completion,
- msecs_to_jiffies(SAHARA_TIMEOUT_MS));
+ time_left = wait_for_completion_timeout(&dev->dma_completion,
+ msecs_to_jiffies(SAHARA_TIMEOUT_MS));
if (rctx->sg_in_idx)
dma_unmap_sg(dev->device, dev->in_sg, dev->nb_in_sg,
DMA_TO_DEVICE);
- if (!timeout) {
+ if (!time_left) {
dev_err(dev->device, "SHA timeout\n");
return -ETIMEDOUT;
}
diff --git a/drivers/crypto/starfive/Kconfig b/drivers/crypto/starfive/Kconfig
index cb59357b58b2..0fe389e9f932 100644
--- a/drivers/crypto/starfive/Kconfig
+++ b/drivers/crypto/starfive/Kconfig
@@ -14,6 +14,10 @@ config CRYPTO_DEV_JH7110
select CRYPTO_RSA
select CRYPTO_AES
select CRYPTO_CCM
+ select CRYPTO_GCM
+ select CRYPTO_ECB
+ select CRYPTO_CBC
+ select CRYPTO_CTR
help
Support for StarFive JH7110 crypto hardware acceleration engine.
This module provides acceleration for public key algo,
diff --git a/drivers/crypto/starfive/jh7110-aes.c b/drivers/crypto/starfive/jh7110-aes.c
index 1ac15cc4ef3c..86a1a1fa9f8f 100644
--- a/drivers/crypto/starfive/jh7110-aes.c
+++ b/drivers/crypto/starfive/jh7110-aes.c
@@ -78,7 +78,7 @@ static inline int is_gcm(struct starfive_cryp_dev *cryp)
return (cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_GCM;
}
-static inline int is_encrypt(struct starfive_cryp_dev *cryp)
+static inline bool is_encrypt(struct starfive_cryp_dev *cryp)
{
return cryp->flags & FLG_ENCRYPT;
}
@@ -103,16 +103,6 @@ static void starfive_aes_aead_hw_start(struct starfive_cryp_ctx *ctx, u32 hw_mod
}
}
-static inline void starfive_aes_set_ivlen(struct starfive_cryp_ctx *ctx)
-{
- struct starfive_cryp_dev *cryp = ctx->cryp;
-
- if (is_gcm(cryp))
- writel(GCM_AES_IV_SIZE, cryp->base + STARFIVE_AES_IVLEN);
- else
- writel(AES_BLOCK_SIZE, cryp->base + STARFIVE_AES_IVLEN);
-}
-
static inline void starfive_aes_set_alen(struct starfive_cryp_ctx *ctx)
{
struct starfive_cryp_dev *cryp = ctx->cryp;
@@ -261,7 +251,6 @@ static int starfive_aes_hw_init(struct starfive_cryp_ctx *ctx)
rctx->csr.aes.mode = hw_mode;
rctx->csr.aes.cmode = !is_encrypt(cryp);
- rctx->csr.aes.ie = 1;
rctx->csr.aes.stmode = STARFIVE_AES_MODE_XFB_1;
if (cryp->side_chan) {
@@ -279,7 +268,7 @@ static int starfive_aes_hw_init(struct starfive_cryp_ctx *ctx)
case STARFIVE_AES_MODE_GCM:
starfive_aes_set_alen(ctx);
starfive_aes_set_mlen(ctx);
- starfive_aes_set_ivlen(ctx);
+ writel(GCM_AES_IV_SIZE, cryp->base + STARFIVE_AES_IVLEN);
starfive_aes_aead_hw_start(ctx, hw_mode);
starfive_aes_write_iv(ctx, (void *)cryp->req.areq->iv);
break;
@@ -300,52 +289,49 @@ static int starfive_aes_hw_init(struct starfive_cryp_ctx *ctx)
return cryp->err;
}
-static int starfive_aes_read_authtag(struct starfive_cryp_dev *cryp)
+static int starfive_aes_read_authtag(struct starfive_cryp_ctx *ctx)
{
- int i, start_addr;
+ struct starfive_cryp_dev *cryp = ctx->cryp;
+ struct starfive_cryp_request_ctx *rctx = ctx->rctx;
+ int i;
if (starfive_aes_wait_busy(cryp))
return dev_err_probe(cryp->dev, -ETIMEDOUT,
"Timeout waiting for tag generation.");
- start_addr = STARFIVE_AES_NONCE0;
-
- if (is_gcm(cryp))
- for (i = 0; i < AES_BLOCK_32; i++, start_addr += 4)
- cryp->tag_out[i] = readl(cryp->base + start_addr);
- else
+ if ((cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_GCM) {
+ cryp->tag_out[0] = readl(cryp->base + STARFIVE_AES_NONCE0);
+ cryp->tag_out[1] = readl(cryp->base + STARFIVE_AES_NONCE1);
+ cryp->tag_out[2] = readl(cryp->base + STARFIVE_AES_NONCE2);
+ cryp->tag_out[3] = readl(cryp->base + STARFIVE_AES_NONCE3);
+ } else {
for (i = 0; i < AES_BLOCK_32; i++)
cryp->tag_out[i] = readl(cryp->base + STARFIVE_AES_AESDIO0R);
+ }
if (is_encrypt(cryp)) {
- scatterwalk_copychunks(cryp->tag_out, &cryp->out_walk, cryp->authsize, 1);
+ scatterwalk_map_and_copy(cryp->tag_out, rctx->out_sg,
+ cryp->total_in, cryp->authsize, 1);
} else {
- scatterwalk_copychunks(cryp->tag_in, &cryp->in_walk, cryp->authsize, 0);
-
if (crypto_memneq(cryp->tag_in, cryp->tag_out, cryp->authsize))
- return dev_err_probe(cryp->dev, -EBADMSG, "Failed tag verification\n");
+ return -EBADMSG;
}
return 0;
}
-static void starfive_aes_finish_req(struct starfive_cryp_dev *cryp)
+static void starfive_aes_finish_req(struct starfive_cryp_ctx *ctx)
{
- union starfive_aes_csr csr;
+ struct starfive_cryp_dev *cryp = ctx->cryp;
int err = cryp->err;
if (!err && cryp->authsize)
- err = starfive_aes_read_authtag(cryp);
+ err = starfive_aes_read_authtag(ctx);
if (!err && ((cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_CBC ||
(cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_CTR))
starfive_aes_get_iv(cryp, (void *)cryp->req.sreq->iv);
- /* reset irq flags*/
- csr.v = 0;
- csr.aesrst = 1;
- writel(csr.v, cryp->base + STARFIVE_AES_CSR);
-
if (cryp->authsize)
crypto_finalize_aead_request(cryp->engine, cryp->req.areq, err);
else
@@ -353,39 +339,6 @@ static void starfive_aes_finish_req(struct starfive_cryp_dev *cryp)
err);
}
-void starfive_aes_done_task(unsigned long param)
-{
- struct starfive_cryp_dev *cryp = (struct starfive_cryp_dev *)param;
- u32 block[AES_BLOCK_32];
- u32 stat;
- int i;
-
- for (i = 0; i < AES_BLOCK_32; i++)
- block[i] = readl(cryp->base + STARFIVE_AES_AESDIO0R);
-
- scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, AES_BLOCK_SIZE,
- cryp->total_out), 1);
-
- cryp->total_out -= min_t(size_t, AES_BLOCK_SIZE, cryp->total_out);
-
- if (!cryp->total_out) {
- starfive_aes_finish_req(cryp);
- return;
- }
-
- memset(block, 0, AES_BLOCK_SIZE);
- scatterwalk_copychunks(block, &cryp->in_walk, min_t(size_t, AES_BLOCK_SIZE,
- cryp->total_in), 0);
- cryp->total_in -= min_t(size_t, AES_BLOCK_SIZE, cryp->total_in);
-
- for (i = 0; i < AES_BLOCK_32; i++)
- writel(block[i], cryp->base + STARFIVE_AES_AESDIO0R);
-
- stat = readl(cryp->base + STARFIVE_IE_MASK_OFFSET);
- stat &= ~STARFIVE_IE_MASK_AES_DONE;
- writel(stat, cryp->base + STARFIVE_IE_MASK_OFFSET);
-}
-
static int starfive_aes_gcm_write_adata(struct starfive_cryp_ctx *ctx)
{
struct starfive_cryp_dev *cryp = ctx->cryp;
@@ -451,60 +404,165 @@ static int starfive_aes_ccm_write_adata(struct starfive_cryp_ctx *ctx)
return 0;
}
-static int starfive_aes_prepare_req(struct skcipher_request *req,
- struct aead_request *areq)
+static void starfive_aes_dma_done(void *param)
{
- struct starfive_cryp_ctx *ctx;
- struct starfive_cryp_request_ctx *rctx;
- struct starfive_cryp_dev *cryp;
+ struct starfive_cryp_dev *cryp = param;
- if (!req && !areq)
- return -EINVAL;
+ complete(&cryp->dma_done);
+}
- ctx = req ? crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)) :
- crypto_aead_ctx(crypto_aead_reqtfm(areq));
+static void starfive_aes_dma_init(struct starfive_cryp_dev *cryp)
+{
+ cryp->cfg_in.direction = DMA_MEM_TO_DEV;
+ cryp->cfg_in.src_addr_width = DMA_SLAVE_BUSWIDTH_16_BYTES;
+ cryp->cfg_in.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ cryp->cfg_in.src_maxburst = cryp->dma_maxburst;
+ cryp->cfg_in.dst_maxburst = cryp->dma_maxburst;
+ cryp->cfg_in.dst_addr = cryp->phys_base + STARFIVE_ALG_FIFO_OFFSET;
- cryp = ctx->cryp;
- rctx = req ? skcipher_request_ctx(req) : aead_request_ctx(areq);
+ dmaengine_slave_config(cryp->tx, &cryp->cfg_in);
- if (req) {
- cryp->req.sreq = req;
- cryp->total_in = req->cryptlen;
- cryp->total_out = req->cryptlen;
- cryp->assoclen = 0;
- cryp->authsize = 0;
- } else {
- cryp->req.areq = areq;
- cryp->assoclen = areq->assoclen;
- cryp->authsize = crypto_aead_authsize(crypto_aead_reqtfm(areq));
- if (is_encrypt(cryp)) {
- cryp->total_in = areq->cryptlen;
- cryp->total_out = areq->cryptlen;
- } else {
- cryp->total_in = areq->cryptlen - cryp->authsize;
- cryp->total_out = cryp->total_in;
- }
- }
+ cryp->cfg_out.direction = DMA_DEV_TO_MEM;
+ cryp->cfg_out.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ cryp->cfg_out.dst_addr_width = DMA_SLAVE_BUSWIDTH_16_BYTES;
+ cryp->cfg_out.src_maxburst = 4;
+ cryp->cfg_out.dst_maxburst = 4;
+ cryp->cfg_out.src_addr = cryp->phys_base + STARFIVE_ALG_FIFO_OFFSET;
- rctx->in_sg = req ? req->src : areq->src;
- scatterwalk_start(&cryp->in_walk, rctx->in_sg);
+ dmaengine_slave_config(cryp->rx, &cryp->cfg_out);
- rctx->out_sg = req ? req->dst : areq->dst;
- scatterwalk_start(&cryp->out_walk, rctx->out_sg);
+ init_completion(&cryp->dma_done);
+}
- if (cryp->assoclen) {
- rctx->adata = kzalloc(cryp->assoclen + AES_BLOCK_SIZE, GFP_KERNEL);
- if (!rctx->adata)
- return dev_err_probe(cryp->dev, -ENOMEM,
- "Failed to alloc memory for adata");
+static int starfive_aes_dma_xfer(struct starfive_cryp_dev *cryp,
+ struct scatterlist *src,
+ struct scatterlist *dst,
+ int len)
+{
+ struct dma_async_tx_descriptor *in_desc, *out_desc;
+ union starfive_alg_cr alg_cr;
+ int ret = 0, in_save, out_save;
+
+ alg_cr.v = 0;
+ alg_cr.start = 1;
+ alg_cr.aes_dma_en = 1;
+ writel(alg_cr.v, cryp->base + STARFIVE_ALG_CR_OFFSET);
+
+ in_save = sg_dma_len(src);
+ out_save = sg_dma_len(dst);
- scatterwalk_copychunks(rctx->adata, &cryp->in_walk, cryp->assoclen, 0);
- scatterwalk_copychunks(NULL, &cryp->out_walk, cryp->assoclen, 2);
+ writel(ALIGN(len, AES_BLOCK_SIZE), cryp->base + STARFIVE_DMA_IN_LEN_OFFSET);
+ writel(ALIGN(len, AES_BLOCK_SIZE), cryp->base + STARFIVE_DMA_OUT_LEN_OFFSET);
+
+ sg_dma_len(src) = ALIGN(len, AES_BLOCK_SIZE);
+ sg_dma_len(dst) = ALIGN(len, AES_BLOCK_SIZE);
+
+ out_desc = dmaengine_prep_slave_sg(cryp->rx, dst, 1, DMA_DEV_TO_MEM,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ if (!out_desc) {
+ ret = -EINVAL;
+ goto dma_err;
}
- ctx->rctx = rctx;
+ out_desc->callback = starfive_aes_dma_done;
+ out_desc->callback_param = cryp;
+
+ reinit_completion(&cryp->dma_done);
+ dmaengine_submit(out_desc);
+ dma_async_issue_pending(cryp->rx);
+
+ in_desc = dmaengine_prep_slave_sg(cryp->tx, src, 1, DMA_MEM_TO_DEV,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ if (!in_desc) {
+ ret = -EINVAL;
+ goto dma_err;
+ }
+
+ dmaengine_submit(in_desc);
+ dma_async_issue_pending(cryp->tx);
+
+ if (!wait_for_completion_timeout(&cryp->dma_done,
+ msecs_to_jiffies(1000)))
+ ret = -ETIMEDOUT;
+
+dma_err:
+ sg_dma_len(src) = in_save;
+ sg_dma_len(dst) = out_save;
+
+ alg_cr.v = 0;
+ alg_cr.clear = 1;
+ writel(alg_cr.v, cryp->base + STARFIVE_ALG_CR_OFFSET);
+
+ return ret;
+}
+
+static int starfive_aes_map_sg(struct starfive_cryp_dev *cryp,
+ struct scatterlist *src,
+ struct scatterlist *dst)
+{
+ struct scatterlist *stsg, *dtsg;
+ struct scatterlist _src[2], _dst[2];
+ unsigned int remain = cryp->total_in;
+ unsigned int len, src_nents, dst_nents;
+ int ret;
+
+ if (src == dst) {
+ for (stsg = src, dtsg = dst; remain > 0;
+ stsg = sg_next(stsg), dtsg = sg_next(dtsg)) {
+ src_nents = dma_map_sg(cryp->dev, stsg, 1, DMA_BIDIRECTIONAL);
+ if (src_nents == 0)
+ return dev_err_probe(cryp->dev, -ENOMEM,
+ "dma_map_sg error\n");
+
+ dst_nents = src_nents;
+ len = min(sg_dma_len(stsg), remain);
+
+ ret = starfive_aes_dma_xfer(cryp, stsg, dtsg, len);
+ dma_unmap_sg(cryp->dev, stsg, 1, DMA_BIDIRECTIONAL);
+ if (ret)
+ return ret;
+
+ remain -= len;
+ }
+ } else {
+ for (stsg = src, dtsg = dst;;) {
+ src_nents = dma_map_sg(cryp->dev, stsg, 1, DMA_TO_DEVICE);
+ if (src_nents == 0)
+ return dev_err_probe(cryp->dev, -ENOMEM,
+ "dma_map_sg src error\n");
+
+ dst_nents = dma_map_sg(cryp->dev, dtsg, 1, DMA_FROM_DEVICE);
+ if (dst_nents == 0)
+ return dev_err_probe(cryp->dev, -ENOMEM,
+ "dma_map_sg dst error\n");
+
+ len = min(sg_dma_len(stsg), sg_dma_len(dtsg));
+ len = min(len, remain);
+
+ ret = starfive_aes_dma_xfer(cryp, stsg, dtsg, len);
+ dma_unmap_sg(cryp->dev, stsg, 1, DMA_TO_DEVICE);
+ dma_unmap_sg(cryp->dev, dtsg, 1, DMA_FROM_DEVICE);
+ if (ret)
+ return ret;
+
+ remain -= len;
+ if (remain == 0)
+ break;
+
+ if (sg_dma_len(stsg) - len) {
+ stsg = scatterwalk_ffwd(_src, stsg, len);
+ dtsg = sg_next(dtsg);
+ } else if (sg_dma_len(dtsg) - len) {
+ dtsg = scatterwalk_ffwd(_dst, dtsg, len);
+ stsg = sg_next(stsg);
+ } else {
+ stsg = sg_next(stsg);
+ dtsg = sg_next(dtsg);
+ }
+ }
+ }
- return starfive_aes_hw_init(ctx);
+ return 0;
}
static int starfive_aes_do_one_req(struct crypto_engine *engine, void *areq)
@@ -513,35 +571,42 @@ static int starfive_aes_do_one_req(struct crypto_engine *engine, void *areq)
container_of(areq, struct skcipher_request, base);
struct starfive_cryp_ctx *ctx =
crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+ struct starfive_cryp_request_ctx *rctx = skcipher_request_ctx(req);
struct starfive_cryp_dev *cryp = ctx->cryp;
- u32 block[AES_BLOCK_32];
- u32 stat;
- int err;
- int i;
+ int ret;
- err = starfive_aes_prepare_req(req, NULL);
- if (err)
- return err;
+ cryp->req.sreq = req;
+ cryp->total_in = req->cryptlen;
+ cryp->total_out = req->cryptlen;
+ cryp->assoclen = 0;
+ cryp->authsize = 0;
- /*
- * Write first plain/ciphertext block to start the module
- * then let irq tasklet handle the rest of the data blocks.
- */
- scatterwalk_copychunks(block, &cryp->in_walk, min_t(size_t, AES_BLOCK_SIZE,
- cryp->total_in), 0);
- cryp->total_in -= min_t(size_t, AES_BLOCK_SIZE, cryp->total_in);
+ rctx->in_sg = req->src;
+ rctx->out_sg = req->dst;
+
+ ctx->rctx = rctx;
+
+ ret = starfive_aes_hw_init(ctx);
+ if (ret)
+ return ret;
- for (i = 0; i < AES_BLOCK_32; i++)
- writel(block[i], cryp->base + STARFIVE_AES_AESDIO0R);
+ if (!cryp->total_in)
+ goto finish_req;
- stat = readl(cryp->base + STARFIVE_IE_MASK_OFFSET);
- stat &= ~STARFIVE_IE_MASK_AES_DONE;
- writel(stat, cryp->base + STARFIVE_IE_MASK_OFFSET);
+ starfive_aes_dma_init(cryp);
+
+ ret = starfive_aes_map_sg(cryp, rctx->in_sg, rctx->out_sg);
+ if (ret)
+ return ret;
+
+finish_req:
+ starfive_aes_finish_req(ctx);
return 0;
}
-static int starfive_aes_init_tfm(struct crypto_skcipher *tfm)
+static int starfive_aes_init_tfm(struct crypto_skcipher *tfm,
+ const char *alg_name)
{
struct starfive_cryp_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -549,12 +614,26 @@ static int starfive_aes_init_tfm(struct crypto_skcipher *tfm)
if (!ctx->cryp)
return -ENODEV;
+ ctx->skcipher_fbk = crypto_alloc_skcipher(alg_name, 0,
+ CRYPTO_ALG_NEED_FALLBACK);
+ if (IS_ERR(ctx->skcipher_fbk))
+ return dev_err_probe(ctx->cryp->dev, PTR_ERR(ctx->skcipher_fbk),
+ "%s() failed to allocate fallback for %s\n",
+ __func__, alg_name);
+
crypto_skcipher_set_reqsize(tfm, sizeof(struct starfive_cryp_request_ctx) +
- sizeof(struct skcipher_request));
+ crypto_skcipher_reqsize(ctx->skcipher_fbk));
return 0;
}
+static void starfive_aes_exit_tfm(struct crypto_skcipher *tfm)
+{
+ struct starfive_cryp_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ crypto_free_skcipher(ctx->skcipher_fbk);
+}
+
static int starfive_aes_aead_do_one_req(struct crypto_engine *engine, void *areq)
{
struct aead_request *req =
@@ -562,79 +641,99 @@ static int starfive_aes_aead_do_one_req(struct crypto_engine *engine, void *areq
struct starfive_cryp_ctx *ctx =
crypto_aead_ctx(crypto_aead_reqtfm(req));
struct starfive_cryp_dev *cryp = ctx->cryp;
- struct starfive_cryp_request_ctx *rctx;
- u32 block[AES_BLOCK_32];
- u32 stat;
- int err;
- int i;
+ struct starfive_cryp_request_ctx *rctx = aead_request_ctx(req);
+ struct scatterlist _src[2], _dst[2];
+ int ret;
+
+ cryp->req.areq = req;
+ cryp->assoclen = req->assoclen;
+ cryp->authsize = crypto_aead_authsize(crypto_aead_reqtfm(req));
+
+ rctx->in_sg = scatterwalk_ffwd(_src, req->src, cryp->assoclen);
+ if (req->src == req->dst)
+ rctx->out_sg = rctx->in_sg;
+ else
+ rctx->out_sg = scatterwalk_ffwd(_dst, req->dst, cryp->assoclen);
+
+ if (is_encrypt(cryp)) {
+ cryp->total_in = req->cryptlen;
+ cryp->total_out = req->cryptlen;
+ } else {
+ cryp->total_in = req->cryptlen - cryp->authsize;
+ cryp->total_out = cryp->total_in;
+ scatterwalk_map_and_copy(cryp->tag_in, req->src,
+ cryp->total_in + cryp->assoclen,
+ cryp->authsize, 0);
+ }
- err = starfive_aes_prepare_req(NULL, req);
- if (err)
- return err;
+ if (cryp->assoclen) {
+ rctx->adata = kzalloc(cryp->assoclen + AES_BLOCK_SIZE, GFP_KERNEL);
+ if (!rctx->adata)
+ return dev_err_probe(cryp->dev, -ENOMEM,
+ "Failed to alloc memory for adata");
+
+ if (sg_copy_to_buffer(req->src, sg_nents_for_len(req->src, cryp->assoclen),
+ rctx->adata, cryp->assoclen) != cryp->assoclen)
+ return -EINVAL;
+ }
+
+ if (cryp->total_in)
+ sg_zero_buffer(rctx->in_sg, sg_nents(rctx->in_sg),
+ sg_dma_len(rctx->in_sg) - cryp->total_in,
+ cryp->total_in);
- rctx = ctx->rctx;
+ ctx->rctx = rctx;
+
+ ret = starfive_aes_hw_init(ctx);
+ if (ret)
+ return ret;
if (!cryp->assoclen)
goto write_text;
if ((cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_CCM)
- cryp->err = starfive_aes_ccm_write_adata(ctx);
+ ret = starfive_aes_ccm_write_adata(ctx);
else
- cryp->err = starfive_aes_gcm_write_adata(ctx);
+ ret = starfive_aes_gcm_write_adata(ctx);
kfree(rctx->adata);
- if (cryp->err)
- return cryp->err;
+ if (ret)
+ return ret;
write_text:
if (!cryp->total_in)
goto finish_req;
- /*
- * Write first plain/ciphertext block to start the module
- * then let irq tasklet handle the rest of the data blocks.
- */
- scatterwalk_copychunks(block, &cryp->in_walk, min_t(size_t, AES_BLOCK_SIZE,
- cryp->total_in), 0);
- cryp->total_in -= min_t(size_t, AES_BLOCK_SIZE, cryp->total_in);
-
- for (i = 0; i < AES_BLOCK_32; i++)
- writel(block[i], cryp->base + STARFIVE_AES_AESDIO0R);
-
- stat = readl(cryp->base + STARFIVE_IE_MASK_OFFSET);
- stat &= ~STARFIVE_IE_MASK_AES_DONE;
- writel(stat, cryp->base + STARFIVE_IE_MASK_OFFSET);
+ starfive_aes_dma_init(cryp);
- return 0;
+ ret = starfive_aes_map_sg(cryp, rctx->in_sg, rctx->out_sg);
+ if (ret)
+ return ret;
finish_req:
- starfive_aes_finish_req(cryp);
+ starfive_aes_finish_req(ctx);
return 0;
}
-static int starfive_aes_aead_init_tfm(struct crypto_aead *tfm)
+static int starfive_aes_aead_init_tfm(struct crypto_aead *tfm,
+ const char *alg_name)
{
struct starfive_cryp_ctx *ctx = crypto_aead_ctx(tfm);
- struct starfive_cryp_dev *cryp = ctx->cryp;
- struct crypto_tfm *aead = crypto_aead_tfm(tfm);
- struct crypto_alg *alg = aead->__crt_alg;
ctx->cryp = starfive_cryp_find_dev(ctx);
if (!ctx->cryp)
return -ENODEV;
- if (alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK) {
- ctx->aead_fbk = crypto_alloc_aead(alg->cra_name, 0,
- CRYPTO_ALG_NEED_FALLBACK);
- if (IS_ERR(ctx->aead_fbk))
- return dev_err_probe(cryp->dev, PTR_ERR(ctx->aead_fbk),
- "%s() failed to allocate fallback for %s\n",
- __func__, alg->cra_name);
- }
+ ctx->aead_fbk = crypto_alloc_aead(alg_name, 0,
+ CRYPTO_ALG_NEED_FALLBACK);
+ if (IS_ERR(ctx->aead_fbk))
+ return dev_err_probe(ctx->cryp->dev, PTR_ERR(ctx->aead_fbk),
+ "%s() failed to allocate fallback for %s\n",
+ __func__, alg_name);
- crypto_aead_set_reqsize(tfm, sizeof(struct starfive_cryp_ctx) +
- sizeof(struct aead_request));
+ crypto_aead_set_reqsize(tfm, sizeof(struct starfive_cryp_request_ctx) +
+ crypto_aead_reqsize(ctx->aead_fbk));
return 0;
}
@@ -646,6 +745,46 @@ static void starfive_aes_aead_exit_tfm(struct crypto_aead *tfm)
crypto_free_aead(ctx->aead_fbk);
}
+static bool starfive_aes_check_unaligned(struct starfive_cryp_dev *cryp,
+ struct scatterlist *src,
+ struct scatterlist *dst)
+{
+ struct scatterlist *tsg;
+ int i;
+
+ for_each_sg(src, tsg, sg_nents(src), i)
+ if (!IS_ALIGNED(tsg->offset, sizeof(u32)) ||
+ (!IS_ALIGNED(tsg->length, AES_BLOCK_SIZE) &&
+ !sg_is_last(tsg)))
+ return true;
+
+ if (src != dst)
+ for_each_sg(dst, tsg, sg_nents(dst), i)
+ if (!IS_ALIGNED(tsg->offset, sizeof(u32)) ||
+ (!IS_ALIGNED(tsg->length, AES_BLOCK_SIZE) &&
+ !sg_is_last(tsg)))
+ return true;
+
+ return false;
+}
+
+static int starfive_aes_do_fallback(struct skcipher_request *req, bool enc)
+{
+ struct starfive_cryp_ctx *ctx =
+ crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+ struct skcipher_request *subreq = skcipher_request_ctx(req);
+
+ skcipher_request_set_tfm(subreq, ctx->skcipher_fbk);
+ skcipher_request_set_callback(subreq, req->base.flags,
+ req->base.complete,
+ req->base.data);
+ skcipher_request_set_crypt(subreq, req->src, req->dst,
+ req->cryptlen, req->iv);
+
+ return enc ? crypto_skcipher_encrypt(subreq) :
+ crypto_skcipher_decrypt(subreq);
+}
+
static int starfive_aes_crypt(struct skcipher_request *req, unsigned long flags)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -660,32 +799,54 @@ static int starfive_aes_crypt(struct skcipher_request *req, unsigned long flags)
if (req->cryptlen & blocksize_align)
return -EINVAL;
+ if (starfive_aes_check_unaligned(cryp, req->src, req->dst))
+ return starfive_aes_do_fallback(req, is_encrypt(cryp));
+
return crypto_transfer_skcipher_request_to_engine(cryp->engine, req);
}
+static int starfive_aes_aead_do_fallback(struct aead_request *req, bool enc)
+{
+ struct starfive_cryp_ctx *ctx =
+ crypto_aead_ctx(crypto_aead_reqtfm(req));
+ struct aead_request *subreq = aead_request_ctx(req);
+
+ aead_request_set_tfm(subreq, ctx->aead_fbk);
+ aead_request_set_callback(subreq, req->base.flags,
+ req->base.complete,
+ req->base.data);
+ aead_request_set_crypt(subreq, req->src, req->dst,
+ req->cryptlen, req->iv);
+ aead_request_set_ad(subreq, req->assoclen);
+
+ return enc ? crypto_aead_encrypt(subreq) :
+ crypto_aead_decrypt(subreq);
+}
+
static int starfive_aes_aead_crypt(struct aead_request *req, unsigned long flags)
{
struct starfive_cryp_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
struct starfive_cryp_dev *cryp = ctx->cryp;
+ struct scatterlist *src, *dst, _src[2], _dst[2];
cryp->flags = flags;
- /*
- * HW engine could not perform CCM tag verification on
- * non-blocksize aligned text, use fallback algo instead
+ /* aes-ccm does not support tag verification for non-aligned text,
+ * use fallback for ccm decryption instead.
*/
- if (ctx->aead_fbk && !is_encrypt(cryp)) {
- struct aead_request *subreq = aead_request_ctx(req);
+ if (((cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_CCM) &&
+ !is_encrypt(cryp))
+ return starfive_aes_aead_do_fallback(req, 0);
- aead_request_set_tfm(subreq, ctx->aead_fbk);
- aead_request_set_callback(subreq, req->base.flags,
- req->base.complete, req->base.data);
- aead_request_set_crypt(subreq, req->src,
- req->dst, req->cryptlen, req->iv);
- aead_request_set_ad(subreq, req->assoclen);
+ src = scatterwalk_ffwd(_src, req->src, req->assoclen);
- return crypto_aead_decrypt(subreq);
- }
+ if (req->src == req->dst)
+ dst = src;
+ else
+ dst = scatterwalk_ffwd(_dst, req->dst, req->assoclen);
+
+ if (starfive_aes_check_unaligned(cryp, src, dst))
+ return starfive_aes_aead_do_fallback(req, is_encrypt(cryp));
return crypto_transfer_aead_request_to_engine(cryp->engine, req);
}
@@ -706,7 +867,7 @@ static int starfive_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
memcpy(ctx->key, key, keylen);
ctx->keylen = keylen;
- return 0;
+ return crypto_skcipher_setkey(ctx->skcipher_fbk, key, keylen);
}
static int starfive_aes_aead_setkey(struct crypto_aead *tfm, const u8 *key,
@@ -725,16 +886,20 @@ static int starfive_aes_aead_setkey(struct crypto_aead *tfm, const u8 *key,
memcpy(ctx->key, key, keylen);
ctx->keylen = keylen;
- if (ctx->aead_fbk)
- return crypto_aead_setkey(ctx->aead_fbk, key, keylen);
-
- return 0;
+ return crypto_aead_setkey(ctx->aead_fbk, key, keylen);
}
static int starfive_aes_gcm_setauthsize(struct crypto_aead *tfm,
unsigned int authsize)
{
- return crypto_gcm_check_authsize(authsize);
+ struct starfive_cryp_ctx *ctx = crypto_aead_ctx(tfm);
+ int ret;
+
+ ret = crypto_gcm_check_authsize(authsize);
+ if (ret)
+ return ret;
+
+ return crypto_aead_setauthsize(ctx->aead_fbk, authsize);
}
static int starfive_aes_ccm_setauthsize(struct crypto_aead *tfm,
@@ -820,9 +985,35 @@ static int starfive_aes_ccm_decrypt(struct aead_request *req)
return starfive_aes_aead_crypt(req, STARFIVE_AES_MODE_CCM);
}
+static int starfive_aes_ecb_init_tfm(struct crypto_skcipher *tfm)
+{
+ return starfive_aes_init_tfm(tfm, "ecb(aes-generic)");
+}
+
+static int starfive_aes_cbc_init_tfm(struct crypto_skcipher *tfm)
+{
+ return starfive_aes_init_tfm(tfm, "cbc(aes-generic)");
+}
+
+static int starfive_aes_ctr_init_tfm(struct crypto_skcipher *tfm)
+{
+ return starfive_aes_init_tfm(tfm, "ctr(aes-generic)");
+}
+
+static int starfive_aes_ccm_init_tfm(struct crypto_aead *tfm)
+{
+ return starfive_aes_aead_init_tfm(tfm, "ccm_base(ctr(aes-generic),cbcmac(aes-generic))");
+}
+
+static int starfive_aes_gcm_init_tfm(struct crypto_aead *tfm)
+{
+ return starfive_aes_aead_init_tfm(tfm, "gcm_base(ctr(aes-generic),ghash-generic)");
+}
+
static struct skcipher_engine_alg skcipher_algs[] = {
{
- .base.init = starfive_aes_init_tfm,
+ .base.init = starfive_aes_ecb_init_tfm,
+ .base.exit = starfive_aes_exit_tfm,
.base.setkey = starfive_aes_setkey,
.base.encrypt = starfive_aes_ecb_encrypt,
.base.decrypt = starfive_aes_ecb_decrypt,
@@ -832,7 +1023,8 @@ static struct skcipher_engine_alg skcipher_algs[] = {
.cra_name = "ecb(aes)",
.cra_driver_name = "starfive-ecb-aes",
.cra_priority = 200,
- .cra_flags = CRYPTO_ALG_ASYNC,
+ .cra_flags = CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_NEED_FALLBACK,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct starfive_cryp_ctx),
.cra_alignmask = 0xf,
@@ -842,7 +1034,8 @@ static struct skcipher_engine_alg skcipher_algs[] = {
.do_one_request = starfive_aes_do_one_req,
},
}, {
- .base.init = starfive_aes_init_tfm,
+ .base.init = starfive_aes_cbc_init_tfm,
+ .base.exit = starfive_aes_exit_tfm,
.base.setkey = starfive_aes_setkey,
.base.encrypt = starfive_aes_cbc_encrypt,
.base.decrypt = starfive_aes_cbc_decrypt,
@@ -853,7 +1046,8 @@ static struct skcipher_engine_alg skcipher_algs[] = {
.cra_name = "cbc(aes)",
.cra_driver_name = "starfive-cbc-aes",
.cra_priority = 200,
- .cra_flags = CRYPTO_ALG_ASYNC,
+ .cra_flags = CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_NEED_FALLBACK,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct starfive_cryp_ctx),
.cra_alignmask = 0xf,
@@ -863,7 +1057,8 @@ static struct skcipher_engine_alg skcipher_algs[] = {
.do_one_request = starfive_aes_do_one_req,
},
}, {
- .base.init = starfive_aes_init_tfm,
+ .base.init = starfive_aes_ctr_init_tfm,
+ .base.exit = starfive_aes_exit_tfm,
.base.setkey = starfive_aes_setkey,
.base.encrypt = starfive_aes_ctr_encrypt,
.base.decrypt = starfive_aes_ctr_decrypt,
@@ -874,7 +1069,8 @@ static struct skcipher_engine_alg skcipher_algs[] = {
.cra_name = "ctr(aes)",
.cra_driver_name = "starfive-ctr-aes",
.cra_priority = 200,
- .cra_flags = CRYPTO_ALG_ASYNC,
+ .cra_flags = CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_NEED_FALLBACK,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct starfive_cryp_ctx),
.cra_alignmask = 0xf,
@@ -892,7 +1088,7 @@ static struct aead_engine_alg aead_algs[] = {
.base.setauthsize = starfive_aes_gcm_setauthsize,
.base.encrypt = starfive_aes_gcm_encrypt,
.base.decrypt = starfive_aes_gcm_decrypt,
- .base.init = starfive_aes_aead_init_tfm,
+ .base.init = starfive_aes_gcm_init_tfm,
.base.exit = starfive_aes_aead_exit_tfm,
.base.ivsize = GCM_AES_IV_SIZE,
.base.maxauthsize = AES_BLOCK_SIZE,
@@ -900,7 +1096,8 @@ static struct aead_engine_alg aead_algs[] = {
.cra_name = "gcm(aes)",
.cra_driver_name = "starfive-gcm-aes",
.cra_priority = 200,
- .cra_flags = CRYPTO_ALG_ASYNC,
+ .cra_flags = CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_NEED_FALLBACK,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct starfive_cryp_ctx),
.cra_alignmask = 0xf,
@@ -914,7 +1111,7 @@ static struct aead_engine_alg aead_algs[] = {
.base.setauthsize = starfive_aes_ccm_setauthsize,
.base.encrypt = starfive_aes_ccm_encrypt,
.base.decrypt = starfive_aes_ccm_decrypt,
- .base.init = starfive_aes_aead_init_tfm,
+ .base.init = starfive_aes_ccm_init_tfm,
.base.exit = starfive_aes_aead_exit_tfm,
.base.ivsize = AES_BLOCK_SIZE,
.base.maxauthsize = AES_BLOCK_SIZE,
diff --git a/drivers/crypto/starfive/jh7110-cryp.c b/drivers/crypto/starfive/jh7110-cryp.c
index 425fddf3a8ab..e4dfed7ee0b0 100644
--- a/drivers/crypto/starfive/jh7110-cryp.c
+++ b/drivers/crypto/starfive/jh7110-cryp.c
@@ -89,34 +89,10 @@ static void starfive_dma_cleanup(struct starfive_cryp_dev *cryp)
dma_release_channel(cryp->rx);
}
-static irqreturn_t starfive_cryp_irq(int irq, void *priv)
-{
- u32 status;
- u32 mask;
- struct starfive_cryp_dev *cryp = (struct starfive_cryp_dev *)priv;
-
- mask = readl(cryp->base + STARFIVE_IE_MASK_OFFSET);
- status = readl(cryp->base + STARFIVE_IE_FLAG_OFFSET);
- if (status & STARFIVE_IE_FLAG_AES_DONE) {
- mask |= STARFIVE_IE_MASK_AES_DONE;
- writel(mask, cryp->base + STARFIVE_IE_MASK_OFFSET);
- tasklet_schedule(&cryp->aes_done);
- }
-
- if (status & STARFIVE_IE_FLAG_HASH_DONE) {
- mask |= STARFIVE_IE_MASK_HASH_DONE;
- writel(mask, cryp->base + STARFIVE_IE_MASK_OFFSET);
- tasklet_schedule(&cryp->hash_done);
- }
-
- return IRQ_HANDLED;
-}
-
static int starfive_cryp_probe(struct platform_device *pdev)
{
struct starfive_cryp_dev *cryp;
struct resource *res;
- int irq;
int ret;
cryp = devm_kzalloc(&pdev->dev, sizeof(*cryp), GFP_KERNEL);
@@ -131,9 +107,6 @@ static int starfive_cryp_probe(struct platform_device *pdev)
return dev_err_probe(&pdev->dev, PTR_ERR(cryp->base),
"Error remapping memory for platform device\n");
- tasklet_init(&cryp->aes_done, starfive_aes_done_task, (unsigned long)cryp);
- tasklet_init(&cryp->hash_done, starfive_hash_done_task, (unsigned long)cryp);
-
cryp->phys_base = res->start;
cryp->dma_maxburst = 32;
cryp->side_chan = side_chan;
@@ -153,16 +126,6 @@ static int starfive_cryp_probe(struct platform_device *pdev)
return dev_err_probe(&pdev->dev, PTR_ERR(cryp->rst),
"Error getting hardware reset line\n");
- irq = platform_get_irq(pdev, 0);
- if (irq < 0)
- return irq;
-
- ret = devm_request_irq(&pdev->dev, irq, starfive_cryp_irq, 0, pdev->name,
- (void *)cryp);
- if (ret)
- return dev_err_probe(&pdev->dev, ret,
- "Failed to register interrupt handler\n");
-
clk_prepare_enable(cryp->hclk);
clk_prepare_enable(cryp->ahb);
reset_control_deassert(cryp->rst);
@@ -219,9 +182,6 @@ err_dma_init:
clk_disable_unprepare(cryp->ahb);
reset_control_assert(cryp->rst);
- tasklet_kill(&cryp->aes_done);
- tasklet_kill(&cryp->hash_done);
-
return ret;
}
@@ -233,9 +193,6 @@ static void starfive_cryp_remove(struct platform_device *pdev)
starfive_hash_unregister_algs();
starfive_rsa_unregister_algs();
- tasklet_kill(&cryp->aes_done);
- tasklet_kill(&cryp->hash_done);
-
crypto_engine_stop(cryp->engine);
crypto_engine_exit(cryp->engine);
diff --git a/drivers/crypto/starfive/jh7110-cryp.h b/drivers/crypto/starfive/jh7110-cryp.h
index 6cdf6db5d904..494a74f52706 100644
--- a/drivers/crypto/starfive/jh7110-cryp.h
+++ b/drivers/crypto/starfive/jh7110-cryp.h
@@ -91,6 +91,7 @@ union starfive_hash_csr {
#define STARFIVE_HASH_KEY_DONE BIT(13)
u32 key_done :1;
u32 key_flag :1;
+#define STARFIVE_HASH_HMAC_DONE BIT(15)
u32 hmac_done :1;
#define STARFIVE_HASH_BUSY BIT(16)
u32 busy :1;
@@ -168,6 +169,7 @@ struct starfive_cryp_ctx {
struct crypto_akcipher *akcipher_fbk;
struct crypto_ahash *ahash_fbk;
struct crypto_aead *aead_fbk;
+ struct crypto_skcipher *skcipher_fbk;
};
struct starfive_cryp_dev {
@@ -185,11 +187,8 @@ struct starfive_cryp_dev {
struct dma_chan *rx;
struct dma_slave_config cfg_in;
struct dma_slave_config cfg_out;
- struct scatter_walk in_walk;
- struct scatter_walk out_walk;
struct crypto_engine *engine;
- struct tasklet_struct aes_done;
- struct tasklet_struct hash_done;
+ struct completion dma_done;
size_t assoclen;
size_t total_in;
size_t total_out;
@@ -236,7 +235,4 @@ void starfive_rsa_unregister_algs(void);
int starfive_aes_register_algs(void);
void starfive_aes_unregister_algs(void);
-
-void starfive_hash_done_task(unsigned long param);
-void starfive_aes_done_task(unsigned long param);
#endif
diff --git a/drivers/crypto/starfive/jh7110-hash.c b/drivers/crypto/starfive/jh7110-hash.c
index b6d1808012ca..2c60a1047bc3 100644
--- a/drivers/crypto/starfive/jh7110-hash.c
+++ b/drivers/crypto/starfive/jh7110-hash.c
@@ -36,15 +36,22 @@
#define STARFIVE_HASH_BUFLEN SHA512_BLOCK_SIZE
#define STARFIVE_HASH_RESET 0x2
-static inline int starfive_hash_wait_busy(struct starfive_cryp_ctx *ctx)
+static inline int starfive_hash_wait_busy(struct starfive_cryp_dev *cryp)
{
- struct starfive_cryp_dev *cryp = ctx->cryp;
u32 status;
return readl_relaxed_poll_timeout(cryp->base + STARFIVE_HASH_SHACSR, status,
!(status & STARFIVE_HASH_BUSY), 10, 100000);
}
+static inline int starfive_hash_wait_hmac_done(struct starfive_cryp_dev *cryp)
+{
+ u32 status;
+
+ return readl_relaxed_poll_timeout(cryp->base + STARFIVE_HASH_SHACSR, status,
+ (status & STARFIVE_HASH_HMAC_DONE), 10, 100000);
+}
+
static inline int starfive_hash_wait_key_done(struct starfive_cryp_ctx *ctx)
{
struct starfive_cryp_dev *cryp = ctx->cryp;
@@ -84,64 +91,26 @@ static int starfive_hash_hmac_key(struct starfive_cryp_ctx *ctx)
return 0;
}
-static void starfive_hash_start(void *param)
+static void starfive_hash_start(struct starfive_cryp_dev *cryp)
{
- struct starfive_cryp_ctx *ctx = param;
- struct starfive_cryp_request_ctx *rctx = ctx->rctx;
- struct starfive_cryp_dev *cryp = ctx->cryp;
- union starfive_alg_cr alg_cr;
union starfive_hash_csr csr;
- u32 stat;
-
- dma_unmap_sg(cryp->dev, rctx->in_sg, rctx->in_sg_len, DMA_TO_DEVICE);
-
- alg_cr.v = 0;
- alg_cr.clear = 1;
-
- writel(alg_cr.v, cryp->base + STARFIVE_ALG_CR_OFFSET);
csr.v = readl(cryp->base + STARFIVE_HASH_SHACSR);
csr.firstb = 0;
csr.final = 1;
-
- stat = readl(cryp->base + STARFIVE_IE_MASK_OFFSET);
- stat &= ~STARFIVE_IE_MASK_HASH_DONE;
- writel(stat, cryp->base + STARFIVE_IE_MASK_OFFSET);
writel(csr.v, cryp->base + STARFIVE_HASH_SHACSR);
}
-static int starfive_hash_xmit_dma(struct starfive_cryp_ctx *ctx)
+static void starfive_hash_dma_callback(void *param)
{
- struct starfive_cryp_request_ctx *rctx = ctx->rctx;
- struct starfive_cryp_dev *cryp = ctx->cryp;
- struct dma_async_tx_descriptor *in_desc;
- union starfive_alg_cr alg_cr;
- int total_len;
- int ret;
-
- if (!rctx->total) {
- starfive_hash_start(ctx);
- return 0;
- }
+ struct starfive_cryp_dev *cryp = param;
- writel(rctx->total, cryp->base + STARFIVE_DMA_IN_LEN_OFFSET);
-
- total_len = rctx->total;
- total_len = (total_len & 0x3) ? (((total_len >> 2) + 1) << 2) : total_len;
- sg_dma_len(rctx->in_sg) = total_len;
-
- alg_cr.v = 0;
- alg_cr.start = 1;
- alg_cr.hash_dma_en = 1;
-
- writel(alg_cr.v, cryp->base + STARFIVE_ALG_CR_OFFSET);
-
- ret = dma_map_sg(cryp->dev, rctx->in_sg, rctx->in_sg_len, DMA_TO_DEVICE);
- if (!ret)
- return dev_err_probe(cryp->dev, -EINVAL, "dma_map_sg() error\n");
+ complete(&cryp->dma_done);
+}
- cryp->cfg_in.direction = DMA_MEM_TO_DEV;
- cryp->cfg_in.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+static void starfive_hash_dma_init(struct starfive_cryp_dev *cryp)
+{
+ cryp->cfg_in.src_addr_width = DMA_SLAVE_BUSWIDTH_16_BYTES;
cryp->cfg_in.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
cryp->cfg_in.src_maxburst = cryp->dma_maxburst;
cryp->cfg_in.dst_maxburst = cryp->dma_maxburst;
@@ -149,50 +118,48 @@ static int starfive_hash_xmit_dma(struct starfive_cryp_ctx *ctx)
dmaengine_slave_config(cryp->tx, &cryp->cfg_in);
- in_desc = dmaengine_prep_slave_sg(cryp->tx, rctx->in_sg,
- ret, DMA_MEM_TO_DEV,
- DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
-
- if (!in_desc)
- return -EINVAL;
-
- in_desc->callback = starfive_hash_start;
- in_desc->callback_param = ctx;
-
- dmaengine_submit(in_desc);
- dma_async_issue_pending(cryp->tx);
-
- return 0;
+ init_completion(&cryp->dma_done);
}
-static int starfive_hash_xmit(struct starfive_cryp_ctx *ctx)
+static int starfive_hash_dma_xfer(struct starfive_cryp_dev *cryp,
+ struct scatterlist *sg)
{
- struct starfive_cryp_request_ctx *rctx = ctx->rctx;
- struct starfive_cryp_dev *cryp = ctx->cryp;
+ struct dma_async_tx_descriptor *in_desc;
+ union starfive_alg_cr alg_cr;
int ret = 0;
- rctx->csr.hash.v = 0;
- rctx->csr.hash.reset = 1;
- writel(rctx->csr.hash.v, cryp->base + STARFIVE_HASH_SHACSR);
-
- if (starfive_hash_wait_busy(ctx))
- return dev_err_probe(cryp->dev, -ETIMEDOUT, "Error resetting engine.\n");
+ alg_cr.v = 0;
+ alg_cr.start = 1;
+ alg_cr.hash_dma_en = 1;
+ writel(alg_cr.v, cryp->base + STARFIVE_ALG_CR_OFFSET);
- rctx->csr.hash.v = 0;
- rctx->csr.hash.mode = ctx->hash_mode;
- rctx->csr.hash.ie = 1;
+ writel(sg_dma_len(sg), cryp->base + STARFIVE_DMA_IN_LEN_OFFSET);
+ sg_dma_len(sg) = ALIGN(sg_dma_len(sg), sizeof(u32));
- if (ctx->is_hmac) {
- ret = starfive_hash_hmac_key(ctx);
- if (ret)
- return ret;
- } else {
- rctx->csr.hash.start = 1;
- rctx->csr.hash.firstb = 1;
- writel(rctx->csr.hash.v, cryp->base + STARFIVE_HASH_SHACSR);
+ in_desc = dmaengine_prep_slave_sg(cryp->tx, sg, 1, DMA_MEM_TO_DEV,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ if (!in_desc) {
+ ret = -EINVAL;
+ goto end;
}
- return starfive_hash_xmit_dma(ctx);
+ reinit_completion(&cryp->dma_done);
+ in_desc->callback = starfive_hash_dma_callback;
+ in_desc->callback_param = cryp;
+
+ dmaengine_submit(in_desc);
+ dma_async_issue_pending(cryp->tx);
+
+ if (!wait_for_completion_timeout(&cryp->dma_done,
+ msecs_to_jiffies(1000)))
+ ret = -ETIMEDOUT;
+
+end:
+ alg_cr.v = 0;
+ alg_cr.clear = 1;
+ writel(alg_cr.v, cryp->base + STARFIVE_ALG_CR_OFFSET);
+
+ return ret;
}
static int starfive_hash_copy_hash(struct ahash_request *req)
@@ -215,58 +182,74 @@ static int starfive_hash_copy_hash(struct ahash_request *req)
return 0;
}
-void starfive_hash_done_task(unsigned long param)
+static void starfive_hash_done_task(struct starfive_cryp_dev *cryp)
{
- struct starfive_cryp_dev *cryp = (struct starfive_cryp_dev *)param;
int err = cryp->err;
if (!err)
err = starfive_hash_copy_hash(cryp->req.hreq);
- /* Reset to clear hash_done in irq register*/
- writel(STARFIVE_HASH_RESET, cryp->base + STARFIVE_HASH_SHACSR);
-
crypto_finalize_hash_request(cryp->engine, cryp->req.hreq, err);
}
-static int starfive_hash_check_aligned(struct scatterlist *sg, size_t total, size_t align)
+static int starfive_hash_one_request(struct crypto_engine *engine, void *areq)
{
- int len = 0;
+ struct ahash_request *req = container_of(areq, struct ahash_request,
+ base);
+ struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
+ struct starfive_cryp_request_ctx *rctx = ctx->rctx;
+ struct starfive_cryp_dev *cryp = ctx->cryp;
+ struct scatterlist *tsg;
+ int ret, src_nents, i;
- if (!total)
- return 0;
+ writel(STARFIVE_HASH_RESET, cryp->base + STARFIVE_HASH_SHACSR);
- if (!IS_ALIGNED(total, align))
- return -EINVAL;
+ if (starfive_hash_wait_busy(cryp))
+ return dev_err_probe(cryp->dev, -ETIMEDOUT, "Error resetting hardware\n");
- while (sg) {
- if (!IS_ALIGNED(sg->offset, sizeof(u32)))
- return -EINVAL;
+ rctx->csr.hash.v = 0;
+ rctx->csr.hash.mode = ctx->hash_mode;
- if (!IS_ALIGNED(sg->length, align))
- return -EINVAL;
+ if (ctx->is_hmac) {
+ ret = starfive_hash_hmac_key(ctx);
+ if (ret)
+ return ret;
+ } else {
+ rctx->csr.hash.start = 1;
+ rctx->csr.hash.firstb = 1;
+ writel(rctx->csr.hash.v, cryp->base + STARFIVE_HASH_SHACSR);
+ }
+
+ /* No input message, get digest and end. */
+ if (!rctx->total)
+ goto hash_start;
+
+ starfive_hash_dma_init(cryp);
+
+ for_each_sg(rctx->in_sg, tsg, rctx->in_sg_len, i) {
+ src_nents = dma_map_sg(cryp->dev, tsg, 1, DMA_TO_DEVICE);
+ if (src_nents == 0)
+ return dev_err_probe(cryp->dev, -ENOMEM,
+ "dma_map_sg error\n");
- len += sg->length;
- sg = sg_next(sg);
+ ret = starfive_hash_dma_xfer(cryp, tsg);
+ dma_unmap_sg(cryp->dev, tsg, 1, DMA_TO_DEVICE);
+ if (ret)
+ return ret;
}
- if (len != total)
- return -EINVAL;
+hash_start:
+ starfive_hash_start(cryp);
- return 0;
-}
+ if (starfive_hash_wait_busy(cryp))
+ return dev_err_probe(cryp->dev, -ETIMEDOUT, "Error generating digest\n");
-static int starfive_hash_one_request(struct crypto_engine *engine, void *areq)
-{
- struct ahash_request *req = container_of(areq, struct ahash_request,
- base);
- struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
- struct starfive_cryp_dev *cryp = ctx->cryp;
+ if (ctx->is_hmac)
+ cryp->err = starfive_hash_wait_hmac_done(cryp);
- if (!cryp)
- return -ENODEV;
+ starfive_hash_done_task(cryp);
- return starfive_hash_xmit(ctx);
+ return 0;
}
static int starfive_hash_init(struct ahash_request *req)
@@ -337,22 +320,6 @@ static int starfive_hash_finup(struct ahash_request *req)
return crypto_ahash_finup(&rctx->ahash_fbk_req);
}
-static int starfive_hash_digest_fb(struct ahash_request *req)
-{
- struct starfive_cryp_request_ctx *rctx = ahash_request_ctx(req);
- struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
- struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(tfm);
-
- ahash_request_set_tfm(&rctx->ahash_fbk_req, ctx->ahash_fbk);
- ahash_request_set_callback(&rctx->ahash_fbk_req, req->base.flags,
- req->base.complete, req->base.data);
-
- ahash_request_set_crypt(&rctx->ahash_fbk_req, req->src,
- req->result, req->nbytes);
-
- return crypto_ahash_digest(&rctx->ahash_fbk_req);
-}
-
static int starfive_hash_digest(struct ahash_request *req)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
@@ -370,9 +337,6 @@ static int starfive_hash_digest(struct ahash_request *req)
rctx->in_sg_len = sg_nents_for_len(rctx->in_sg, rctx->total);
ctx->rctx = rctx;
- if (starfive_hash_check_aligned(rctx->in_sg, rctx->total, rctx->blksize))
- return starfive_hash_digest_fb(req);
-
return crypto_transfer_hash_request_to_engine(cryp->engine, req);
}
@@ -406,7 +370,8 @@ static int starfive_hash_import(struct ahash_request *req, const void *in)
static int starfive_hash_init_tfm(struct crypto_ahash *hash,
const char *alg_name,
- unsigned int mode)
+ unsigned int mode,
+ bool is_hmac)
{
struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(hash);
@@ -426,7 +391,7 @@ static int starfive_hash_init_tfm(struct crypto_ahash *hash,
crypto_ahash_set_reqsize(hash, sizeof(struct starfive_cryp_request_ctx) +
crypto_ahash_reqsize(ctx->ahash_fbk));
- ctx->keylen = 0;
+ ctx->is_hmac = is_hmac;
ctx->hash_mode = mode;
return 0;
@@ -529,81 +494,61 @@ static int starfive_hash_setkey(struct crypto_ahash *hash,
static int starfive_sha224_init_tfm(struct crypto_ahash *hash)
{
return starfive_hash_init_tfm(hash, "sha224-generic",
- STARFIVE_HASH_SHA224);
+ STARFIVE_HASH_SHA224, 0);
}
static int starfive_sha256_init_tfm(struct crypto_ahash *hash)
{
return starfive_hash_init_tfm(hash, "sha256-generic",
- STARFIVE_HASH_SHA256);
+ STARFIVE_HASH_SHA256, 0);
}
static int starfive_sha384_init_tfm(struct crypto_ahash *hash)
{
return starfive_hash_init_tfm(hash, "sha384-generic",
- STARFIVE_HASH_SHA384);
+ STARFIVE_HASH_SHA384, 0);
}
static int starfive_sha512_init_tfm(struct crypto_ahash *hash)
{
return starfive_hash_init_tfm(hash, "sha512-generic",
- STARFIVE_HASH_SHA512);
+ STARFIVE_HASH_SHA512, 0);
}
static int starfive_sm3_init_tfm(struct crypto_ahash *hash)
{
return starfive_hash_init_tfm(hash, "sm3-generic",
- STARFIVE_HASH_SM3);
+ STARFIVE_HASH_SM3, 0);
}
static int starfive_hmac_sha224_init_tfm(struct crypto_ahash *hash)
{
- struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(hash);
-
- ctx->is_hmac = true;
-
return starfive_hash_init_tfm(hash, "hmac(sha224-generic)",
- STARFIVE_HASH_SHA224);
+ STARFIVE_HASH_SHA224, 1);
}
static int starfive_hmac_sha256_init_tfm(struct crypto_ahash *hash)
{
- struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(hash);
-
- ctx->is_hmac = true;
-
return starfive_hash_init_tfm(hash, "hmac(sha256-generic)",
- STARFIVE_HASH_SHA256);
+ STARFIVE_HASH_SHA256, 1);
}
static int starfive_hmac_sha384_init_tfm(struct crypto_ahash *hash)
{
- struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(hash);
-
- ctx->is_hmac = true;
-
return starfive_hash_init_tfm(hash, "hmac(sha384-generic)",
- STARFIVE_HASH_SHA384);
+ STARFIVE_HASH_SHA384, 1);
}
static int starfive_hmac_sha512_init_tfm(struct crypto_ahash *hash)
{
- struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(hash);
-
- ctx->is_hmac = true;
-
return starfive_hash_init_tfm(hash, "hmac(sha512-generic)",
- STARFIVE_HASH_SHA512);
+ STARFIVE_HASH_SHA512, 1);
}
static int starfive_hmac_sm3_init_tfm(struct crypto_ahash *hash)
{
- struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(hash);
-
- ctx->is_hmac = true;
-
return starfive_hash_init_tfm(hash, "hmac(sm3-generic)",
- STARFIVE_HASH_SM3);
+ STARFIVE_HASH_SM3, 1);
}
static struct ahash_engine_alg algs_sha2_sm3[] = {
diff --git a/drivers/crypto/starfive/jh7110-rsa.c b/drivers/crypto/starfive/jh7110-rsa.c
index cf8bda7f0855..33093ba4b13a 100644
--- a/drivers/crypto/starfive/jh7110-rsa.c
+++ b/drivers/crypto/starfive/jh7110-rsa.c
@@ -45,6 +45,9 @@ static inline int starfive_pka_wait_done(struct starfive_cryp_ctx *ctx)
static void starfive_rsa_free_key(struct starfive_rsa_key *key)
{
+ if (!key->key_sz)
+ return;
+
kfree_sensitive(key->d);
kfree_sensitive(key->e);
kfree_sensitive(key->n);
@@ -273,7 +276,6 @@ static int starfive_rsa_enc_core(struct starfive_cryp_ctx *ctx, int enc)
err_rsa_crypt:
writel(STARFIVE_RSA_RESET, cryp->base + STARFIVE_PKA_CACR_OFFSET);
- kfree(rctx->rsa_data);
return ret;
}
@@ -534,16 +536,14 @@ static int starfive_rsa_init_tfm(struct crypto_akcipher *tfm)
{
struct starfive_cryp_ctx *ctx = akcipher_tfm_ctx(tfm);
+ ctx->cryp = starfive_cryp_find_dev(ctx);
+ if (!ctx->cryp)
+ return -ENODEV;
+
ctx->akcipher_fbk = crypto_alloc_akcipher("rsa-generic", 0, 0);
if (IS_ERR(ctx->akcipher_fbk))
return PTR_ERR(ctx->akcipher_fbk);
- ctx->cryp = starfive_cryp_find_dev(ctx);
- if (!ctx->cryp) {
- crypto_free_akcipher(ctx->akcipher_fbk);
- return -ENODEV;
- }
-
akcipher_set_reqsize(tfm, sizeof(struct starfive_cryp_request_ctx) +
sizeof(struct crypto_akcipher) + 32);
diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c
index 34e0d7e381a8..351827372ea6 100644
--- a/drivers/crypto/stm32/stm32-hash.c
+++ b/drivers/crypto/stm32/stm32-hash.c
@@ -94,6 +94,7 @@
#define HASH_FLAGS_ERRORS BIT(21)
#define HASH_FLAGS_EMPTY BIT(22)
#define HASH_FLAGS_HMAC BIT(23)
+#define HASH_FLAGS_SGS_COPIED BIT(24)
#define HASH_OP_UPDATE 1
#define HASH_OP_FINAL 2
@@ -145,7 +146,7 @@ struct stm32_hash_state {
u16 bufcnt;
u16 blocklen;
- u8 buffer[HASH_BUFLEN] __aligned(4);
+ u8 buffer[HASH_BUFLEN] __aligned(sizeof(u32));
/* hash state */
u32 hw_context[3 + HASH_CSR_NB_MAX];
@@ -158,8 +159,8 @@ struct stm32_hash_request_ctx {
u8 digest[SHA512_DIGEST_SIZE] __aligned(sizeof(u32));
size_t digcnt;
- /* DMA */
struct scatterlist *sg;
+ struct scatterlist sgl[2]; /* scatterlist used to realize alignment */
unsigned int offset;
unsigned int total;
struct scatterlist sg_key;
@@ -184,6 +185,7 @@ struct stm32_hash_pdata {
size_t algs_info_size;
bool has_sr;
bool has_mdmat;
+ bool context_secured;
bool broken_emptymsg;
bool ux500;
};
@@ -195,6 +197,7 @@ struct stm32_hash_dev {
struct reset_control *rst;
void __iomem *io_base;
phys_addr_t phys_base;
+ u8 xmit_buf[HASH_BUFLEN] __aligned(sizeof(u32));
u32 dma_mode;
bool polled;
@@ -220,6 +223,8 @@ static struct stm32_hash_drv stm32_hash = {
};
static void stm32_hash_dma_callback(void *param);
+static int stm32_hash_prepare_request(struct ahash_request *req);
+static void stm32_hash_unprepare_request(struct ahash_request *req);
static inline u32 stm32_hash_read(struct stm32_hash_dev *hdev, u32 offset)
{
@@ -232,6 +237,11 @@ static inline void stm32_hash_write(struct stm32_hash_dev *hdev,
writel_relaxed(value, hdev->io_base + offset);
}
+/**
+ * stm32_hash_wait_busy - wait until hash processor is available. It return an
+ * error if the hash core is processing a block of data for more than 10 ms.
+ * @hdev: the stm32_hash_dev device.
+ */
static inline int stm32_hash_wait_busy(struct stm32_hash_dev *hdev)
{
u32 status;
@@ -245,6 +255,11 @@ static inline int stm32_hash_wait_busy(struct stm32_hash_dev *hdev)
!(status & HASH_SR_BUSY), 10, 10000);
}
+/**
+ * stm32_hash_set_nblw - set the number of valid bytes in the last word.
+ * @hdev: the stm32_hash_dev device.
+ * @length: the length of the final word.
+ */
static void stm32_hash_set_nblw(struct stm32_hash_dev *hdev, int length)
{
u32 reg;
@@ -282,6 +297,11 @@ static int stm32_hash_write_key(struct stm32_hash_dev *hdev)
return 0;
}
+/**
+ * stm32_hash_write_ctrl - Initialize the hash processor, only if
+ * HASH_FLAGS_INIT is set.
+ * @hdev: the stm32_hash_dev device
+ */
static void stm32_hash_write_ctrl(struct stm32_hash_dev *hdev)
{
struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req);
@@ -469,9 +489,7 @@ static int stm32_hash_update_cpu(struct stm32_hash_dev *hdev)
{
struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req);
struct stm32_hash_state *state = &rctx->state;
- u32 *preg = state->hw_context;
int bufcnt, err = 0, final;
- int i, swap_reg;
dev_dbg(hdev->dev, "%s flags %x\n", __func__, state->flags);
@@ -495,34 +513,23 @@ static int stm32_hash_update_cpu(struct stm32_hash_dev *hdev)
return stm32_hash_xmit_cpu(hdev, state->buffer, bufcnt, 1);
}
- if (!(hdev->flags & HASH_FLAGS_INIT))
- return 0;
-
- if (stm32_hash_wait_busy(hdev))
- return -ETIMEDOUT;
-
- swap_reg = hash_swap_reg(rctx);
-
- if (!hdev->pdata->ux500)
- *preg++ = stm32_hash_read(hdev, HASH_IMR);
- *preg++ = stm32_hash_read(hdev, HASH_STR);
- *preg++ = stm32_hash_read(hdev, HASH_CR);
- for (i = 0; i < swap_reg; i++)
- *preg++ = stm32_hash_read(hdev, HASH_CSR(i));
-
- state->flags |= HASH_FLAGS_INIT;
-
return err;
}
static int stm32_hash_xmit_dma(struct stm32_hash_dev *hdev,
- struct scatterlist *sg, int length, int mdma)
+ struct scatterlist *sg, int length, int mdmat)
{
struct dma_async_tx_descriptor *in_desc;
dma_cookie_t cookie;
u32 reg;
int err;
+ dev_dbg(hdev->dev, "%s mdmat: %x length: %d\n", __func__, mdmat, length);
+
+ /* do not use dma if there is no data to send */
+ if (length <= 0)
+ return 0;
+
in_desc = dmaengine_prep_slave_sg(hdev->dma_lch, sg, 1,
DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT |
DMA_CTRL_ACK);
@@ -535,13 +542,12 @@ static int stm32_hash_xmit_dma(struct stm32_hash_dev *hdev,
in_desc->callback = stm32_hash_dma_callback;
in_desc->callback_param = hdev;
- hdev->flags |= HASH_FLAGS_FINAL;
hdev->flags |= HASH_FLAGS_DMA_ACTIVE;
reg = stm32_hash_read(hdev, HASH_CR);
if (hdev->pdata->has_mdmat) {
- if (mdma)
+ if (mdmat)
reg |= HASH_CR_MDMAT;
else
reg &= ~HASH_CR_MDMAT;
@@ -550,7 +556,6 @@ static int stm32_hash_xmit_dma(struct stm32_hash_dev *hdev,
stm32_hash_write(hdev, HASH_CR, reg);
- stm32_hash_set_nblw(hdev, length);
cookie = dmaengine_submit(in_desc);
err = dma_submit_error(cookie);
@@ -590,7 +595,7 @@ static int stm32_hash_hmac_dma_send(struct stm32_hash_dev *hdev)
struct stm32_hash_ctx *ctx = crypto_ahash_ctx(tfm);
int err;
- if (ctx->keylen < rctx->state.blocklen || hdev->dma_mode == 1) {
+ if (ctx->keylen < rctx->state.blocklen || hdev->dma_mode > 0) {
err = stm32_hash_write_key(hdev);
if (stm32_hash_wait_busy(hdev))
return -ETIMEDOUT;
@@ -655,18 +660,20 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev)
struct scatterlist sg[1], *tsg;
int err = 0, reg, ncp = 0;
unsigned int i, len = 0, bufcnt = 0;
+ bool final = hdev->flags & HASH_FLAGS_FINAL;
bool is_last = false;
+ u32 last_word;
- rctx->sg = hdev->req->src;
- rctx->total = hdev->req->nbytes;
+ dev_dbg(hdev->dev, "%s total: %d bufcnt: %d final: %d\n",
+ __func__, rctx->total, rctx->state.bufcnt, final);
- rctx->nents = sg_nents(rctx->sg);
if (rctx->nents < 0)
return -EINVAL;
stm32_hash_write_ctrl(hdev);
- if (hdev->flags & HASH_FLAGS_HMAC) {
+ if (hdev->flags & HASH_FLAGS_HMAC && (!(hdev->flags & HASH_FLAGS_HMAC_KEY))) {
+ hdev->flags |= HASH_FLAGS_HMAC_KEY;
err = stm32_hash_hmac_dma_send(hdev);
if (err != -EINPROGRESS)
return err;
@@ -677,22 +684,36 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev)
len = sg->length;
if (sg_is_last(sg) || (bufcnt + sg[0].length) >= rctx->total) {
- sg->length = rctx->total - bufcnt;
- is_last = true;
- if (hdev->dma_mode == 1) {
- len = (ALIGN(sg->length, 16) - 16);
-
- ncp = sg_pcopy_to_buffer(
- rctx->sg, rctx->nents,
- rctx->state.buffer, sg->length - len,
- rctx->total - sg->length + len);
-
- sg->length = len;
+ if (!final) {
+ /* Always manually put the last word of a non-final transfer. */
+ len -= sizeof(u32);
+ sg_pcopy_to_buffer(rctx->sg, rctx->nents, &last_word, 4, len);
+ sg->length -= sizeof(u32);
} else {
- if (!(IS_ALIGNED(sg->length, sizeof(u32)))) {
- len = sg->length;
- sg->length = ALIGN(sg->length,
- sizeof(u32));
+ /*
+ * In Multiple DMA mode, DMA must be aborted before the final
+ * transfer.
+ */
+ sg->length = rctx->total - bufcnt;
+ if (hdev->dma_mode > 0) {
+ len = (ALIGN(sg->length, 16) - 16);
+
+ ncp = sg_pcopy_to_buffer(rctx->sg, rctx->nents,
+ rctx->state.buffer,
+ sg->length - len,
+ rctx->total - sg->length + len);
+
+ if (!len)
+ break;
+
+ sg->length = len;
+ } else {
+ is_last = true;
+ if (!(IS_ALIGNED(sg->length, sizeof(u32)))) {
+ len = sg->length;
+ sg->length = ALIGN(sg->length,
+ sizeof(u32));
+ }
}
}
}
@@ -706,43 +727,67 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev)
err = stm32_hash_xmit_dma(hdev, sg, len, !is_last);
+ /* The last word of a non final transfer is sent manually. */
+ if (!final) {
+ stm32_hash_write(hdev, HASH_DIN, last_word);
+ len += sizeof(u32);
+ }
+
+ rctx->total -= len;
+
bufcnt += sg[0].length;
dma_unmap_sg(hdev->dev, sg, 1, DMA_TO_DEVICE);
- if (err == -ENOMEM)
+ if (err == -ENOMEM || err == -ETIMEDOUT)
return err;
if (is_last)
break;
}
- if (hdev->dma_mode == 1) {
- if (stm32_hash_wait_busy(hdev))
- return -ETIMEDOUT;
- reg = stm32_hash_read(hdev, HASH_CR);
- reg &= ~HASH_CR_DMAE;
- reg |= HASH_CR_DMAA;
- stm32_hash_write(hdev, HASH_CR, reg);
+ /*
+ * When the second last block transfer of 4 words is performed by the DMA,
+ * the software must set the DMA Abort bit (DMAA) to 1 before completing the
+ * last transfer of 4 words or less.
+ */
+ if (final) {
+ if (hdev->dma_mode > 0) {
+ if (stm32_hash_wait_busy(hdev))
+ return -ETIMEDOUT;
+ reg = stm32_hash_read(hdev, HASH_CR);
+ reg &= ~HASH_CR_DMAE;
+ reg |= HASH_CR_DMAA;
+ stm32_hash_write(hdev, HASH_CR, reg);
+
+ if (ncp) {
+ memset(buffer + ncp, 0, 4 - DIV_ROUND_UP(ncp, sizeof(u32)));
+ writesl(hdev->io_base + HASH_DIN, buffer,
+ DIV_ROUND_UP(ncp, sizeof(u32)));
+ }
- if (ncp) {
- memset(buffer + ncp, 0,
- DIV_ROUND_UP(ncp, sizeof(u32)) - ncp);
- writesl(hdev->io_base + HASH_DIN, buffer,
- DIV_ROUND_UP(ncp, sizeof(u32)));
+ stm32_hash_set_nblw(hdev, ncp);
+ reg = stm32_hash_read(hdev, HASH_STR);
+ reg |= HASH_STR_DCAL;
+ stm32_hash_write(hdev, HASH_STR, reg);
+ err = -EINPROGRESS;
}
- stm32_hash_set_nblw(hdev, ncp);
- reg = stm32_hash_read(hdev, HASH_STR);
- reg |= HASH_STR_DCAL;
- stm32_hash_write(hdev, HASH_STR, reg);
- err = -EINPROGRESS;
- }
- if (hdev->flags & HASH_FLAGS_HMAC) {
- if (stm32_hash_wait_busy(hdev))
- return -ETIMEDOUT;
- err = stm32_hash_hmac_dma_send(hdev);
+ /*
+ * The hash processor needs the key to be loaded a second time in order
+ * to process the HMAC.
+ */
+ if (hdev->flags & HASH_FLAGS_HMAC) {
+ if (stm32_hash_wait_busy(hdev))
+ return -ETIMEDOUT;
+ err = stm32_hash_hmac_dma_send(hdev);
+ }
+
+ return err;
}
- return err;
+ if (err != -EINPROGRESS)
+ return err;
+
+ return 0;
}
static struct stm32_hash_dev *stm32_hash_find_dev(struct stm32_hash_ctx *ctx)
@@ -765,33 +810,6 @@ static struct stm32_hash_dev *stm32_hash_find_dev(struct stm32_hash_ctx *ctx)
return hdev;
}
-static bool stm32_hash_dma_aligned_data(struct ahash_request *req)
-{
- struct scatterlist *sg;
- struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
- struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
- struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx);
- int i;
-
- if (!hdev->dma_lch || req->nbytes <= rctx->state.blocklen)
- return false;
-
- if (sg_nents(req->src) > 1) {
- if (hdev->dma_mode == 1)
- return false;
- for_each_sg(req->src, sg, sg_nents(req->src), i) {
- if ((!IS_ALIGNED(sg->length, sizeof(u32))) &&
- (!sg_is_last(sg)))
- return false;
- }
- }
-
- if (req->src->offset % 4)
- return false;
-
- return true;
-}
-
static int stm32_hash_init(struct ahash_request *req)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
@@ -802,8 +820,10 @@ static int stm32_hash_init(struct ahash_request *req)
bool sha3_mode = ctx->flags & HASH_FLAGS_SHA3_MODE;
rctx->hdev = hdev;
+ state->flags = 0;
- state->flags = HASH_FLAGS_CPU;
+ if (!(hdev->dma_lch && hdev->pdata->has_mdmat))
+ state->flags |= HASH_FLAGS_CPU;
if (sha3_mode)
state->flags |= HASH_FLAGS_SHA3_MODE;
@@ -857,6 +877,7 @@ static int stm32_hash_init(struct ahash_request *req)
dev_err(hdev->dev, "Error, block too large");
return -EINVAL;
}
+ rctx->nents = 0;
rctx->total = 0;
rctx->offset = 0;
rctx->data_type = HASH_DATA_8_BITS;
@@ -874,6 +895,9 @@ static int stm32_hash_update_req(struct stm32_hash_dev *hdev)
struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req);
struct stm32_hash_state *state = &rctx->state;
+ dev_dbg(hdev->dev, "update_req: total: %u, digcnt: %zd, final: 0",
+ rctx->total, rctx->digcnt);
+
if (!(state->flags & HASH_FLAGS_CPU))
return stm32_hash_dma_send(hdev);
@@ -887,6 +911,11 @@ static int stm32_hash_final_req(struct stm32_hash_dev *hdev)
struct stm32_hash_state *state = &rctx->state;
int buflen = state->bufcnt;
+ if (!(state->flags & HASH_FLAGS_CPU)) {
+ hdev->flags |= HASH_FLAGS_FINAL;
+ return stm32_hash_dma_send(hdev);
+ }
+
if (state->flags & HASH_FLAGS_FINUP)
return stm32_hash_update_req(hdev);
@@ -968,15 +997,21 @@ static int stm32_hash_finish(struct ahash_request *req)
static void stm32_hash_finish_req(struct ahash_request *req, int err)
{
struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
+ struct stm32_hash_state *state = &rctx->state;
struct stm32_hash_dev *hdev = rctx->hdev;
+ if (hdev->flags & HASH_FLAGS_DMA_ACTIVE)
+ state->flags |= HASH_FLAGS_DMA_ACTIVE;
+ else
+ state->flags &= ~HASH_FLAGS_DMA_ACTIVE;
+
if (!err && (HASH_FLAGS_FINAL & hdev->flags)) {
stm32_hash_copy_hash(req);
err = stm32_hash_finish(req);
}
- pm_runtime_mark_last_busy(hdev->dev);
- pm_runtime_put_autosuspend(hdev->dev);
+ /* Finalized request mist be unprepared here */
+ stm32_hash_unprepare_request(req);
crypto_finalize_hash_request(hdev->engine, req, err);
}
@@ -1006,6 +1041,10 @@ static int stm32_hash_one_request(struct crypto_engine *engine, void *areq)
pm_runtime_get_sync(hdev->dev);
+ err = stm32_hash_prepare_request(req);
+ if (err)
+ return err;
+
hdev->req = req;
hdev->flags = 0;
swap_reg = hash_swap_reg(rctx);
@@ -1030,6 +1069,12 @@ static int stm32_hash_one_request(struct crypto_engine *engine, void *areq)
if (state->flags & HASH_FLAGS_HMAC)
hdev->flags |= HASH_FLAGS_HMAC |
HASH_FLAGS_HMAC_KEY;
+
+ if (state->flags & HASH_FLAGS_CPU)
+ hdev->flags |= HASH_FLAGS_CPU;
+
+ if (state->flags & HASH_FLAGS_DMA_ACTIVE)
+ hdev->flags |= HASH_FLAGS_DMA_ACTIVE;
}
if (rctx->op == HASH_OP_UPDATE)
@@ -1054,6 +1099,284 @@ static int stm32_hash_one_request(struct crypto_engine *engine, void *areq)
return 0;
}
+static int stm32_hash_copy_sgs(struct stm32_hash_request_ctx *rctx,
+ struct scatterlist *sg, int bs,
+ unsigned int new_len)
+{
+ struct stm32_hash_state *state = &rctx->state;
+ int pages;
+ void *buf;
+
+ pages = get_order(new_len);
+
+ buf = (void *)__get_free_pages(GFP_ATOMIC, pages);
+ if (!buf) {
+ pr_err("Couldn't allocate pages for unaligned cases.\n");
+ return -ENOMEM;
+ }
+
+ if (state->bufcnt)
+ memcpy(buf, rctx->hdev->xmit_buf, state->bufcnt);
+
+ scatterwalk_map_and_copy(buf + state->bufcnt, sg, rctx->offset,
+ min(new_len, rctx->total) - state->bufcnt, 0);
+ sg_init_table(rctx->sgl, 1);
+ sg_set_buf(rctx->sgl, buf, new_len);
+ rctx->sg = rctx->sgl;
+ state->flags |= HASH_FLAGS_SGS_COPIED;
+ rctx->nents = 1;
+ rctx->offset += new_len - state->bufcnt;
+ state->bufcnt = 0;
+ rctx->total = new_len;
+
+ return 0;
+}
+
+static int stm32_hash_align_sgs(struct scatterlist *sg,
+ int nbytes, int bs, bool init, bool final,
+ struct stm32_hash_request_ctx *rctx)
+{
+ struct stm32_hash_state *state = &rctx->state;
+ struct stm32_hash_dev *hdev = rctx->hdev;
+ struct scatterlist *sg_tmp = sg;
+ int offset = rctx->offset;
+ int new_len;
+ int n = 0;
+ int bufcnt = state->bufcnt;
+ bool secure_ctx = hdev->pdata->context_secured;
+ bool aligned = true;
+
+ if (!sg || !sg->length || !nbytes) {
+ if (bufcnt) {
+ bufcnt = DIV_ROUND_UP(bufcnt, bs) * bs;
+ sg_init_table(rctx->sgl, 1);
+ sg_set_buf(rctx->sgl, rctx->hdev->xmit_buf, bufcnt);
+ rctx->sg = rctx->sgl;
+ rctx->nents = 1;
+ }
+
+ return 0;
+ }
+
+ new_len = nbytes;
+
+ if (offset)
+ aligned = false;
+
+ if (final) {
+ new_len = DIV_ROUND_UP(new_len, bs) * bs;
+ } else {
+ new_len = (new_len - 1) / bs * bs; // return n block - 1 block
+
+ /*
+ * Context save in some version of HASH IP can only be done when the
+ * FIFO is ready to get a new block. This implies to send n block plus a
+ * 32 bit word in the first DMA send.
+ */
+ if (init && secure_ctx) {
+ new_len += sizeof(u32);
+ if (unlikely(new_len > nbytes))
+ new_len -= bs;
+ }
+ }
+
+ if (!new_len)
+ return 0;
+
+ if (nbytes != new_len)
+ aligned = false;
+
+ while (nbytes > 0 && sg_tmp) {
+ n++;
+
+ if (bufcnt) {
+ if (!IS_ALIGNED(bufcnt, bs)) {
+ aligned = false;
+ break;
+ }
+ nbytes -= bufcnt;
+ bufcnt = 0;
+ if (!nbytes)
+ aligned = false;
+
+ continue;
+ }
+
+ if (offset < sg_tmp->length) {
+ if (!IS_ALIGNED(offset + sg_tmp->offset, 4)) {
+ aligned = false;
+ break;
+ }
+
+ if (!IS_ALIGNED(sg_tmp->length - offset, bs)) {
+ aligned = false;
+ break;
+ }
+ }
+
+ if (offset) {
+ offset -= sg_tmp->length;
+ if (offset < 0) {
+ nbytes += offset;
+ offset = 0;
+ }
+ } else {
+ nbytes -= sg_tmp->length;
+ }
+
+ sg_tmp = sg_next(sg_tmp);
+
+ if (nbytes < 0) {
+ aligned = false;
+ break;
+ }
+ }
+
+ if (!aligned)
+ return stm32_hash_copy_sgs(rctx, sg, bs, new_len);
+
+ rctx->total = new_len;
+ rctx->offset += new_len;
+ rctx->nents = n;
+ if (state->bufcnt) {
+ sg_init_table(rctx->sgl, 2);
+ sg_set_buf(rctx->sgl, rctx->hdev->xmit_buf, state->bufcnt);
+ sg_chain(rctx->sgl, 2, sg);
+ rctx->sg = rctx->sgl;
+ } else {
+ rctx->sg = sg;
+ }
+
+ return 0;
+}
+
+static int stm32_hash_prepare_request(struct ahash_request *req)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct stm32_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
+ struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx);
+ struct stm32_hash_state *state = &rctx->state;
+ unsigned int nbytes;
+ int ret, hash_later, bs;
+ bool update = rctx->op & HASH_OP_UPDATE;
+ bool init = !(state->flags & HASH_FLAGS_INIT);
+ bool finup = state->flags & HASH_FLAGS_FINUP;
+ bool final = state->flags & HASH_FLAGS_FINAL;
+
+ if (!hdev->dma_lch || state->flags & HASH_FLAGS_CPU)
+ return 0;
+
+ bs = crypto_ahash_blocksize(tfm);
+
+ nbytes = state->bufcnt;
+
+ /*
+ * In case of update request nbytes must correspond to the content of the
+ * buffer + the offset minus the content of the request already in the
+ * buffer.
+ */
+ if (update || finup)
+ nbytes += req->nbytes - rctx->offset;
+
+ dev_dbg(hdev->dev,
+ "%s: nbytes=%d, bs=%d, total=%d, offset=%d, bufcnt=%d\n",
+ __func__, nbytes, bs, rctx->total, rctx->offset, state->bufcnt);
+
+ if (!nbytes)
+ return 0;
+
+ rctx->total = nbytes;
+
+ if (update && req->nbytes && (!IS_ALIGNED(state->bufcnt, bs))) {
+ int len = bs - state->bufcnt % bs;
+
+ if (len > req->nbytes)
+ len = req->nbytes;
+ scatterwalk_map_and_copy(state->buffer + state->bufcnt, req->src,
+ 0, len, 0);
+ state->bufcnt += len;
+ rctx->offset = len;
+ }
+
+ /* copy buffer in a temporary one that is used for sg alignment */
+ if (state->bufcnt)
+ memcpy(hdev->xmit_buf, state->buffer, state->bufcnt);
+
+ ret = stm32_hash_align_sgs(req->src, nbytes, bs, init, final, rctx);
+ if (ret)
+ return ret;
+
+ hash_later = nbytes - rctx->total;
+ if (hash_later < 0)
+ hash_later = 0;
+
+ if (hash_later && hash_later <= state->blocklen) {
+ scatterwalk_map_and_copy(state->buffer,
+ req->src,
+ req->nbytes - hash_later,
+ hash_later, 0);
+
+ state->bufcnt = hash_later;
+ } else {
+ state->bufcnt = 0;
+ }
+
+ if (hash_later > state->blocklen) {
+ /* FIXME: add support of this case */
+ pr_err("Buffer contains more than one block.\n");
+ return -ENOMEM;
+ }
+
+ rctx->total = min(nbytes, rctx->total);
+
+ return 0;
+}
+
+static void stm32_hash_unprepare_request(struct ahash_request *req)
+{
+ struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
+ struct stm32_hash_state *state = &rctx->state;
+ struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
+ struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx);
+ u32 *preg = state->hw_context;
+ int swap_reg, i;
+
+ if (hdev->dma_lch)
+ dmaengine_terminate_sync(hdev->dma_lch);
+
+ if (state->flags & HASH_FLAGS_SGS_COPIED)
+ free_pages((unsigned long)sg_virt(rctx->sg), get_order(rctx->sg->length));
+
+ rctx->sg = NULL;
+ rctx->offset = 0;
+
+ state->flags &= ~(HASH_FLAGS_SGS_COPIED);
+
+ if (!(hdev->flags & HASH_FLAGS_INIT))
+ goto pm_runtime;
+
+ state->flags |= HASH_FLAGS_INIT;
+
+ if (stm32_hash_wait_busy(hdev)) {
+ dev_warn(hdev->dev, "Wait busy failed.");
+ return;
+ }
+
+ swap_reg = hash_swap_reg(rctx);
+
+ if (!hdev->pdata->ux500)
+ *preg++ = stm32_hash_read(hdev, HASH_IMR);
+ *preg++ = stm32_hash_read(hdev, HASH_STR);
+ *preg++ = stm32_hash_read(hdev, HASH_CR);
+ for (i = 0; i < swap_reg; i++)
+ *preg++ = stm32_hash_read(hdev, HASH_CSR(i));
+
+pm_runtime:
+ pm_runtime_mark_last_busy(hdev->dev);
+ pm_runtime_put_autosuspend(hdev->dev);
+}
+
static int stm32_hash_enqueue(struct ahash_request *req, unsigned int op)
{
struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
@@ -1070,16 +1393,26 @@ static int stm32_hash_update(struct ahash_request *req)
struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
struct stm32_hash_state *state = &rctx->state;
- if (!req->nbytes || !(state->flags & HASH_FLAGS_CPU))
+ if (!req->nbytes)
return 0;
- rctx->total = req->nbytes;
- rctx->sg = req->src;
- rctx->offset = 0;
- if ((state->bufcnt + rctx->total < state->blocklen)) {
- stm32_hash_append_sg(rctx);
- return 0;
+ if (state->flags & HASH_FLAGS_CPU) {
+ rctx->total = req->nbytes;
+ rctx->sg = req->src;
+ rctx->offset = 0;
+
+ if ((state->bufcnt + rctx->total < state->blocklen)) {
+ stm32_hash_append_sg(rctx);
+ return 0;
+ }
+ } else { /* DMA mode */
+ if (state->bufcnt + req->nbytes <= state->blocklen) {
+ scatterwalk_map_and_copy(state->buffer + state->bufcnt, req->src,
+ 0, req->nbytes, 0);
+ state->bufcnt += req->nbytes;
+ return 0;
+ }
}
return stm32_hash_enqueue(req, HASH_OP_UPDATE);
@@ -1098,20 +1431,18 @@ static int stm32_hash_final(struct ahash_request *req)
static int stm32_hash_finup(struct ahash_request *req)
{
struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req);
- struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
- struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx);
struct stm32_hash_state *state = &rctx->state;
if (!req->nbytes)
goto out;
state->flags |= HASH_FLAGS_FINUP;
- rctx->total = req->nbytes;
- rctx->sg = req->src;
- rctx->offset = 0;
- if (hdev->dma_lch && stm32_hash_dma_aligned_data(req))
- state->flags &= ~HASH_FLAGS_CPU;
+ if ((state->flags & HASH_FLAGS_CPU)) {
+ rctx->total = req->nbytes;
+ rctx->sg = req->src;
+ rctx->offset = 0;
+ }
out:
return stm32_hash_final(req);
@@ -1215,7 +1546,6 @@ static int stm32_hash_cra_sha3_hmac_init(struct crypto_tfm *tfm)
HASH_FLAGS_HMAC);
}
-
static void stm32_hash_cra_exit(struct crypto_tfm *tfm)
{
struct stm32_hash_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -1228,14 +1558,9 @@ static irqreturn_t stm32_hash_irq_thread(int irq, void *dev_id)
{
struct stm32_hash_dev *hdev = dev_id;
- if (HASH_FLAGS_CPU & hdev->flags) {
- if (HASH_FLAGS_OUTPUT_READY & hdev->flags) {
- hdev->flags &= ~HASH_FLAGS_OUTPUT_READY;
- goto finish;
- }
- } else if (HASH_FLAGS_DMA_ACTIVE & hdev->flags) {
- hdev->flags &= ~HASH_FLAGS_DMA_ACTIVE;
- goto finish;
+ if (HASH_FLAGS_OUTPUT_READY & hdev->flags) {
+ hdev->flags &= ~HASH_FLAGS_OUTPUT_READY;
+ goto finish;
}
return IRQ_HANDLED;
@@ -1984,6 +2309,7 @@ static const struct stm32_hash_pdata stm32_hash_pdata_stm32mp13 = {
.algs_info_size = ARRAY_SIZE(stm32_hash_algs_info_stm32mp13),
.has_sr = true,
.has_mdmat = true,
+ .context_secured = true,
};
static const struct of_device_id stm32_hash_of_match[] = {
diff --git a/drivers/crypto/tegra/Makefile b/drivers/crypto/tegra/Makefile
new file mode 100644
index 000000000000..a32001e58eb2
--- /dev/null
+++ b/drivers/crypto/tegra/Makefile
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+tegra-se-objs := tegra-se-key.o tegra-se-main.o
+
+tegra-se-y += tegra-se-aes.o
+tegra-se-y += tegra-se-hash.o
+
+obj-$(CONFIG_CRYPTO_DEV_TEGRA) += tegra-se.o
diff --git a/drivers/crypto/tegra/tegra-se-aes.c b/drivers/crypto/tegra/tegra-se-aes.c
new file mode 100644
index 000000000000..ae7a0f8435fc
--- /dev/null
+++ b/drivers/crypto/tegra/tegra-se-aes.c
@@ -0,0 +1,1933 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+/*
+ * Crypto driver to handle block cipher algorithms using NVIDIA Security Engine.
+ */
+
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include <crypto/aead.h>
+#include <crypto/aes.h>
+#include <crypto/engine.h>
+#include <crypto/gcm.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/xts.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+
+#include "tegra-se.h"
+
+struct tegra_aes_ctx {
+ struct tegra_se *se;
+ u32 alg;
+ u32 ivsize;
+ u32 key1_id;
+ u32 key2_id;
+};
+
+struct tegra_aes_reqctx {
+ struct tegra_se_datbuf datbuf;
+ bool encrypt;
+ u32 config;
+ u32 crypto_config;
+ u32 len;
+ u32 *iv;
+};
+
+struct tegra_aead_ctx {
+ struct tegra_se *se;
+ unsigned int authsize;
+ u32 alg;
+ u32 keylen;
+ u32 key_id;
+};
+
+struct tegra_aead_reqctx {
+ struct tegra_se_datbuf inbuf;
+ struct tegra_se_datbuf outbuf;
+ struct scatterlist *src_sg;
+ struct scatterlist *dst_sg;
+ unsigned int assoclen;
+ unsigned int cryptlen;
+ unsigned int authsize;
+ bool encrypt;
+ u32 config;
+ u32 crypto_config;
+ u32 key_id;
+ u32 iv[4];
+ u8 authdata[16];
+};
+
+struct tegra_cmac_ctx {
+ struct tegra_se *se;
+ unsigned int alg;
+ u32 key_id;
+ struct crypto_shash *fallback_tfm;
+};
+
+struct tegra_cmac_reqctx {
+ struct scatterlist *src_sg;
+ struct tegra_se_datbuf datbuf;
+ struct tegra_se_datbuf residue;
+ unsigned int total_len;
+ unsigned int blk_size;
+ unsigned int task;
+ u32 crypto_config;
+ u32 config;
+ u32 key_id;
+ u32 *iv;
+ u32 result[CMAC_RESULT_REG_COUNT];
+};
+
+/* increment counter (128-bit int) */
+static void ctr_iv_inc(__u8 *counter, __u8 bits, __u32 nums)
+{
+ do {
+ --bits;
+ nums += counter[bits];
+ counter[bits] = nums & 0xff;
+ nums >>= 8;
+ } while (bits && nums);
+}
+
+static void tegra_cbc_iv_copyback(struct skcipher_request *req, struct tegra_aes_ctx *ctx)
+{
+ struct tegra_aes_reqctx *rctx = skcipher_request_ctx(req);
+ unsigned int offset;
+
+ offset = req->cryptlen - ctx->ivsize;
+
+ if (rctx->encrypt)
+ memcpy(req->iv, rctx->datbuf.buf + offset, ctx->ivsize);
+ else
+ scatterwalk_map_and_copy(req->iv, req->src, offset, ctx->ivsize, 0);
+}
+
+static void tegra_aes_update_iv(struct skcipher_request *req, struct tegra_aes_ctx *ctx)
+{
+ int num;
+
+ if (ctx->alg == SE_ALG_CBC) {
+ tegra_cbc_iv_copyback(req, ctx);
+ } else if (ctx->alg == SE_ALG_CTR) {
+ num = req->cryptlen / ctx->ivsize;
+ if (req->cryptlen % ctx->ivsize)
+ num++;
+
+ ctr_iv_inc(req->iv, ctx->ivsize, num);
+ }
+}
+
+static int tegra234_aes_crypto_cfg(u32 alg, bool encrypt)
+{
+ switch (alg) {
+ case SE_ALG_CMAC:
+ case SE_ALG_GMAC:
+ case SE_ALG_GCM:
+ case SE_ALG_GCM_FINAL:
+ return 0;
+ case SE_ALG_CBC:
+ if (encrypt)
+ return SE_CRYPTO_CFG_CBC_ENCRYPT;
+ else
+ return SE_CRYPTO_CFG_CBC_DECRYPT;
+ case SE_ALG_ECB:
+ if (encrypt)
+ return SE_CRYPTO_CFG_ECB_ENCRYPT;
+ else
+ return SE_CRYPTO_CFG_ECB_DECRYPT;
+ case SE_ALG_XTS:
+ if (encrypt)
+ return SE_CRYPTO_CFG_XTS_ENCRYPT;
+ else
+ return SE_CRYPTO_CFG_XTS_DECRYPT;
+
+ case SE_ALG_CTR:
+ return SE_CRYPTO_CFG_CTR;
+ case SE_ALG_CBC_MAC:
+ return SE_CRYPTO_CFG_CBC_MAC;
+
+ default:
+ break;
+ }
+
+ return -EINVAL;
+}
+
+static int tegra234_aes_cfg(u32 alg, bool encrypt)
+{
+ switch (alg) {
+ case SE_ALG_CBC:
+ case SE_ALG_ECB:
+ case SE_ALG_XTS:
+ case SE_ALG_CTR:
+ if (encrypt)
+ return SE_CFG_AES_ENCRYPT;
+ else
+ return SE_CFG_AES_DECRYPT;
+
+ case SE_ALG_GMAC:
+ if (encrypt)
+ return SE_CFG_GMAC_ENCRYPT;
+ else
+ return SE_CFG_GMAC_DECRYPT;
+
+ case SE_ALG_GCM:
+ if (encrypt)
+ return SE_CFG_GCM_ENCRYPT;
+ else
+ return SE_CFG_GCM_DECRYPT;
+
+ case SE_ALG_GCM_FINAL:
+ if (encrypt)
+ return SE_CFG_GCM_FINAL_ENCRYPT;
+ else
+ return SE_CFG_GCM_FINAL_DECRYPT;
+
+ case SE_ALG_CMAC:
+ return SE_CFG_CMAC;
+
+ case SE_ALG_CBC_MAC:
+ return SE_AES_ENC_ALG_AES_ENC |
+ SE_AES_DST_HASH_REG;
+ }
+ return -EINVAL;
+}
+
+static unsigned int tegra_aes_prep_cmd(struct tegra_aes_ctx *ctx,
+ struct tegra_aes_reqctx *rctx)
+{
+ unsigned int data_count, res_bits, i = 0, j;
+ struct tegra_se *se = ctx->se;
+ u32 *cpuvaddr = se->cmdbuf->addr;
+ dma_addr_t addr = rctx->datbuf.addr;
+
+ data_count = rctx->len / AES_BLOCK_SIZE;
+ res_bits = (rctx->len % AES_BLOCK_SIZE) * 8;
+
+ /*
+ * Hardware processes data_count + 1 blocks.
+ * Reduce 1 block if there is no residue
+ */
+ if (!res_bits)
+ data_count--;
+
+ if (rctx->iv) {
+ cpuvaddr[i++] = host1x_opcode_setpayload(SE_CRYPTO_CTR_REG_COUNT);
+ cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->linear_ctr);
+ for (j = 0; j < SE_CRYPTO_CTR_REG_COUNT; j++)
+ cpuvaddr[i++] = rctx->iv[j];
+ }
+
+ cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->last_blk, 1);
+ cpuvaddr[i++] = SE_LAST_BLOCK_VAL(data_count) |
+ SE_LAST_BLOCK_RES_BITS(res_bits);
+
+ cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 6);
+ cpuvaddr[i++] = rctx->config;
+ cpuvaddr[i++] = rctx->crypto_config;
+
+ /* Source address setting */
+ cpuvaddr[i++] = lower_32_bits(addr);
+ cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(addr)) | SE_ADDR_HI_SZ(rctx->len);
+
+ /* Destination address setting */
+ cpuvaddr[i++] = lower_32_bits(addr);
+ cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(addr)) |
+ SE_ADDR_HI_SZ(rctx->len);
+
+ cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1);
+ cpuvaddr[i++] = SE_AES_OP_WRSTALL | SE_AES_OP_LASTBUF |
+ SE_AES_OP_START;
+
+ cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1);
+ cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) |
+ host1x_uclass_incr_syncpt_indx_f(se->syncpt_id);
+
+ dev_dbg(se->dev, "cfg %#x crypto cfg %#x\n", rctx->config, rctx->crypto_config);
+
+ return i;
+}
+
+static int tegra_aes_do_one_req(struct crypto_engine *engine, void *areq)
+{
+ struct skcipher_request *req = container_of(areq, struct skcipher_request, base);
+ struct tegra_aes_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+ struct tegra_aes_reqctx *rctx = skcipher_request_ctx(req);
+ struct tegra_se *se = ctx->se;
+ unsigned int cmdlen;
+ int ret;
+
+ rctx->datbuf.buf = dma_alloc_coherent(se->dev, SE_AES_BUFLEN,
+ &rctx->datbuf.addr, GFP_KERNEL);
+ if (!rctx->datbuf.buf)
+ return -ENOMEM;
+
+ rctx->datbuf.size = SE_AES_BUFLEN;
+ rctx->iv = (u32 *)req->iv;
+ rctx->len = req->cryptlen;
+
+ /* Pad input to AES Block size */
+ if (ctx->alg != SE_ALG_XTS) {
+ if (rctx->len % AES_BLOCK_SIZE)
+ rctx->len += AES_BLOCK_SIZE - (rctx->len % AES_BLOCK_SIZE);
+ }
+
+ scatterwalk_map_and_copy(rctx->datbuf.buf, req->src, 0, req->cryptlen, 0);
+
+ /* Prepare the command and submit for execution */
+ cmdlen = tegra_aes_prep_cmd(ctx, rctx);
+ ret = tegra_se_host1x_submit(se, cmdlen);
+
+ /* Copy the result */
+ tegra_aes_update_iv(req, ctx);
+ scatterwalk_map_and_copy(rctx->datbuf.buf, req->dst, 0, req->cryptlen, 1);
+
+ /* Free the buffer */
+ dma_free_coherent(ctx->se->dev, SE_AES_BUFLEN,
+ rctx->datbuf.buf, rctx->datbuf.addr);
+
+ crypto_finalize_skcipher_request(se->engine, req, ret);
+
+ return 0;
+}
+
+static int tegra_aes_cra_init(struct crypto_skcipher *tfm)
+{
+ struct tegra_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+ struct tegra_se_alg *se_alg;
+ const char *algname;
+ int ret;
+
+ se_alg = container_of(alg, struct tegra_se_alg, alg.skcipher.base);
+
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct tegra_aes_reqctx));
+
+ ctx->ivsize = crypto_skcipher_ivsize(tfm);
+ ctx->se = se_alg->se_dev;
+ ctx->key1_id = 0;
+ ctx->key2_id = 0;
+
+ algname = crypto_tfm_alg_name(&tfm->base);
+ ret = se_algname_to_algid(algname);
+ if (ret < 0) {
+ dev_err(ctx->se->dev, "invalid algorithm\n");
+ return ret;
+ }
+
+ ctx->alg = ret;
+
+ return 0;
+}
+
+static void tegra_aes_cra_exit(struct crypto_skcipher *tfm)
+{
+ struct tegra_aes_ctx *ctx = crypto_tfm_ctx(&tfm->base);
+
+ if (ctx->key1_id)
+ tegra_key_invalidate(ctx->se, ctx->key1_id, ctx->alg);
+
+ if (ctx->key2_id)
+ tegra_key_invalidate(ctx->se, ctx->key2_id, ctx->alg);
+}
+
+static int tegra_aes_setkey(struct crypto_skcipher *tfm,
+ const u8 *key, u32 keylen)
+{
+ struct tegra_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ if (aes_check_keylen(keylen)) {
+ dev_dbg(ctx->se->dev, "invalid key length (%d)\n", keylen);
+ return -EINVAL;
+ }
+
+ return tegra_key_submit(ctx->se, key, keylen, ctx->alg, &ctx->key1_id);
+}
+
+static int tegra_xts_setkey(struct crypto_skcipher *tfm,
+ const u8 *key, u32 keylen)
+{
+ struct tegra_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ u32 len = keylen / 2;
+ int ret;
+
+ ret = xts_verify_key(tfm, key, keylen);
+ if (ret || aes_check_keylen(len)) {
+ dev_dbg(ctx->se->dev, "invalid key length (%d)\n", keylen);
+ return -EINVAL;
+ }
+
+ ret = tegra_key_submit(ctx->se, key, len,
+ ctx->alg, &ctx->key1_id);
+ if (ret)
+ return ret;
+
+ return tegra_key_submit(ctx->se, key + len, len,
+ ctx->alg, &ctx->key2_id);
+
+ return 0;
+}
+
+static int tegra_aes_kac_manifest(u32 user, u32 alg, u32 keylen)
+{
+ int manifest;
+
+ manifest = SE_KAC_USER_NS;
+
+ switch (alg) {
+ case SE_ALG_CBC:
+ case SE_ALG_ECB:
+ case SE_ALG_CTR:
+ manifest |= SE_KAC_ENC;
+ break;
+ case SE_ALG_XTS:
+ manifest |= SE_KAC_XTS;
+ break;
+ case SE_ALG_GCM:
+ manifest |= SE_KAC_GCM;
+ break;
+ case SE_ALG_CMAC:
+ manifest |= SE_KAC_CMAC;
+ break;
+ case SE_ALG_CBC_MAC:
+ manifest |= SE_KAC_ENC;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ switch (keylen) {
+ case AES_KEYSIZE_128:
+ manifest |= SE_KAC_SIZE_128;
+ break;
+ case AES_KEYSIZE_192:
+ manifest |= SE_KAC_SIZE_192;
+ break;
+ case AES_KEYSIZE_256:
+ manifest |= SE_KAC_SIZE_256;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return manifest;
+}
+
+static int tegra_aes_crypt(struct skcipher_request *req, bool encrypt)
+
+{
+ struct crypto_skcipher *tfm;
+ struct tegra_aes_ctx *ctx;
+ struct tegra_aes_reqctx *rctx;
+
+ tfm = crypto_skcipher_reqtfm(req);
+ ctx = crypto_skcipher_ctx(tfm);
+ rctx = skcipher_request_ctx(req);
+
+ if (ctx->alg != SE_ALG_XTS) {
+ if (!IS_ALIGNED(req->cryptlen, crypto_skcipher_blocksize(tfm))) {
+ dev_dbg(ctx->se->dev, "invalid length (%d)", req->cryptlen);
+ return -EINVAL;
+ }
+ } else if (req->cryptlen < XTS_BLOCK_SIZE) {
+ dev_dbg(ctx->se->dev, "invalid length (%d)", req->cryptlen);
+ return -EINVAL;
+ }
+
+ if (!req->cryptlen)
+ return 0;
+
+ rctx->encrypt = encrypt;
+ rctx->config = tegra234_aes_cfg(ctx->alg, encrypt);
+ rctx->crypto_config = tegra234_aes_crypto_cfg(ctx->alg, encrypt);
+ rctx->crypto_config |= SE_AES_KEY_INDEX(ctx->key1_id);
+
+ if (ctx->key2_id)
+ rctx->crypto_config |= SE_AES_KEY2_INDEX(ctx->key2_id);
+
+ return crypto_transfer_skcipher_request_to_engine(ctx->se->engine, req);
+}
+
+static int tegra_aes_encrypt(struct skcipher_request *req)
+{
+ return tegra_aes_crypt(req, true);
+}
+
+static int tegra_aes_decrypt(struct skcipher_request *req)
+{
+ return tegra_aes_crypt(req, false);
+}
+
+static struct tegra_se_alg tegra_aes_algs[] = {
+ {
+ .alg.skcipher.op.do_one_request = tegra_aes_do_one_req,
+ .alg.skcipher.base = {
+ .init = tegra_aes_cra_init,
+ .exit = tegra_aes_cra_exit,
+ .setkey = tegra_aes_setkey,
+ .encrypt = tegra_aes_encrypt,
+ .decrypt = tegra_aes_decrypt,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .base = {
+ .cra_name = "cbc(aes)",
+ .cra_driver_name = "cbc-aes-tegra",
+ .cra_priority = 500,
+ .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct tegra_aes_ctx),
+ .cra_alignmask = 0xf,
+ .cra_module = THIS_MODULE,
+ },
+ }
+ }, {
+ .alg.skcipher.op.do_one_request = tegra_aes_do_one_req,
+ .alg.skcipher.base = {
+ .init = tegra_aes_cra_init,
+ .exit = tegra_aes_cra_exit,
+ .setkey = tegra_aes_setkey,
+ .encrypt = tegra_aes_encrypt,
+ .decrypt = tegra_aes_decrypt,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .base = {
+ .cra_name = "ecb(aes)",
+ .cra_driver_name = "ecb-aes-tegra",
+ .cra_priority = 500,
+ .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct tegra_aes_ctx),
+ .cra_alignmask = 0xf,
+ .cra_module = THIS_MODULE,
+ },
+ }
+ }, {
+ .alg.skcipher.op.do_one_request = tegra_aes_do_one_req,
+ .alg.skcipher.base = {
+ .init = tegra_aes_cra_init,
+ .exit = tegra_aes_cra_exit,
+ .setkey = tegra_aes_setkey,
+ .encrypt = tegra_aes_encrypt,
+ .decrypt = tegra_aes_decrypt,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .base = {
+ .cra_name = "ctr(aes)",
+ .cra_driver_name = "ctr-aes-tegra",
+ .cra_priority = 500,
+ .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct tegra_aes_ctx),
+ .cra_alignmask = 0xf,
+ .cra_module = THIS_MODULE,
+ },
+ }
+ }, {
+ .alg.skcipher.op.do_one_request = tegra_aes_do_one_req,
+ .alg.skcipher.base = {
+ .init = tegra_aes_cra_init,
+ .exit = tegra_aes_cra_exit,
+ .setkey = tegra_xts_setkey,
+ .encrypt = tegra_aes_encrypt,
+ .decrypt = tegra_aes_decrypt,
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .base = {
+ .cra_name = "xts(aes)",
+ .cra_driver_name = "xts-aes-tegra",
+ .cra_priority = 500,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct tegra_aes_ctx),
+ .cra_alignmask = (__alignof__(u64) - 1),
+ .cra_module = THIS_MODULE,
+ },
+ }
+ },
+};
+
+static unsigned int tegra_gmac_prep_cmd(struct tegra_aead_ctx *ctx,
+ struct tegra_aead_reqctx *rctx)
+{
+ unsigned int data_count, res_bits, i = 0;
+ struct tegra_se *se = ctx->se;
+ u32 *cpuvaddr = se->cmdbuf->addr;
+
+ data_count = (rctx->assoclen / AES_BLOCK_SIZE);
+ res_bits = (rctx->assoclen % AES_BLOCK_SIZE) * 8;
+
+ /*
+ * Hardware processes data_count + 1 blocks.
+ * Reduce 1 block if there is no residue
+ */
+ if (!res_bits)
+ data_count--;
+
+ cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->last_blk, 1);
+ cpuvaddr[i++] = SE_LAST_BLOCK_VAL(data_count) |
+ SE_LAST_BLOCK_RES_BITS(res_bits);
+
+ cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 4);
+ cpuvaddr[i++] = rctx->config;
+ cpuvaddr[i++] = rctx->crypto_config;
+ cpuvaddr[i++] = lower_32_bits(rctx->inbuf.addr);
+ cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->inbuf.addr)) |
+ SE_ADDR_HI_SZ(rctx->assoclen);
+
+ cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1);
+ cpuvaddr[i++] = SE_AES_OP_WRSTALL | SE_AES_OP_FINAL |
+ SE_AES_OP_INIT | SE_AES_OP_LASTBUF |
+ SE_AES_OP_START;
+
+ cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1);
+ cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) |
+ host1x_uclass_incr_syncpt_indx_f(se->syncpt_id);
+
+ return i;
+}
+
+static unsigned int tegra_gcm_crypt_prep_cmd(struct tegra_aead_ctx *ctx,
+ struct tegra_aead_reqctx *rctx)
+{
+ unsigned int data_count, res_bits, i = 0, j;
+ struct tegra_se *se = ctx->se;
+ u32 *cpuvaddr = se->cmdbuf->addr, op;
+
+ data_count = (rctx->cryptlen / AES_BLOCK_SIZE);
+ res_bits = (rctx->cryptlen % AES_BLOCK_SIZE) * 8;
+ op = SE_AES_OP_WRSTALL | SE_AES_OP_FINAL |
+ SE_AES_OP_LASTBUF | SE_AES_OP_START;
+
+ /*
+ * If there is no assoc data,
+ * this will be the init command
+ */
+ if (!rctx->assoclen)
+ op |= SE_AES_OP_INIT;
+
+ /*
+ * Hardware processes data_count + 1 blocks.
+ * Reduce 1 block if there is no residue
+ */
+ if (!res_bits)
+ data_count--;
+
+ cpuvaddr[i++] = host1x_opcode_setpayload(SE_CRYPTO_CTR_REG_COUNT);
+ cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->linear_ctr);
+ for (j = 0; j < SE_CRYPTO_CTR_REG_COUNT; j++)
+ cpuvaddr[i++] = rctx->iv[j];
+
+ cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->last_blk, 1);
+ cpuvaddr[i++] = SE_LAST_BLOCK_VAL(data_count) |
+ SE_LAST_BLOCK_RES_BITS(res_bits);
+
+ cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 6);
+ cpuvaddr[i++] = rctx->config;
+ cpuvaddr[i++] = rctx->crypto_config;
+
+ /* Source Address */
+ cpuvaddr[i++] = lower_32_bits(rctx->inbuf.addr);
+ cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->inbuf.addr)) |
+ SE_ADDR_HI_SZ(rctx->cryptlen);
+
+ /* Destination Address */
+ cpuvaddr[i++] = lower_32_bits(rctx->outbuf.addr);
+ cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->outbuf.addr)) |
+ SE_ADDR_HI_SZ(rctx->cryptlen);
+
+ cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1);
+ cpuvaddr[i++] = op;
+
+ cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1);
+ cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) |
+ host1x_uclass_incr_syncpt_indx_f(se->syncpt_id);
+
+ dev_dbg(se->dev, "cfg %#x crypto cfg %#x\n", rctx->config, rctx->crypto_config);
+ return i;
+}
+
+static int tegra_gcm_prep_final_cmd(struct tegra_se *se, u32 *cpuvaddr,
+ struct tegra_aead_reqctx *rctx)
+{
+ unsigned int i = 0, j;
+ u32 op;
+
+ op = SE_AES_OP_WRSTALL | SE_AES_OP_FINAL |
+ SE_AES_OP_LASTBUF | SE_AES_OP_START;
+
+ /*
+ * Set init for zero sized vector
+ */
+ if (!rctx->assoclen && !rctx->cryptlen)
+ op |= SE_AES_OP_INIT;
+
+ cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->aad_len, 2);
+ cpuvaddr[i++] = rctx->assoclen * 8;
+ cpuvaddr[i++] = 0;
+
+ cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->cryp_msg_len, 2);
+ cpuvaddr[i++] = rctx->cryptlen * 8;
+ cpuvaddr[i++] = 0;
+
+ cpuvaddr[i++] = host1x_opcode_setpayload(SE_CRYPTO_CTR_REG_COUNT);
+ cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->linear_ctr);
+ for (j = 0; j < SE_CRYPTO_CTR_REG_COUNT; j++)
+ cpuvaddr[i++] = rctx->iv[j];
+
+ cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 6);
+ cpuvaddr[i++] = rctx->config;
+ cpuvaddr[i++] = rctx->crypto_config;
+ cpuvaddr[i++] = 0;
+ cpuvaddr[i++] = 0;
+
+ /* Destination Address */
+ cpuvaddr[i++] = lower_32_bits(rctx->outbuf.addr);
+ cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->outbuf.addr)) |
+ SE_ADDR_HI_SZ(0x10); /* HW always generates 128-bit tag */
+
+ cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1);
+ cpuvaddr[i++] = op;
+
+ cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1);
+ cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) |
+ host1x_uclass_incr_syncpt_indx_f(se->syncpt_id);
+
+ dev_dbg(se->dev, "cfg %#x crypto cfg %#x\n", rctx->config, rctx->crypto_config);
+
+ return i;
+}
+
+static int tegra_gcm_do_gmac(struct tegra_aead_ctx *ctx, struct tegra_aead_reqctx *rctx)
+{
+ struct tegra_se *se = ctx->se;
+ unsigned int cmdlen;
+
+ scatterwalk_map_and_copy(rctx->inbuf.buf,
+ rctx->src_sg, 0, rctx->assoclen, 0);
+
+ rctx->config = tegra234_aes_cfg(SE_ALG_GMAC, rctx->encrypt);
+ rctx->crypto_config = tegra234_aes_crypto_cfg(SE_ALG_GMAC, rctx->encrypt) |
+ SE_AES_KEY_INDEX(ctx->key_id);
+
+ cmdlen = tegra_gmac_prep_cmd(ctx, rctx);
+
+ return tegra_se_host1x_submit(se, cmdlen);
+}
+
+static int tegra_gcm_do_crypt(struct tegra_aead_ctx *ctx, struct tegra_aead_reqctx *rctx)
+{
+ struct tegra_se *se = ctx->se;
+ int cmdlen, ret;
+
+ scatterwalk_map_and_copy(rctx->inbuf.buf, rctx->src_sg,
+ rctx->assoclen, rctx->cryptlen, 0);
+
+ rctx->config = tegra234_aes_cfg(SE_ALG_GCM, rctx->encrypt);
+ rctx->crypto_config = tegra234_aes_crypto_cfg(SE_ALG_GCM, rctx->encrypt) |
+ SE_AES_KEY_INDEX(ctx->key_id);
+
+ /* Prepare command and submit */
+ cmdlen = tegra_gcm_crypt_prep_cmd(ctx, rctx);
+ ret = tegra_se_host1x_submit(se, cmdlen);
+ if (ret)
+ return ret;
+
+ /* Copy the result */
+ scatterwalk_map_and_copy(rctx->outbuf.buf, rctx->dst_sg,
+ rctx->assoclen, rctx->cryptlen, 1);
+
+ return 0;
+}
+
+static int tegra_gcm_do_final(struct tegra_aead_ctx *ctx, struct tegra_aead_reqctx *rctx)
+{
+ struct tegra_se *se = ctx->se;
+ u32 *cpuvaddr = se->cmdbuf->addr;
+ int cmdlen, ret, offset;
+
+ rctx->config = tegra234_aes_cfg(SE_ALG_GCM_FINAL, rctx->encrypt);
+ rctx->crypto_config = tegra234_aes_crypto_cfg(SE_ALG_GCM_FINAL, rctx->encrypt) |
+ SE_AES_KEY_INDEX(ctx->key_id);
+
+ /* Prepare command and submit */
+ cmdlen = tegra_gcm_prep_final_cmd(se, cpuvaddr, rctx);
+ ret = tegra_se_host1x_submit(se, cmdlen);
+ if (ret)
+ return ret;
+
+ if (rctx->encrypt) {
+ /* Copy the result */
+ offset = rctx->assoclen + rctx->cryptlen;
+ scatterwalk_map_and_copy(rctx->outbuf.buf, rctx->dst_sg,
+ offset, rctx->authsize, 1);
+ }
+
+ return 0;
+}
+
+static int tegra_gcm_do_verify(struct tegra_se *se, struct tegra_aead_reqctx *rctx)
+{
+ unsigned int offset;
+ u8 mac[16];
+
+ offset = rctx->assoclen + rctx->cryptlen;
+ scatterwalk_map_and_copy(mac, rctx->src_sg, offset, rctx->authsize, 0);
+
+ if (crypto_memneq(rctx->outbuf.buf, mac, rctx->authsize))
+ return -EBADMSG;
+
+ return 0;
+}
+
+static inline int tegra_ccm_check_iv(const u8 *iv)
+{
+ /* iv[0] gives value of q-1
+ * 2 <= q <= 8 as per NIST 800-38C notation
+ * 2 <= L <= 8, so 1 <= L' <= 7. as per rfc 3610 notation
+ */
+ if (iv[0] < 1 || iv[0] > 7) {
+ pr_debug("ccm_check_iv failed %d\n", iv[0]);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static unsigned int tegra_cbcmac_prep_cmd(struct tegra_aead_ctx *ctx,
+ struct tegra_aead_reqctx *rctx)
+{
+ unsigned int data_count, i = 0;
+ struct tegra_se *se = ctx->se;
+ u32 *cpuvaddr = se->cmdbuf->addr;
+
+ data_count = (rctx->inbuf.size / AES_BLOCK_SIZE) - 1;
+
+ cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->last_blk, 1);
+ cpuvaddr[i++] = SE_LAST_BLOCK_VAL(data_count);
+
+ cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 6);
+ cpuvaddr[i++] = rctx->config;
+ cpuvaddr[i++] = rctx->crypto_config;
+
+ cpuvaddr[i++] = lower_32_bits(rctx->inbuf.addr);
+ cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->inbuf.addr)) |
+ SE_ADDR_HI_SZ(rctx->inbuf.size);
+
+ cpuvaddr[i++] = lower_32_bits(rctx->outbuf.addr);
+ cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->outbuf.addr)) |
+ SE_ADDR_HI_SZ(0x10); /* HW always generates 128 bit tag */
+
+ cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1);
+ cpuvaddr[i++] = SE_AES_OP_WRSTALL |
+ SE_AES_OP_LASTBUF | SE_AES_OP_START;
+
+ cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1);
+ cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) |
+ host1x_uclass_incr_syncpt_indx_f(se->syncpt_id);
+
+ return i;
+}
+
+static unsigned int tegra_ctr_prep_cmd(struct tegra_aead_ctx *ctx,
+ struct tegra_aead_reqctx *rctx)
+{
+ unsigned int i = 0, j;
+ struct tegra_se *se = ctx->se;
+ u32 *cpuvaddr = se->cmdbuf->addr;
+
+ cpuvaddr[i++] = host1x_opcode_setpayload(SE_CRYPTO_CTR_REG_COUNT);
+ cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->linear_ctr);
+ for (j = 0; j < SE_CRYPTO_CTR_REG_COUNT; j++)
+ cpuvaddr[i++] = rctx->iv[j];
+
+ cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->last_blk, 1);
+ cpuvaddr[i++] = (rctx->inbuf.size / AES_BLOCK_SIZE) - 1;
+ cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 6);
+ cpuvaddr[i++] = rctx->config;
+ cpuvaddr[i++] = rctx->crypto_config;
+
+ /* Source address setting */
+ cpuvaddr[i++] = lower_32_bits(rctx->inbuf.addr);
+ cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->inbuf.addr)) |
+ SE_ADDR_HI_SZ(rctx->inbuf.size);
+
+ /* Destination address setting */
+ cpuvaddr[i++] = lower_32_bits(rctx->outbuf.addr);
+ cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->outbuf.addr)) |
+ SE_ADDR_HI_SZ(rctx->inbuf.size);
+
+ cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1);
+ cpuvaddr[i++] = SE_AES_OP_WRSTALL | SE_AES_OP_LASTBUF |
+ SE_AES_OP_START;
+
+ cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1);
+ cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) |
+ host1x_uclass_incr_syncpt_indx_f(se->syncpt_id);
+
+ dev_dbg(se->dev, "cfg %#x crypto cfg %#x\n",
+ rctx->config, rctx->crypto_config);
+
+ return i;
+}
+
+static int tegra_ccm_do_cbcmac(struct tegra_aead_ctx *ctx, struct tegra_aead_reqctx *rctx)
+{
+ struct tegra_se *se = ctx->se;
+ int cmdlen;
+
+ rctx->config = tegra234_aes_cfg(SE_ALG_CBC_MAC, rctx->encrypt);
+ rctx->crypto_config = tegra234_aes_crypto_cfg(SE_ALG_CBC_MAC,
+ rctx->encrypt) |
+ SE_AES_KEY_INDEX(ctx->key_id);
+
+ /* Prepare command and submit */
+ cmdlen = tegra_cbcmac_prep_cmd(ctx, rctx);
+
+ return tegra_se_host1x_submit(se, cmdlen);
+}
+
+static int tegra_ccm_set_msg_len(u8 *block, unsigned int msglen, int csize)
+{
+ __be32 data;
+
+ memset(block, 0, csize);
+ block += csize;
+
+ if (csize >= 4)
+ csize = 4;
+ else if (msglen > (1 << (8 * csize)))
+ return -EOVERFLOW;
+
+ data = cpu_to_be32(msglen);
+ memcpy(block - csize, (u8 *)&data + 4 - csize, csize);
+
+ return 0;
+}
+
+static int tegra_ccm_format_nonce(struct tegra_aead_reqctx *rctx, u8 *nonce)
+{
+ unsigned int q, t;
+ u8 *q_ptr, *iv = (u8 *)rctx->iv;
+
+ memcpy(nonce, rctx->iv, 16);
+
+ /*** 1. Prepare Flags Octet ***/
+
+ /* Encode t (mac length) */
+ t = rctx->authsize;
+ nonce[0] |= (((t - 2) / 2) << 3);
+
+ /* Adata */
+ if (rctx->assoclen)
+ nonce[0] |= (1 << 6);
+
+ /*** Encode Q - message length ***/
+ q = iv[0] + 1;
+ q_ptr = nonce + 16 - q;
+
+ return tegra_ccm_set_msg_len(q_ptr, rctx->cryptlen, q);
+}
+
+static int tegra_ccm_format_adata(u8 *adata, unsigned int a)
+{
+ int len = 0;
+
+ /* add control info for associated data
+ * RFC 3610 and NIST Special Publication 800-38C
+ */
+ if (a < 65280) {
+ *(__be16 *)adata = cpu_to_be16(a);
+ len = 2;
+ } else {
+ *(__be16 *)adata = cpu_to_be16(0xfffe);
+ *(__be32 *)&adata[2] = cpu_to_be32(a);
+ len = 6;
+ }
+
+ return len;
+}
+
+static int tegra_ccm_add_padding(u8 *buf, unsigned int len)
+{
+ unsigned int padlen = 16 - (len % 16);
+ u8 padding[16] = {0};
+
+ if (padlen == 16)
+ return 0;
+
+ memcpy(buf, padding, padlen);
+
+ return padlen;
+}
+
+static int tegra_ccm_format_blocks(struct tegra_aead_reqctx *rctx)
+{
+ unsigned int alen = 0, offset = 0;
+ u8 nonce[16], adata[16];
+ int ret;
+
+ ret = tegra_ccm_format_nonce(rctx, nonce);
+ if (ret)
+ return ret;
+
+ memcpy(rctx->inbuf.buf, nonce, 16);
+ offset = 16;
+
+ if (rctx->assoclen) {
+ alen = tegra_ccm_format_adata(adata, rctx->assoclen);
+ memcpy(rctx->inbuf.buf + offset, adata, alen);
+ offset += alen;
+
+ scatterwalk_map_and_copy(rctx->inbuf.buf + offset,
+ rctx->src_sg, 0, rctx->assoclen, 0);
+
+ offset += rctx->assoclen;
+ offset += tegra_ccm_add_padding(rctx->inbuf.buf + offset,
+ rctx->assoclen + alen);
+ }
+
+ return offset;
+}
+
+static int tegra_ccm_mac_result(struct tegra_se *se, struct tegra_aead_reqctx *rctx)
+{
+ u32 result[16];
+ int i, ret;
+
+ /* Read and clear Result */
+ for (i = 0; i < CMAC_RESULT_REG_COUNT; i++)
+ result[i] = readl(se->base + se->hw->regs->result + (i * 4));
+
+ for (i = 0; i < CMAC_RESULT_REG_COUNT; i++)
+ writel(0, se->base + se->hw->regs->result + (i * 4));
+
+ if (rctx->encrypt) {
+ memcpy(rctx->authdata, result, rctx->authsize);
+ } else {
+ ret = crypto_memneq(rctx->authdata, result, rctx->authsize);
+ if (ret)
+ return -EBADMSG;
+ }
+
+ return 0;
+}
+
+static int tegra_ccm_ctr_result(struct tegra_se *se, struct tegra_aead_reqctx *rctx)
+{
+ /* Copy result */
+ scatterwalk_map_and_copy(rctx->outbuf.buf + 16, rctx->dst_sg,
+ rctx->assoclen, rctx->cryptlen, 1);
+
+ if (rctx->encrypt)
+ scatterwalk_map_and_copy(rctx->outbuf.buf, rctx->dst_sg,
+ rctx->assoclen + rctx->cryptlen,
+ rctx->authsize, 1);
+ else
+ memcpy(rctx->authdata, rctx->outbuf.buf, rctx->authsize);
+
+ return 0;
+}
+
+static int tegra_ccm_compute_auth(struct tegra_aead_ctx *ctx, struct tegra_aead_reqctx *rctx)
+{
+ struct tegra_se *se = ctx->se;
+ struct scatterlist *sg;
+ int offset, ret;
+
+ offset = tegra_ccm_format_blocks(rctx);
+ if (offset < 0)
+ return -EINVAL;
+
+ /* Copy plain text to the buffer */
+ sg = rctx->encrypt ? rctx->src_sg : rctx->dst_sg;
+
+ scatterwalk_map_and_copy(rctx->inbuf.buf + offset,
+ sg, rctx->assoclen,
+ rctx->cryptlen, 0);
+ offset += rctx->cryptlen;
+ offset += tegra_ccm_add_padding(rctx->inbuf.buf + offset, rctx->cryptlen);
+
+ rctx->inbuf.size = offset;
+
+ ret = tegra_ccm_do_cbcmac(ctx, rctx);
+ if (ret)
+ return ret;
+
+ return tegra_ccm_mac_result(se, rctx);
+}
+
+static int tegra_ccm_do_ctr(struct tegra_aead_ctx *ctx, struct tegra_aead_reqctx *rctx)
+{
+ struct tegra_se *se = ctx->se;
+ unsigned int cmdlen, offset = 0;
+ struct scatterlist *sg = rctx->src_sg;
+ int ret;
+
+ rctx->config = tegra234_aes_cfg(SE_ALG_CTR, rctx->encrypt);
+ rctx->crypto_config = tegra234_aes_crypto_cfg(SE_ALG_CTR, rctx->encrypt) |
+ SE_AES_KEY_INDEX(ctx->key_id);
+
+ /* Copy authdata in the top of buffer for encryption/decryption */
+ if (rctx->encrypt)
+ memcpy(rctx->inbuf.buf, rctx->authdata, rctx->authsize);
+ else
+ scatterwalk_map_and_copy(rctx->inbuf.buf, sg,
+ rctx->assoclen + rctx->cryptlen,
+ rctx->authsize, 0);
+
+ offset += rctx->authsize;
+ offset += tegra_ccm_add_padding(rctx->inbuf.buf + offset, rctx->authsize);
+
+ /* If there is no cryptlen, proceed to submit the task */
+ if (rctx->cryptlen) {
+ scatterwalk_map_and_copy(rctx->inbuf.buf + offset, sg,
+ rctx->assoclen, rctx->cryptlen, 0);
+ offset += rctx->cryptlen;
+ offset += tegra_ccm_add_padding(rctx->inbuf.buf + offset, rctx->cryptlen);
+ }
+
+ rctx->inbuf.size = offset;
+
+ /* Prepare command and submit */
+ cmdlen = tegra_ctr_prep_cmd(ctx, rctx);
+ ret = tegra_se_host1x_submit(se, cmdlen);
+ if (ret)
+ return ret;
+
+ return tegra_ccm_ctr_result(se, rctx);
+}
+
+static int tegra_ccm_crypt_init(struct aead_request *req, struct tegra_se *se,
+ struct tegra_aead_reqctx *rctx)
+{
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ u8 *iv = (u8 *)rctx->iv;
+ int ret, i;
+
+ rctx->src_sg = req->src;
+ rctx->dst_sg = req->dst;
+ rctx->assoclen = req->assoclen;
+ rctx->authsize = crypto_aead_authsize(tfm);
+
+ memcpy(iv, req->iv, 16);
+
+ ret = tegra_ccm_check_iv(iv);
+ if (ret)
+ return ret;
+
+ /* Note: rfc 3610 and NIST 800-38C require counter (ctr_0) of
+ * zero to encrypt auth tag.
+ * req->iv has the formatted ctr_0 (i.e. Flags || N || 0).
+ */
+ memset(iv + 15 - iv[0], 0, iv[0] + 1);
+
+ /* Clear any previous result */
+ for (i = 0; i < CMAC_RESULT_REG_COUNT; i++)
+ writel(0, se->base + se->hw->regs->result + (i * 4));
+
+ return 0;
+}
+
+static int tegra_ccm_do_one_req(struct crypto_engine *engine, void *areq)
+{
+ struct aead_request *req = container_of(areq, struct aead_request, base);
+ struct tegra_aead_reqctx *rctx = aead_request_ctx(req);
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm);
+ struct tegra_se *se = ctx->se;
+ int ret;
+
+ /* Allocate buffers required */
+ rctx->inbuf.buf = dma_alloc_coherent(ctx->se->dev, SE_AES_BUFLEN,
+ &rctx->inbuf.addr, GFP_KERNEL);
+ if (!rctx->inbuf.buf)
+ return -ENOMEM;
+
+ rctx->inbuf.size = SE_AES_BUFLEN;
+
+ rctx->outbuf.buf = dma_alloc_coherent(ctx->se->dev, SE_AES_BUFLEN,
+ &rctx->outbuf.addr, GFP_KERNEL);
+ if (!rctx->outbuf.buf) {
+ ret = -ENOMEM;
+ goto outbuf_err;
+ }
+
+ rctx->outbuf.size = SE_AES_BUFLEN;
+
+ ret = tegra_ccm_crypt_init(req, se, rctx);
+ if (ret)
+ goto out;
+
+ if (rctx->encrypt) {
+ rctx->cryptlen = req->cryptlen;
+
+ /* CBC MAC Operation */
+ ret = tegra_ccm_compute_auth(ctx, rctx);
+ if (ret)
+ goto out;
+
+ /* CTR operation */
+ ret = tegra_ccm_do_ctr(ctx, rctx);
+ if (ret)
+ goto out;
+ } else {
+ rctx->cryptlen = req->cryptlen - ctx->authsize;
+ if (ret)
+ goto out;
+
+ /* CTR operation */
+ ret = tegra_ccm_do_ctr(ctx, rctx);
+ if (ret)
+ goto out;
+
+ /* CBC MAC Operation */
+ ret = tegra_ccm_compute_auth(ctx, rctx);
+ if (ret)
+ goto out;
+ }
+
+out:
+ dma_free_coherent(ctx->se->dev, SE_AES_BUFLEN,
+ rctx->outbuf.buf, rctx->outbuf.addr);
+
+outbuf_err:
+ dma_free_coherent(ctx->se->dev, SE_AES_BUFLEN,
+ rctx->inbuf.buf, rctx->inbuf.addr);
+
+ crypto_finalize_aead_request(ctx->se->engine, req, ret);
+
+ return 0;
+}
+
+static int tegra_gcm_do_one_req(struct crypto_engine *engine, void *areq)
+{
+ struct aead_request *req = container_of(areq, struct aead_request, base);
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm);
+ struct tegra_aead_reqctx *rctx = aead_request_ctx(req);
+ int ret;
+
+ /* Allocate buffers required */
+ rctx->inbuf.buf = dma_alloc_coherent(ctx->se->dev, SE_AES_BUFLEN,
+ &rctx->inbuf.addr, GFP_KERNEL);
+ if (!rctx->inbuf.buf)
+ return -ENOMEM;
+
+ rctx->inbuf.size = SE_AES_BUFLEN;
+
+ rctx->outbuf.buf = dma_alloc_coherent(ctx->se->dev, SE_AES_BUFLEN,
+ &rctx->outbuf.addr, GFP_KERNEL);
+ if (!rctx->outbuf.buf) {
+ ret = -ENOMEM;
+ goto outbuf_err;
+ }
+
+ rctx->outbuf.size = SE_AES_BUFLEN;
+
+ rctx->src_sg = req->src;
+ rctx->dst_sg = req->dst;
+ rctx->assoclen = req->assoclen;
+ rctx->authsize = crypto_aead_authsize(tfm);
+
+ if (rctx->encrypt)
+ rctx->cryptlen = req->cryptlen;
+ else
+ rctx->cryptlen = req->cryptlen - ctx->authsize;
+
+ memcpy(rctx->iv, req->iv, GCM_AES_IV_SIZE);
+ rctx->iv[3] = (1 << 24);
+
+ /* If there is associated data perform GMAC operation */
+ if (rctx->assoclen) {
+ ret = tegra_gcm_do_gmac(ctx, rctx);
+ if (ret)
+ goto out;
+ }
+
+ /* GCM Encryption/Decryption operation */
+ if (rctx->cryptlen) {
+ ret = tegra_gcm_do_crypt(ctx, rctx);
+ if (ret)
+ goto out;
+ }
+
+ /* GCM_FINAL operation */
+ ret = tegra_gcm_do_final(ctx, rctx);
+ if (ret)
+ goto out;
+
+ if (!rctx->encrypt)
+ ret = tegra_gcm_do_verify(ctx->se, rctx);
+
+out:
+ dma_free_coherent(ctx->se->dev, SE_AES_BUFLEN,
+ rctx->outbuf.buf, rctx->outbuf.addr);
+
+outbuf_err:
+ dma_free_coherent(ctx->se->dev, SE_AES_BUFLEN,
+ rctx->inbuf.buf, rctx->inbuf.addr);
+
+ /* Finalize the request if there are no errors */
+ crypto_finalize_aead_request(ctx->se->engine, req, ret);
+
+ return 0;
+}
+
+static int tegra_aead_cra_init(struct crypto_aead *tfm)
+{
+ struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm);
+ struct aead_alg *alg = crypto_aead_alg(tfm);
+ struct tegra_se_alg *se_alg;
+ const char *algname;
+ int ret;
+
+ algname = crypto_tfm_alg_name(&tfm->base);
+
+ se_alg = container_of(alg, struct tegra_se_alg, alg.aead.base);
+
+ crypto_aead_set_reqsize(tfm, sizeof(struct tegra_aead_reqctx));
+
+ ctx->se = se_alg->se_dev;
+ ctx->key_id = 0;
+
+ ret = se_algname_to_algid(algname);
+ if (ret < 0) {
+ dev_err(ctx->se->dev, "invalid algorithm\n");
+ return ret;
+ }
+
+ ctx->alg = ret;
+
+ return 0;
+}
+
+static int tegra_ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+ struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm);
+
+ switch (authsize) {
+ case 4:
+ case 6:
+ case 8:
+ case 10:
+ case 12:
+ case 14:
+ case 16:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ctx->authsize = authsize;
+
+ return 0;
+}
+
+static int tegra_gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+ struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm);
+ int ret;
+
+ ret = crypto_gcm_check_authsize(authsize);
+ if (ret)
+ return ret;
+
+ ctx->authsize = authsize;
+
+ return 0;
+}
+
+static void tegra_aead_cra_exit(struct crypto_aead *tfm)
+{
+ struct tegra_aead_ctx *ctx = crypto_tfm_ctx(&tfm->base);
+
+ if (ctx->key_id)
+ tegra_key_invalidate(ctx->se, ctx->key_id, ctx->alg);
+}
+
+static int tegra_aead_crypt(struct aead_request *req, bool encrypt)
+{
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm);
+ struct tegra_aead_reqctx *rctx = aead_request_ctx(req);
+
+ rctx->encrypt = encrypt;
+
+ return crypto_transfer_aead_request_to_engine(ctx->se->engine, req);
+}
+
+static int tegra_aead_encrypt(struct aead_request *req)
+{
+ return tegra_aead_crypt(req, true);
+}
+
+static int tegra_aead_decrypt(struct aead_request *req)
+{
+ return tegra_aead_crypt(req, false);
+}
+
+static int tegra_aead_setkey(struct crypto_aead *tfm,
+ const u8 *key, u32 keylen)
+{
+ struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm);
+
+ if (aes_check_keylen(keylen)) {
+ dev_dbg(ctx->se->dev, "invalid key length (%d)\n", keylen);
+ return -EINVAL;
+ }
+
+ return tegra_key_submit(ctx->se, key, keylen, ctx->alg, &ctx->key_id);
+}
+
+static unsigned int tegra_cmac_prep_cmd(struct tegra_cmac_ctx *ctx,
+ struct tegra_cmac_reqctx *rctx)
+{
+ unsigned int data_count, res_bits = 0, i = 0, j;
+ struct tegra_se *se = ctx->se;
+ u32 *cpuvaddr = se->cmdbuf->addr, op;
+
+ data_count = (rctx->datbuf.size / AES_BLOCK_SIZE);
+
+ op = SE_AES_OP_WRSTALL | SE_AES_OP_START | SE_AES_OP_LASTBUF;
+
+ if (!(rctx->task & SHA_UPDATE)) {
+ op |= SE_AES_OP_FINAL;
+ res_bits = (rctx->datbuf.size % AES_BLOCK_SIZE) * 8;
+ }
+
+ if (!res_bits && data_count)
+ data_count--;
+
+ if (rctx->task & SHA_FIRST) {
+ rctx->task &= ~SHA_FIRST;
+
+ cpuvaddr[i++] = host1x_opcode_setpayload(SE_CRYPTO_CTR_REG_COUNT);
+ cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->linear_ctr);
+ /* Load 0 IV */
+ for (j = 0; j < SE_CRYPTO_CTR_REG_COUNT; j++)
+ cpuvaddr[i++] = 0;
+ }
+
+ cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->last_blk, 1);
+ cpuvaddr[i++] = SE_LAST_BLOCK_VAL(data_count) |
+ SE_LAST_BLOCK_RES_BITS(res_bits);
+
+ cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 6);
+ cpuvaddr[i++] = rctx->config;
+ cpuvaddr[i++] = rctx->crypto_config;
+
+ /* Source Address */
+ cpuvaddr[i++] = lower_32_bits(rctx->datbuf.addr);
+ cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->datbuf.addr)) |
+ SE_ADDR_HI_SZ(rctx->datbuf.size);
+ cpuvaddr[i++] = 0;
+ cpuvaddr[i++] = SE_ADDR_HI_SZ(AES_BLOCK_SIZE);
+
+ cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1);
+ cpuvaddr[i++] = op;
+
+ cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1);
+ cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) |
+ host1x_uclass_incr_syncpt_indx_f(se->syncpt_id);
+
+ return i;
+}
+
+static void tegra_cmac_copy_result(struct tegra_se *se, struct tegra_cmac_reqctx *rctx)
+{
+ int i;
+
+ for (i = 0; i < CMAC_RESULT_REG_COUNT; i++)
+ rctx->result[i] = readl(se->base + se->hw->regs->result + (i * 4));
+}
+
+static void tegra_cmac_paste_result(struct tegra_se *se, struct tegra_cmac_reqctx *rctx)
+{
+ int i;
+
+ for (i = 0; i < CMAC_RESULT_REG_COUNT; i++)
+ writel(rctx->result[i],
+ se->base + se->hw->regs->result + (i * 4));
+}
+
+static int tegra_cmac_do_update(struct ahash_request *req)
+{
+ struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct tegra_se *se = ctx->se;
+ unsigned int nblks, nresidue, cmdlen;
+ int ret;
+
+ if (!req->nbytes)
+ return 0;
+
+ nresidue = (req->nbytes + rctx->residue.size) % rctx->blk_size;
+ nblks = (req->nbytes + rctx->residue.size) / rctx->blk_size;
+
+ /*
+ * Reserve the last block as residue during final() to process.
+ */
+ if (!nresidue && nblks) {
+ nresidue += rctx->blk_size;
+ nblks--;
+ }
+
+ rctx->src_sg = req->src;
+ rctx->datbuf.size = (req->nbytes + rctx->residue.size) - nresidue;
+ rctx->total_len += rctx->datbuf.size;
+ rctx->config = tegra234_aes_cfg(SE_ALG_CMAC, 0);
+ rctx->crypto_config = SE_AES_KEY_INDEX(ctx->key_id);
+
+ /*
+ * Keep one block and residue bytes in residue and
+ * return. The bytes will be processed in final()
+ */
+ if (nblks < 1) {
+ scatterwalk_map_and_copy(rctx->residue.buf + rctx->residue.size,
+ rctx->src_sg, 0, req->nbytes, 0);
+
+ rctx->residue.size += req->nbytes;
+ return 0;
+ }
+
+ /* Copy the previous residue first */
+ if (rctx->residue.size)
+ memcpy(rctx->datbuf.buf, rctx->residue.buf, rctx->residue.size);
+
+ scatterwalk_map_and_copy(rctx->datbuf.buf + rctx->residue.size,
+ rctx->src_sg, 0, req->nbytes - nresidue, 0);
+
+ scatterwalk_map_and_copy(rctx->residue.buf, rctx->src_sg,
+ req->nbytes - nresidue, nresidue, 0);
+
+ /* Update residue value with the residue after current block */
+ rctx->residue.size = nresidue;
+
+ /*
+ * If this is not the first 'update' call, paste the previous copied
+ * intermediate results to the registers so that it gets picked up.
+ * This is to support the import/export functionality.
+ */
+ if (!(rctx->task & SHA_FIRST))
+ tegra_cmac_paste_result(ctx->se, rctx);
+
+ cmdlen = tegra_cmac_prep_cmd(ctx, rctx);
+
+ ret = tegra_se_host1x_submit(se, cmdlen);
+ /*
+ * If this is not the final update, copy the intermediate results
+ * from the registers so that it can be used in the next 'update'
+ * call. This is to support the import/export functionality.
+ */
+ if (!(rctx->task & SHA_FINAL))
+ tegra_cmac_copy_result(ctx->se, rctx);
+
+ return ret;
+}
+
+static int tegra_cmac_do_final(struct ahash_request *req)
+{
+ struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct tegra_se *se = ctx->se;
+ u32 *result = (u32 *)req->result;
+ int ret = 0, i, cmdlen;
+
+ if (!req->nbytes && !rctx->total_len && ctx->fallback_tfm) {
+ return crypto_shash_tfm_digest(ctx->fallback_tfm,
+ rctx->datbuf.buf, 0, req->result);
+ }
+
+ memcpy(rctx->datbuf.buf, rctx->residue.buf, rctx->residue.size);
+ rctx->datbuf.size = rctx->residue.size;
+ rctx->total_len += rctx->residue.size;
+ rctx->config = tegra234_aes_cfg(SE_ALG_CMAC, 0);
+
+ /* Prepare command and submit */
+ cmdlen = tegra_cmac_prep_cmd(ctx, rctx);
+ ret = tegra_se_host1x_submit(se, cmdlen);
+ if (ret)
+ goto out;
+
+ /* Read and clear Result register */
+ for (i = 0; i < CMAC_RESULT_REG_COUNT; i++)
+ result[i] = readl(se->base + se->hw->regs->result + (i * 4));
+
+ for (i = 0; i < CMAC_RESULT_REG_COUNT; i++)
+ writel(0, se->base + se->hw->regs->result + (i * 4));
+
+out:
+ dma_free_coherent(se->dev, SE_SHA_BUFLEN,
+ rctx->datbuf.buf, rctx->datbuf.addr);
+ dma_free_coherent(se->dev, crypto_ahash_blocksize(tfm) * 2,
+ rctx->residue.buf, rctx->residue.addr);
+ return ret;
+}
+
+static int tegra_cmac_do_one_req(struct crypto_engine *engine, void *areq)
+{
+ struct ahash_request *req = ahash_request_cast(areq);
+ struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct tegra_se *se = ctx->se;
+ int ret;
+
+ if (rctx->task & SHA_UPDATE) {
+ ret = tegra_cmac_do_update(req);
+ rctx->task &= ~SHA_UPDATE;
+ }
+
+ if (rctx->task & SHA_FINAL) {
+ ret = tegra_cmac_do_final(req);
+ rctx->task &= ~SHA_FINAL;
+ }
+
+ crypto_finalize_hash_request(se->engine, req, ret);
+
+ return 0;
+}
+
+static void tegra_cmac_init_fallback(struct crypto_ahash *tfm, struct tegra_cmac_ctx *ctx,
+ const char *algname)
+{
+ unsigned int statesize;
+
+ ctx->fallback_tfm = crypto_alloc_shash(algname, 0, CRYPTO_ALG_NEED_FALLBACK);
+
+ if (IS_ERR(ctx->fallback_tfm)) {
+ dev_warn(ctx->se->dev, "failed to allocate fallback for %s\n", algname);
+ ctx->fallback_tfm = NULL;
+ return;
+ }
+
+ statesize = crypto_shash_statesize(ctx->fallback_tfm);
+
+ if (statesize > sizeof(struct tegra_cmac_reqctx))
+ crypto_ahash_set_statesize(tfm, statesize);
+}
+
+static int tegra_cmac_cra_init(struct crypto_tfm *tfm)
+{
+ struct tegra_cmac_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct crypto_ahash *ahash_tfm = __crypto_ahash_cast(tfm);
+ struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg);
+ struct tegra_se_alg *se_alg;
+ const char *algname;
+ int ret;
+
+ algname = crypto_tfm_alg_name(tfm);
+ se_alg = container_of(alg, struct tegra_se_alg, alg.ahash.base);
+
+ crypto_ahash_set_reqsize(ahash_tfm, sizeof(struct tegra_cmac_reqctx));
+
+ ctx->se = se_alg->se_dev;
+ ctx->key_id = 0;
+
+ ret = se_algname_to_algid(algname);
+ if (ret < 0) {
+ dev_err(ctx->se->dev, "invalid algorithm\n");
+ return ret;
+ }
+
+ ctx->alg = ret;
+
+ tegra_cmac_init_fallback(ahash_tfm, ctx, algname);
+
+ return 0;
+}
+
+static void tegra_cmac_cra_exit(struct crypto_tfm *tfm)
+{
+ struct tegra_cmac_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (ctx->fallback_tfm)
+ crypto_free_shash(ctx->fallback_tfm);
+
+ tegra_key_invalidate(ctx->se, ctx->key_id, ctx->alg);
+}
+
+static int tegra_cmac_init(struct ahash_request *req)
+{
+ struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct tegra_se *se = ctx->se;
+ int i;
+
+ rctx->total_len = 0;
+ rctx->datbuf.size = 0;
+ rctx->residue.size = 0;
+ rctx->task = SHA_FIRST;
+ rctx->blk_size = crypto_ahash_blocksize(tfm);
+
+ rctx->residue.buf = dma_alloc_coherent(se->dev, rctx->blk_size * 2,
+ &rctx->residue.addr, GFP_KERNEL);
+ if (!rctx->residue.buf)
+ goto resbuf_fail;
+
+ rctx->residue.size = 0;
+
+ rctx->datbuf.buf = dma_alloc_coherent(se->dev, SE_SHA_BUFLEN,
+ &rctx->datbuf.addr, GFP_KERNEL);
+ if (!rctx->datbuf.buf)
+ goto datbuf_fail;
+
+ rctx->datbuf.size = 0;
+
+ /* Clear any previous result */
+ for (i = 0; i < CMAC_RESULT_REG_COUNT; i++)
+ writel(0, se->base + se->hw->regs->result + (i * 4));
+
+ return 0;
+
+datbuf_fail:
+ dma_free_coherent(se->dev, rctx->blk_size, rctx->residue.buf,
+ rctx->residue.addr);
+resbuf_fail:
+ return -ENOMEM;
+}
+
+static int tegra_cmac_setkey(struct crypto_ahash *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm);
+
+ if (aes_check_keylen(keylen)) {
+ dev_dbg(ctx->se->dev, "invalid key length (%d)\n", keylen);
+ return -EINVAL;
+ }
+
+ if (ctx->fallback_tfm)
+ crypto_shash_setkey(ctx->fallback_tfm, key, keylen);
+
+ return tegra_key_submit(ctx->se, key, keylen, ctx->alg, &ctx->key_id);
+}
+
+static int tegra_cmac_update(struct ahash_request *req)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req);
+
+ rctx->task |= SHA_UPDATE;
+
+ return crypto_transfer_hash_request_to_engine(ctx->se->engine, req);
+}
+
+static int tegra_cmac_final(struct ahash_request *req)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req);
+
+ rctx->task |= SHA_FINAL;
+
+ return crypto_transfer_hash_request_to_engine(ctx->se->engine, req);
+}
+
+static int tegra_cmac_finup(struct ahash_request *req)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req);
+
+ rctx->task |= SHA_UPDATE | SHA_FINAL;
+
+ return crypto_transfer_hash_request_to_engine(ctx->se->engine, req);
+}
+
+static int tegra_cmac_digest(struct ahash_request *req)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req);
+
+ tegra_cmac_init(req);
+ rctx->task |= SHA_UPDATE | SHA_FINAL;
+
+ return crypto_transfer_hash_request_to_engine(ctx->se->engine, req);
+}
+
+static int tegra_cmac_export(struct ahash_request *req, void *out)
+{
+ struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req);
+
+ memcpy(out, rctx, sizeof(*rctx));
+
+ return 0;
+}
+
+static int tegra_cmac_import(struct ahash_request *req, const void *in)
+{
+ struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req);
+
+ memcpy(rctx, in, sizeof(*rctx));
+
+ return 0;
+}
+
+static struct tegra_se_alg tegra_aead_algs[] = {
+ {
+ .alg.aead.op.do_one_request = tegra_gcm_do_one_req,
+ .alg.aead.base = {
+ .init = tegra_aead_cra_init,
+ .exit = tegra_aead_cra_exit,
+ .setkey = tegra_aead_setkey,
+ .setauthsize = tegra_gcm_setauthsize,
+ .encrypt = tegra_aead_encrypt,
+ .decrypt = tegra_aead_decrypt,
+ .maxauthsize = AES_BLOCK_SIZE,
+ .ivsize = GCM_AES_IV_SIZE,
+ .base = {
+ .cra_name = "gcm(aes)",
+ .cra_driver_name = "gcm-aes-tegra",
+ .cra_priority = 500,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct tegra_aead_ctx),
+ .cra_alignmask = 0xf,
+ .cra_module = THIS_MODULE,
+ },
+ }
+ }, {
+ .alg.aead.op.do_one_request = tegra_ccm_do_one_req,
+ .alg.aead.base = {
+ .init = tegra_aead_cra_init,
+ .exit = tegra_aead_cra_exit,
+ .setkey = tegra_aead_setkey,
+ .setauthsize = tegra_ccm_setauthsize,
+ .encrypt = tegra_aead_encrypt,
+ .decrypt = tegra_aead_decrypt,
+ .maxauthsize = AES_BLOCK_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .chunksize = AES_BLOCK_SIZE,
+ .base = {
+ .cra_name = "ccm(aes)",
+ .cra_driver_name = "ccm-aes-tegra",
+ .cra_priority = 500,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct tegra_aead_ctx),
+ .cra_alignmask = 0xf,
+ .cra_module = THIS_MODULE,
+ },
+ }
+ }
+};
+
+static struct tegra_se_alg tegra_cmac_algs[] = {
+ {
+ .alg.ahash.op.do_one_request = tegra_cmac_do_one_req,
+ .alg.ahash.base = {
+ .init = tegra_cmac_init,
+ .setkey = tegra_cmac_setkey,
+ .update = tegra_cmac_update,
+ .final = tegra_cmac_final,
+ .finup = tegra_cmac_finup,
+ .digest = tegra_cmac_digest,
+ .export = tegra_cmac_export,
+ .import = tegra_cmac_import,
+ .halg.digestsize = AES_BLOCK_SIZE,
+ .halg.statesize = sizeof(struct tegra_cmac_reqctx),
+ .halg.base = {
+ .cra_name = "cmac(aes)",
+ .cra_driver_name = "tegra-se-cmac",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_AHASH,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct tegra_cmac_ctx),
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ .cra_init = tegra_cmac_cra_init,
+ .cra_exit = tegra_cmac_cra_exit,
+ }
+ }
+ }
+};
+
+int tegra_init_aes(struct tegra_se *se)
+{
+ struct aead_engine_alg *aead_alg;
+ struct ahash_engine_alg *ahash_alg;
+ struct skcipher_engine_alg *sk_alg;
+ int i, ret;
+
+ se->manifest = tegra_aes_kac_manifest;
+
+ for (i = 0; i < ARRAY_SIZE(tegra_aes_algs); i++) {
+ sk_alg = &tegra_aes_algs[i].alg.skcipher;
+ tegra_aes_algs[i].se_dev = se;
+
+ ret = crypto_engine_register_skcipher(sk_alg);
+ if (ret) {
+ dev_err(se->dev, "failed to register %s\n",
+ sk_alg->base.base.cra_name);
+ goto err_aes;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(tegra_aead_algs); i++) {
+ aead_alg = &tegra_aead_algs[i].alg.aead;
+ tegra_aead_algs[i].se_dev = se;
+
+ ret = crypto_engine_register_aead(aead_alg);
+ if (ret) {
+ dev_err(se->dev, "failed to register %s\n",
+ aead_alg->base.base.cra_name);
+ goto err_aead;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(tegra_cmac_algs); i++) {
+ ahash_alg = &tegra_cmac_algs[i].alg.ahash;
+ tegra_cmac_algs[i].se_dev = se;
+
+ ret = crypto_engine_register_ahash(ahash_alg);
+ if (ret) {
+ dev_err(se->dev, "failed to register %s\n",
+ ahash_alg->base.halg.base.cra_name);
+ goto err_cmac;
+ }
+ }
+
+ return 0;
+
+err_cmac:
+ while (i--)
+ crypto_engine_unregister_ahash(&tegra_cmac_algs[i].alg.ahash);
+
+ i = ARRAY_SIZE(tegra_aead_algs);
+err_aead:
+ while (i--)
+ crypto_engine_unregister_aead(&tegra_aead_algs[i].alg.aead);
+
+ i = ARRAY_SIZE(tegra_aes_algs);
+err_aes:
+ while (i--)
+ crypto_engine_unregister_skcipher(&tegra_aes_algs[i].alg.skcipher);
+
+ return ret;
+}
+
+void tegra_deinit_aes(struct tegra_se *se)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(tegra_aes_algs); i++)
+ crypto_engine_unregister_skcipher(&tegra_aes_algs[i].alg.skcipher);
+
+ for (i = 0; i < ARRAY_SIZE(tegra_aead_algs); i++)
+ crypto_engine_unregister_aead(&tegra_aead_algs[i].alg.aead);
+
+ for (i = 0; i < ARRAY_SIZE(tegra_cmac_algs); i++)
+ crypto_engine_unregister_ahash(&tegra_cmac_algs[i].alg.ahash);
+}
diff --git a/drivers/crypto/tegra/tegra-se-hash.c b/drivers/crypto/tegra/tegra-se-hash.c
new file mode 100644
index 000000000000..4d4bd727f498
--- /dev/null
+++ b/drivers/crypto/tegra/tegra-se-hash.c
@@ -0,0 +1,1060 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+/*
+ * Crypto driver to handle HASH algorithms using NVIDIA Security Engine.
+ */
+
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include <crypto/aes.h>
+#include <crypto/sha1.h>
+#include <crypto/sha2.h>
+#include <crypto/sha3.h>
+#include <crypto/internal/des.h>
+#include <crypto/engine.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/internal/hash.h>
+
+#include "tegra-se.h"
+
+struct tegra_sha_ctx {
+ struct tegra_se *se;
+ unsigned int alg;
+ bool fallback;
+ u32 key_id;
+ struct crypto_ahash *fallback_tfm;
+};
+
+struct tegra_sha_reqctx {
+ struct scatterlist *src_sg;
+ struct tegra_se_datbuf datbuf;
+ struct tegra_se_datbuf residue;
+ struct tegra_se_datbuf digest;
+ unsigned int alg;
+ unsigned int config;
+ unsigned int total_len;
+ unsigned int blk_size;
+ unsigned int task;
+ u32 key_id;
+ u32 result[HASH_RESULT_REG_COUNT];
+ struct ahash_request fallback_req;
+};
+
+static int tegra_sha_get_config(u32 alg)
+{
+ int cfg = 0;
+
+ switch (alg) {
+ case SE_ALG_SHA1:
+ cfg |= SE_SHA_ENC_ALG_SHA;
+ cfg |= SE_SHA_ENC_MODE_SHA1;
+ break;
+
+ case SE_ALG_HMAC_SHA224:
+ cfg |= SE_SHA_ENC_ALG_HMAC;
+ fallthrough;
+ case SE_ALG_SHA224:
+ cfg |= SE_SHA_ENC_ALG_SHA;
+ cfg |= SE_SHA_ENC_MODE_SHA224;
+ break;
+
+ case SE_ALG_HMAC_SHA256:
+ cfg |= SE_SHA_ENC_ALG_HMAC;
+ fallthrough;
+ case SE_ALG_SHA256:
+ cfg |= SE_SHA_ENC_ALG_SHA;
+ cfg |= SE_SHA_ENC_MODE_SHA256;
+ break;
+
+ case SE_ALG_HMAC_SHA384:
+ cfg |= SE_SHA_ENC_ALG_HMAC;
+ fallthrough;
+ case SE_ALG_SHA384:
+ cfg |= SE_SHA_ENC_ALG_SHA;
+ cfg |= SE_SHA_ENC_MODE_SHA384;
+ break;
+
+ case SE_ALG_HMAC_SHA512:
+ cfg |= SE_SHA_ENC_ALG_HMAC;
+ fallthrough;
+ case SE_ALG_SHA512:
+ cfg |= SE_SHA_ENC_ALG_SHA;
+ cfg |= SE_SHA_ENC_MODE_SHA512;
+ break;
+
+ case SE_ALG_SHA3_224:
+ cfg |= SE_SHA_ENC_ALG_SHA;
+ cfg |= SE_SHA_ENC_MODE_SHA3_224;
+ break;
+ case SE_ALG_SHA3_256:
+ cfg |= SE_SHA_ENC_ALG_SHA;
+ cfg |= SE_SHA_ENC_MODE_SHA3_256;
+ break;
+ case SE_ALG_SHA3_384:
+ cfg |= SE_SHA_ENC_ALG_SHA;
+ cfg |= SE_SHA_ENC_MODE_SHA3_384;
+ break;
+ case SE_ALG_SHA3_512:
+ cfg |= SE_SHA_ENC_ALG_SHA;
+ cfg |= SE_SHA_ENC_MODE_SHA3_512;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return cfg;
+}
+
+static int tegra_sha_fallback_init(struct ahash_request *req)
+{
+ struct tegra_sha_reqctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm);
+
+ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
+ rctx->fallback_req.base.flags = req->base.flags &
+ CRYPTO_TFM_REQ_MAY_SLEEP;
+
+ return crypto_ahash_init(&rctx->fallback_req);
+}
+
+static int tegra_sha_fallback_update(struct ahash_request *req)
+{
+ struct tegra_sha_reqctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm);
+
+ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
+ rctx->fallback_req.base.flags = req->base.flags &
+ CRYPTO_TFM_REQ_MAY_SLEEP;
+ rctx->fallback_req.nbytes = req->nbytes;
+ rctx->fallback_req.src = req->src;
+
+ return crypto_ahash_update(&rctx->fallback_req);
+}
+
+static int tegra_sha_fallback_final(struct ahash_request *req)
+{
+ struct tegra_sha_reqctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm);
+
+ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
+ rctx->fallback_req.base.flags = req->base.flags &
+ CRYPTO_TFM_REQ_MAY_SLEEP;
+ rctx->fallback_req.result = req->result;
+
+ return crypto_ahash_final(&rctx->fallback_req);
+}
+
+static int tegra_sha_fallback_finup(struct ahash_request *req)
+{
+ struct tegra_sha_reqctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm);
+
+ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
+ rctx->fallback_req.base.flags = req->base.flags &
+ CRYPTO_TFM_REQ_MAY_SLEEP;
+
+ rctx->fallback_req.nbytes = req->nbytes;
+ rctx->fallback_req.src = req->src;
+ rctx->fallback_req.result = req->result;
+
+ return crypto_ahash_finup(&rctx->fallback_req);
+}
+
+static int tegra_sha_fallback_digest(struct ahash_request *req)
+{
+ struct tegra_sha_reqctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm);
+
+ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
+ rctx->fallback_req.base.flags = req->base.flags &
+ CRYPTO_TFM_REQ_MAY_SLEEP;
+
+ rctx->fallback_req.nbytes = req->nbytes;
+ rctx->fallback_req.src = req->src;
+ rctx->fallback_req.result = req->result;
+
+ return crypto_ahash_digest(&rctx->fallback_req);
+}
+
+static int tegra_sha_fallback_import(struct ahash_request *req, const void *in)
+{
+ struct tegra_sha_reqctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm);
+
+ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
+ rctx->fallback_req.base.flags = req->base.flags &
+ CRYPTO_TFM_REQ_MAY_SLEEP;
+
+ return crypto_ahash_import(&rctx->fallback_req, in);
+}
+
+static int tegra_sha_fallback_export(struct ahash_request *req, void *out)
+{
+ struct tegra_sha_reqctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm);
+
+ ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm);
+ rctx->fallback_req.base.flags = req->base.flags &
+ CRYPTO_TFM_REQ_MAY_SLEEP;
+
+ return crypto_ahash_export(&rctx->fallback_req, out);
+}
+
+static int tegra_sha_prep_cmd(struct tegra_se *se, u32 *cpuvaddr,
+ struct tegra_sha_reqctx *rctx)
+{
+ u64 msg_len, msg_left;
+ int i = 0;
+
+ msg_len = rctx->total_len * 8;
+ msg_left = rctx->datbuf.size * 8;
+
+ /*
+ * If IN_ADDR_HI_0.SZ > SHA_MSG_LEFT_[0-3] to the HASH engine,
+ * HW treats it as the last buffer and process the data.
+ * Therefore, add an extra byte to msg_left if it is not the
+ * last buffer.
+ */
+ if (rctx->task & SHA_UPDATE) {
+ msg_left += 8;
+ msg_len += 8;
+ }
+
+ cpuvaddr[i++] = host1x_opcode_setpayload(8);
+ cpuvaddr[i++] = se_host1x_opcode_incr_w(SE_SHA_MSG_LENGTH);
+ cpuvaddr[i++] = lower_32_bits(msg_len);
+ cpuvaddr[i++] = upper_32_bits(msg_len);
+ cpuvaddr[i++] = 0;
+ cpuvaddr[i++] = 0;
+ cpuvaddr[i++] = lower_32_bits(msg_left);
+ cpuvaddr[i++] = upper_32_bits(msg_left);
+ cpuvaddr[i++] = 0;
+ cpuvaddr[i++] = 0;
+ cpuvaddr[i++] = host1x_opcode_setpayload(6);
+ cpuvaddr[i++] = se_host1x_opcode_incr_w(SE_SHA_CFG);
+ cpuvaddr[i++] = rctx->config;
+
+ if (rctx->task & SHA_FIRST) {
+ cpuvaddr[i++] = SE_SHA_TASK_HASH_INIT;
+ rctx->task &= ~SHA_FIRST;
+ } else {
+ cpuvaddr[i++] = 0;
+ }
+
+ cpuvaddr[i++] = rctx->datbuf.addr;
+ cpuvaddr[i++] = (u32)(SE_ADDR_HI_MSB(upper_32_bits(rctx->datbuf.addr)) |
+ SE_ADDR_HI_SZ(rctx->datbuf.size));
+ cpuvaddr[i++] = rctx->digest.addr;
+ cpuvaddr[i++] = (u32)(SE_ADDR_HI_MSB(upper_32_bits(rctx->digest.addr)) |
+ SE_ADDR_HI_SZ(rctx->digest.size));
+ if (rctx->key_id) {
+ cpuvaddr[i++] = host1x_opcode_setpayload(1);
+ cpuvaddr[i++] = se_host1x_opcode_nonincr_w(SE_SHA_CRYPTO_CFG);
+ cpuvaddr[i++] = SE_AES_KEY_INDEX(rctx->key_id);
+ }
+
+ cpuvaddr[i++] = host1x_opcode_setpayload(1);
+ cpuvaddr[i++] = se_host1x_opcode_nonincr_w(SE_SHA_OPERATION);
+ cpuvaddr[i++] = SE_SHA_OP_WRSTALL |
+ SE_SHA_OP_START |
+ SE_SHA_OP_LASTBUF;
+ cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1);
+ cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) |
+ host1x_uclass_incr_syncpt_indx_f(se->syncpt_id);
+
+ dev_dbg(se->dev, "msg len %llu msg left %llu cfg %#x",
+ msg_len, msg_left, rctx->config);
+
+ return i;
+}
+
+static void tegra_sha_copy_hash_result(struct tegra_se *se, struct tegra_sha_reqctx *rctx)
+{
+ int i;
+
+ for (i = 0; i < HASH_RESULT_REG_COUNT; i++)
+ rctx->result[i] = readl(se->base + se->hw->regs->result + (i * 4));
+}
+
+static void tegra_sha_paste_hash_result(struct tegra_se *se, struct tegra_sha_reqctx *rctx)
+{
+ int i;
+
+ for (i = 0; i < HASH_RESULT_REG_COUNT; i++)
+ writel(rctx->result[i],
+ se->base + se->hw->regs->result + (i * 4));
+}
+
+static int tegra_sha_do_update(struct ahash_request *req)
+{
+ struct tegra_sha_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
+ struct tegra_sha_reqctx *rctx = ahash_request_ctx(req);
+ unsigned int nblks, nresidue, size, ret;
+ u32 *cpuvaddr = ctx->se->cmdbuf->addr;
+
+ nresidue = (req->nbytes + rctx->residue.size) % rctx->blk_size;
+ nblks = (req->nbytes + rctx->residue.size) / rctx->blk_size;
+
+ /*
+ * If nbytes is a multiple of block size and there is no residue,
+ * then reserve the last block as residue during final() to process.
+ */
+ if (!nresidue && nblks) {
+ nresidue = rctx->blk_size;
+ nblks--;
+ }
+
+ rctx->src_sg = req->src;
+ rctx->datbuf.size = (req->nbytes + rctx->residue.size) - nresidue;
+ rctx->total_len += rctx->datbuf.size;
+
+ /*
+ * If nbytes are less than a block size, copy it residue and
+ * return. The bytes will be processed in final()
+ */
+ if (nblks < 1) {
+ scatterwalk_map_and_copy(rctx->residue.buf + rctx->residue.size,
+ rctx->src_sg, 0, req->nbytes, 0);
+
+ rctx->residue.size += req->nbytes;
+ return 0;
+ }
+
+ /* Copy the previous residue first */
+ if (rctx->residue.size)
+ memcpy(rctx->datbuf.buf, rctx->residue.buf, rctx->residue.size);
+
+ scatterwalk_map_and_copy(rctx->datbuf.buf + rctx->residue.size,
+ rctx->src_sg, 0, req->nbytes - nresidue, 0);
+
+ scatterwalk_map_and_copy(rctx->residue.buf, rctx->src_sg,
+ req->nbytes - nresidue, nresidue, 0);
+
+ /* Update residue value with the residue after current block */
+ rctx->residue.size = nresidue;
+
+ rctx->config = tegra_sha_get_config(rctx->alg) |
+ SE_SHA_DST_HASH_REG;
+
+ /*
+ * If this is not the first 'update' call, paste the previous copied
+ * intermediate results to the registers so that it gets picked up.
+ * This is to support the import/export functionality.
+ */
+ if (!(rctx->task & SHA_FIRST))
+ tegra_sha_paste_hash_result(ctx->se, rctx);
+
+ size = tegra_sha_prep_cmd(ctx->se, cpuvaddr, rctx);
+
+ ret = tegra_se_host1x_submit(ctx->se, size);
+
+ /*
+ * If this is not the final update, copy the intermediate results
+ * from the registers so that it can be used in the next 'update'
+ * call. This is to support the import/export functionality.
+ */
+ if (!(rctx->task & SHA_FINAL))
+ tegra_sha_copy_hash_result(ctx->se, rctx);
+
+ return ret;
+}
+
+static int tegra_sha_do_final(struct ahash_request *req)
+{
+ struct tegra_sha_reqctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct tegra_se *se = ctx->se;
+ u32 *cpuvaddr = se->cmdbuf->addr;
+ int size, ret = 0;
+
+ memcpy(rctx->datbuf.buf, rctx->residue.buf, rctx->residue.size);
+ rctx->datbuf.size = rctx->residue.size;
+ rctx->total_len += rctx->residue.size;
+
+ rctx->config = tegra_sha_get_config(rctx->alg) |
+ SE_SHA_DST_MEMORY;
+
+ size = tegra_sha_prep_cmd(se, cpuvaddr, rctx);
+
+ ret = tegra_se_host1x_submit(se, size);
+ if (ret)
+ goto out;
+
+ /* Copy result */
+ memcpy(req->result, rctx->digest.buf, rctx->digest.size);
+
+out:
+ dma_free_coherent(se->dev, SE_SHA_BUFLEN,
+ rctx->datbuf.buf, rctx->datbuf.addr);
+ dma_free_coherent(se->dev, crypto_ahash_blocksize(tfm),
+ rctx->residue.buf, rctx->residue.addr);
+ dma_free_coherent(se->dev, rctx->digest.size, rctx->digest.buf,
+ rctx->digest.addr);
+ return ret;
+}
+
+static int tegra_sha_do_one_req(struct crypto_engine *engine, void *areq)
+{
+ struct ahash_request *req = ahash_request_cast(areq);
+ struct tegra_sha_reqctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct tegra_se *se = ctx->se;
+ int ret = 0;
+
+ if (rctx->task & SHA_UPDATE) {
+ ret = tegra_sha_do_update(req);
+ rctx->task &= ~SHA_UPDATE;
+ }
+
+ if (rctx->task & SHA_FINAL) {
+ ret = tegra_sha_do_final(req);
+ rctx->task &= ~SHA_FINAL;
+ }
+
+ crypto_finalize_hash_request(se->engine, req, ret);
+
+ return 0;
+}
+
+static void tegra_sha_init_fallback(struct crypto_ahash *tfm, struct tegra_sha_ctx *ctx,
+ const char *algname)
+{
+ unsigned int statesize;
+
+ ctx->fallback_tfm = crypto_alloc_ahash(algname, 0, CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_NEED_FALLBACK);
+
+ if (IS_ERR(ctx->fallback_tfm)) {
+ dev_warn(ctx->se->dev,
+ "failed to allocate fallback for %s\n", algname);
+ ctx->fallback_tfm = NULL;
+ return;
+ }
+
+ statesize = crypto_ahash_statesize(ctx->fallback_tfm);
+
+ if (statesize > sizeof(struct tegra_sha_reqctx))
+ crypto_ahash_set_statesize(tfm, statesize);
+
+ /* Update reqsize if fallback is added */
+ crypto_ahash_set_reqsize(tfm,
+ sizeof(struct tegra_sha_reqctx) +
+ crypto_ahash_reqsize(ctx->fallback_tfm));
+}
+
+static int tegra_sha_cra_init(struct crypto_tfm *tfm)
+{
+ struct tegra_sha_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct crypto_ahash *ahash_tfm = __crypto_ahash_cast(tfm);
+ struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg);
+ struct tegra_se_alg *se_alg;
+ const char *algname;
+ int ret;
+
+ algname = crypto_tfm_alg_name(tfm);
+ se_alg = container_of(alg, struct tegra_se_alg, alg.ahash.base);
+
+ crypto_ahash_set_reqsize(ahash_tfm, sizeof(struct tegra_sha_reqctx));
+
+ ctx->se = se_alg->se_dev;
+ ctx->fallback = false;
+ ctx->key_id = 0;
+
+ ret = se_algname_to_algid(algname);
+ if (ret < 0) {
+ dev_err(ctx->se->dev, "invalid algorithm\n");
+ return ret;
+ }
+
+ if (se_alg->alg_base)
+ tegra_sha_init_fallback(ahash_tfm, ctx, algname);
+
+ ctx->alg = ret;
+
+ return 0;
+}
+
+static void tegra_sha_cra_exit(struct crypto_tfm *tfm)
+{
+ struct tegra_sha_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (ctx->fallback_tfm)
+ crypto_free_ahash(ctx->fallback_tfm);
+
+ tegra_key_invalidate(ctx->se, ctx->key_id, ctx->alg);
+}
+
+static int tegra_sha_init(struct ahash_request *req)
+{
+ struct tegra_sha_reqctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct tegra_se *se = ctx->se;
+
+ if (ctx->fallback)
+ return tegra_sha_fallback_init(req);
+
+ rctx->total_len = 0;
+ rctx->datbuf.size = 0;
+ rctx->residue.size = 0;
+ rctx->key_id = ctx->key_id;
+ rctx->task = SHA_FIRST;
+ rctx->alg = ctx->alg;
+ rctx->blk_size = crypto_ahash_blocksize(tfm);
+ rctx->digest.size = crypto_ahash_digestsize(tfm);
+
+ rctx->digest.buf = dma_alloc_coherent(se->dev, rctx->digest.size,
+ &rctx->digest.addr, GFP_KERNEL);
+ if (!rctx->digest.buf)
+ goto digbuf_fail;
+
+ rctx->residue.buf = dma_alloc_coherent(se->dev, rctx->blk_size,
+ &rctx->residue.addr, GFP_KERNEL);
+ if (!rctx->residue.buf)
+ goto resbuf_fail;
+
+ rctx->datbuf.buf = dma_alloc_coherent(se->dev, SE_SHA_BUFLEN,
+ &rctx->datbuf.addr, GFP_KERNEL);
+ if (!rctx->datbuf.buf)
+ goto datbuf_fail;
+
+ return 0;
+
+datbuf_fail:
+ dma_free_coherent(se->dev, rctx->blk_size, rctx->residue.buf,
+ rctx->residue.addr);
+resbuf_fail:
+ dma_free_coherent(se->dev, SE_SHA_BUFLEN, rctx->datbuf.buf,
+ rctx->datbuf.addr);
+digbuf_fail:
+ return -ENOMEM;
+}
+
+static int tegra_hmac_fallback_setkey(struct tegra_sha_ctx *ctx, const u8 *key,
+ unsigned int keylen)
+{
+ if (!ctx->fallback_tfm) {
+ dev_dbg(ctx->se->dev, "invalid key length (%d)\n", keylen);
+ return -EINVAL;
+ }
+
+ ctx->fallback = true;
+ return crypto_ahash_setkey(ctx->fallback_tfm, key, keylen);
+}
+
+static int tegra_hmac_setkey(struct crypto_ahash *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm);
+
+ if (aes_check_keylen(keylen))
+ return tegra_hmac_fallback_setkey(ctx, key, keylen);
+
+ ctx->fallback = false;
+
+ return tegra_key_submit(ctx->se, key, keylen, ctx->alg, &ctx->key_id);
+}
+
+static int tegra_sha_update(struct ahash_request *req)
+{
+ struct tegra_sha_reqctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm);
+
+ if (ctx->fallback)
+ return tegra_sha_fallback_update(req);
+
+ rctx->task |= SHA_UPDATE;
+
+ return crypto_transfer_hash_request_to_engine(ctx->se->engine, req);
+}
+
+static int tegra_sha_final(struct ahash_request *req)
+{
+ struct tegra_sha_reqctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm);
+
+ if (ctx->fallback)
+ return tegra_sha_fallback_final(req);
+
+ rctx->task |= SHA_FINAL;
+
+ return crypto_transfer_hash_request_to_engine(ctx->se->engine, req);
+}
+
+static int tegra_sha_finup(struct ahash_request *req)
+{
+ struct tegra_sha_reqctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm);
+
+ if (ctx->fallback)
+ return tegra_sha_fallback_finup(req);
+
+ rctx->task |= SHA_UPDATE | SHA_FINAL;
+
+ return crypto_transfer_hash_request_to_engine(ctx->se->engine, req);
+}
+
+static int tegra_sha_digest(struct ahash_request *req)
+{
+ struct tegra_sha_reqctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm);
+
+ if (ctx->fallback)
+ return tegra_sha_fallback_digest(req);
+
+ tegra_sha_init(req);
+ rctx->task |= SHA_UPDATE | SHA_FINAL;
+
+ return crypto_transfer_hash_request_to_engine(ctx->se->engine, req);
+}
+
+static int tegra_sha_export(struct ahash_request *req, void *out)
+{
+ struct tegra_sha_reqctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm);
+
+ if (ctx->fallback)
+ return tegra_sha_fallback_export(req, out);
+
+ memcpy(out, rctx, sizeof(*rctx));
+
+ return 0;
+}
+
+static int tegra_sha_import(struct ahash_request *req, const void *in)
+{
+ struct tegra_sha_reqctx *rctx = ahash_request_ctx(req);
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm);
+
+ if (ctx->fallback)
+ return tegra_sha_fallback_import(req, in);
+
+ memcpy(rctx, in, sizeof(*rctx));
+
+ return 0;
+}
+
+static struct tegra_se_alg tegra_hash_algs[] = {
+ {
+ .alg.ahash.op.do_one_request = tegra_sha_do_one_req,
+ .alg.ahash.base = {
+ .init = tegra_sha_init,
+ .update = tegra_sha_update,
+ .final = tegra_sha_final,
+ .finup = tegra_sha_finup,
+ .digest = tegra_sha_digest,
+ .export = tegra_sha_export,
+ .import = tegra_sha_import,
+ .halg.digestsize = SHA1_DIGEST_SIZE,
+ .halg.statesize = sizeof(struct tegra_sha_reqctx),
+ .halg.base = {
+ .cra_name = "sha1",
+ .cra_driver_name = "tegra-se-sha1",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_AHASH,
+ .cra_blocksize = SHA1_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct tegra_sha_ctx),
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ .cra_init = tegra_sha_cra_init,
+ .cra_exit = tegra_sha_cra_exit,
+ }
+ }
+ }, {
+ .alg.ahash.op.do_one_request = tegra_sha_do_one_req,
+ .alg.ahash.base = {
+ .init = tegra_sha_init,
+ .update = tegra_sha_update,
+ .final = tegra_sha_final,
+ .finup = tegra_sha_finup,
+ .digest = tegra_sha_digest,
+ .export = tegra_sha_export,
+ .import = tegra_sha_import,
+ .halg.digestsize = SHA224_DIGEST_SIZE,
+ .halg.statesize = sizeof(struct tegra_sha_reqctx),
+ .halg.base = {
+ .cra_name = "sha224",
+ .cra_driver_name = "tegra-se-sha224",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_AHASH,
+ .cra_blocksize = SHA224_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct tegra_sha_ctx),
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ .cra_init = tegra_sha_cra_init,
+ .cra_exit = tegra_sha_cra_exit,
+ }
+ }
+ }, {
+ .alg.ahash.op.do_one_request = tegra_sha_do_one_req,
+ .alg.ahash.base = {
+ .init = tegra_sha_init,
+ .update = tegra_sha_update,
+ .final = tegra_sha_final,
+ .finup = tegra_sha_finup,
+ .digest = tegra_sha_digest,
+ .export = tegra_sha_export,
+ .import = tegra_sha_import,
+ .halg.digestsize = SHA256_DIGEST_SIZE,
+ .halg.statesize = sizeof(struct tegra_sha_reqctx),
+ .halg.base = {
+ .cra_name = "sha256",
+ .cra_driver_name = "tegra-se-sha256",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_AHASH,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct tegra_sha_ctx),
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ .cra_init = tegra_sha_cra_init,
+ .cra_exit = tegra_sha_cra_exit,
+ }
+ }
+ }, {
+ .alg.ahash.op.do_one_request = tegra_sha_do_one_req,
+ .alg.ahash.base = {
+ .init = tegra_sha_init,
+ .update = tegra_sha_update,
+ .final = tegra_sha_final,
+ .finup = tegra_sha_finup,
+ .digest = tegra_sha_digest,
+ .export = tegra_sha_export,
+ .import = tegra_sha_import,
+ .halg.digestsize = SHA384_DIGEST_SIZE,
+ .halg.statesize = sizeof(struct tegra_sha_reqctx),
+ .halg.base = {
+ .cra_name = "sha384",
+ .cra_driver_name = "tegra-se-sha384",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_AHASH,
+ .cra_blocksize = SHA384_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct tegra_sha_ctx),
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ .cra_init = tegra_sha_cra_init,
+ .cra_exit = tegra_sha_cra_exit,
+ }
+ }
+ }, {
+ .alg.ahash.op.do_one_request = tegra_sha_do_one_req,
+ .alg.ahash.base = {
+ .init = tegra_sha_init,
+ .update = tegra_sha_update,
+ .final = tegra_sha_final,
+ .finup = tegra_sha_finup,
+ .digest = tegra_sha_digest,
+ .export = tegra_sha_export,
+ .import = tegra_sha_import,
+ .halg.digestsize = SHA512_DIGEST_SIZE,
+ .halg.statesize = sizeof(struct tegra_sha_reqctx),
+ .halg.base = {
+ .cra_name = "sha512",
+ .cra_driver_name = "tegra-se-sha512",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_AHASH,
+ .cra_blocksize = SHA512_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct tegra_sha_ctx),
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ .cra_init = tegra_sha_cra_init,
+ .cra_exit = tegra_sha_cra_exit,
+ }
+ }
+ }, {
+ .alg.ahash.op.do_one_request = tegra_sha_do_one_req,
+ .alg.ahash.base = {
+ .init = tegra_sha_init,
+ .update = tegra_sha_update,
+ .final = tegra_sha_final,
+ .finup = tegra_sha_finup,
+ .digest = tegra_sha_digest,
+ .export = tegra_sha_export,
+ .import = tegra_sha_import,
+ .halg.digestsize = SHA3_224_DIGEST_SIZE,
+ .halg.statesize = sizeof(struct tegra_sha_reqctx),
+ .halg.base = {
+ .cra_name = "sha3-224",
+ .cra_driver_name = "tegra-se-sha3-224",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_AHASH,
+ .cra_blocksize = SHA3_224_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct tegra_sha_ctx),
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ .cra_init = tegra_sha_cra_init,
+ .cra_exit = tegra_sha_cra_exit,
+ }
+ }
+ }, {
+ .alg.ahash.op.do_one_request = tegra_sha_do_one_req,
+ .alg.ahash.base = {
+ .init = tegra_sha_init,
+ .update = tegra_sha_update,
+ .final = tegra_sha_final,
+ .finup = tegra_sha_finup,
+ .digest = tegra_sha_digest,
+ .export = tegra_sha_export,
+ .import = tegra_sha_import,
+ .halg.digestsize = SHA3_256_DIGEST_SIZE,
+ .halg.statesize = sizeof(struct tegra_sha_reqctx),
+ .halg.base = {
+ .cra_name = "sha3-256",
+ .cra_driver_name = "tegra-se-sha3-256",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_AHASH,
+ .cra_blocksize = SHA3_256_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct tegra_sha_ctx),
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ .cra_init = tegra_sha_cra_init,
+ .cra_exit = tegra_sha_cra_exit,
+ }
+ }
+ }, {
+ .alg.ahash.op.do_one_request = tegra_sha_do_one_req,
+ .alg.ahash.base = {
+ .init = tegra_sha_init,
+ .update = tegra_sha_update,
+ .final = tegra_sha_final,
+ .finup = tegra_sha_finup,
+ .digest = tegra_sha_digest,
+ .export = tegra_sha_export,
+ .import = tegra_sha_import,
+ .halg.digestsize = SHA3_384_DIGEST_SIZE,
+ .halg.statesize = sizeof(struct tegra_sha_reqctx),
+ .halg.base = {
+ .cra_name = "sha3-384",
+ .cra_driver_name = "tegra-se-sha3-384",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_AHASH,
+ .cra_blocksize = SHA3_384_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct tegra_sha_ctx),
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ .cra_init = tegra_sha_cra_init,
+ .cra_exit = tegra_sha_cra_exit,
+ }
+ }
+ }, {
+ .alg.ahash.op.do_one_request = tegra_sha_do_one_req,
+ .alg.ahash.base = {
+ .init = tegra_sha_init,
+ .update = tegra_sha_update,
+ .final = tegra_sha_final,
+ .finup = tegra_sha_finup,
+ .digest = tegra_sha_digest,
+ .export = tegra_sha_export,
+ .import = tegra_sha_import,
+ .halg.digestsize = SHA3_512_DIGEST_SIZE,
+ .halg.statesize = sizeof(struct tegra_sha_reqctx),
+ .halg.base = {
+ .cra_name = "sha3-512",
+ .cra_driver_name = "tegra-se-sha3-512",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_AHASH,
+ .cra_blocksize = SHA3_512_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct tegra_sha_ctx),
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ .cra_init = tegra_sha_cra_init,
+ .cra_exit = tegra_sha_cra_exit,
+ }
+ }
+ }, {
+ .alg_base = "sha224",
+ .alg.ahash.op.do_one_request = tegra_sha_do_one_req,
+ .alg.ahash.base = {
+ .init = tegra_sha_init,
+ .update = tegra_sha_update,
+ .final = tegra_sha_final,
+ .finup = tegra_sha_finup,
+ .digest = tegra_sha_digest,
+ .export = tegra_sha_export,
+ .import = tegra_sha_import,
+ .setkey = tegra_hmac_setkey,
+ .halg.digestsize = SHA224_DIGEST_SIZE,
+ .halg.statesize = sizeof(struct tegra_sha_reqctx),
+ .halg.base = {
+ .cra_name = "hmac(sha224)",
+ .cra_driver_name = "tegra-se-hmac-sha224",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK,
+ .cra_blocksize = SHA224_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct tegra_sha_ctx),
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ .cra_init = tegra_sha_cra_init,
+ .cra_exit = tegra_sha_cra_exit,
+ }
+ }
+ }, {
+ .alg_base = "sha256",
+ .alg.ahash.op.do_one_request = tegra_sha_do_one_req,
+ .alg.ahash.base = {
+ .init = tegra_sha_init,
+ .update = tegra_sha_update,
+ .final = tegra_sha_final,
+ .finup = tegra_sha_finup,
+ .digest = tegra_sha_digest,
+ .export = tegra_sha_export,
+ .import = tegra_sha_import,
+ .setkey = tegra_hmac_setkey,
+ .halg.digestsize = SHA256_DIGEST_SIZE,
+ .halg.statesize = sizeof(struct tegra_sha_reqctx),
+ .halg.base = {
+ .cra_name = "hmac(sha256)",
+ .cra_driver_name = "tegra-se-hmac-sha256",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct tegra_sha_ctx),
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ .cra_init = tegra_sha_cra_init,
+ .cra_exit = tegra_sha_cra_exit,
+ }
+ }
+ }, {
+ .alg_base = "sha384",
+ .alg.ahash.op.do_one_request = tegra_sha_do_one_req,
+ .alg.ahash.base = {
+ .init = tegra_sha_init,
+ .update = tegra_sha_update,
+ .final = tegra_sha_final,
+ .finup = tegra_sha_finup,
+ .digest = tegra_sha_digest,
+ .export = tegra_sha_export,
+ .import = tegra_sha_import,
+ .setkey = tegra_hmac_setkey,
+ .halg.digestsize = SHA384_DIGEST_SIZE,
+ .halg.statesize = sizeof(struct tegra_sha_reqctx),
+ .halg.base = {
+ .cra_name = "hmac(sha384)",
+ .cra_driver_name = "tegra-se-hmac-sha384",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK,
+ .cra_blocksize = SHA384_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct tegra_sha_ctx),
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ .cra_init = tegra_sha_cra_init,
+ .cra_exit = tegra_sha_cra_exit,
+ }
+ }
+ }, {
+ .alg_base = "sha512",
+ .alg.ahash.op.do_one_request = tegra_sha_do_one_req,
+ .alg.ahash.base = {
+ .init = tegra_sha_init,
+ .update = tegra_sha_update,
+ .final = tegra_sha_final,
+ .finup = tegra_sha_finup,
+ .digest = tegra_sha_digest,
+ .export = tegra_sha_export,
+ .import = tegra_sha_import,
+ .setkey = tegra_hmac_setkey,
+ .halg.digestsize = SHA512_DIGEST_SIZE,
+ .halg.statesize = sizeof(struct tegra_sha_reqctx),
+ .halg.base = {
+ .cra_name = "hmac(sha512)",
+ .cra_driver_name = "tegra-se-hmac-sha512",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK,
+ .cra_blocksize = SHA512_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct tegra_sha_ctx),
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ .cra_init = tegra_sha_cra_init,
+ .cra_exit = tegra_sha_cra_exit,
+ }
+ }
+ }
+};
+
+static int tegra_hash_kac_manifest(u32 user, u32 alg, u32 keylen)
+{
+ int manifest;
+
+ manifest = SE_KAC_USER_NS;
+
+ switch (alg) {
+ case SE_ALG_HMAC_SHA224:
+ case SE_ALG_HMAC_SHA256:
+ case SE_ALG_HMAC_SHA384:
+ case SE_ALG_HMAC_SHA512:
+ manifest |= SE_KAC_HMAC;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ switch (keylen) {
+ case AES_KEYSIZE_128:
+ manifest |= SE_KAC_SIZE_128;
+ break;
+ case AES_KEYSIZE_192:
+ manifest |= SE_KAC_SIZE_192;
+ break;
+ case AES_KEYSIZE_256:
+ default:
+ manifest |= SE_KAC_SIZE_256;
+ break;
+ }
+
+ return manifest;
+}
+
+int tegra_init_hash(struct tegra_se *se)
+{
+ struct ahash_engine_alg *alg;
+ int i, ret;
+
+ se->manifest = tegra_hash_kac_manifest;
+
+ for (i = 0; i < ARRAY_SIZE(tegra_hash_algs); i++) {
+ tegra_hash_algs[i].se_dev = se;
+ alg = &tegra_hash_algs[i].alg.ahash;
+
+ ret = crypto_engine_register_ahash(alg);
+ if (ret) {
+ dev_err(se->dev, "failed to register %s\n",
+ alg->base.halg.base.cra_name);
+ goto sha_err;
+ }
+ }
+
+ return 0;
+
+sha_err:
+ while (i--)
+ crypto_engine_unregister_ahash(&tegra_hash_algs[i].alg.ahash);
+
+ return ret;
+}
+
+void tegra_deinit_hash(struct tegra_se *se)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(tegra_hash_algs); i++)
+ crypto_engine_unregister_ahash(&tegra_hash_algs[i].alg.ahash);
+}
diff --git a/drivers/crypto/tegra/tegra-se-key.c b/drivers/crypto/tegra/tegra-se-key.c
new file mode 100644
index 000000000000..ac14678dbd30
--- /dev/null
+++ b/drivers/crypto/tegra/tegra-se-key.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+/*
+ * Crypto driver file to manage keys of NVIDIA Security Engine.
+ */
+
+#include <linux/bitops.h>
+#include <linux/module.h>
+#include <crypto/aes.h>
+
+#include "tegra-se.h"
+
+#define SE_KEY_FULL_MASK GENMASK(SE_MAX_KEYSLOT, 0)
+
+/* Reserve keyslot 0, 14, 15 */
+#define SE_KEY_RSVD_MASK (BIT(0) | BIT(14) | BIT(15))
+#define SE_KEY_VALID_MASK (SE_KEY_FULL_MASK & ~SE_KEY_RSVD_MASK)
+
+/* Mutex lock to guard keyslots */
+static DEFINE_MUTEX(kslt_lock);
+
+/* Keyslot bitmask (0 = available, 1 = in use/not available) */
+static u16 tegra_se_keyslots = SE_KEY_RSVD_MASK;
+
+static u16 tegra_keyslot_alloc(void)
+{
+ u16 keyid;
+
+ mutex_lock(&kslt_lock);
+ /* Check if all key slots are full */
+ if (tegra_se_keyslots == GENMASK(SE_MAX_KEYSLOT, 0)) {
+ mutex_unlock(&kslt_lock);
+ return 0;
+ }
+
+ keyid = ffz(tegra_se_keyslots);
+ tegra_se_keyslots |= BIT(keyid);
+
+ mutex_unlock(&kslt_lock);
+
+ return keyid;
+}
+
+static void tegra_keyslot_free(u16 slot)
+{
+ mutex_lock(&kslt_lock);
+ tegra_se_keyslots &= ~(BIT(slot));
+ mutex_unlock(&kslt_lock);
+}
+
+static unsigned int tegra_key_prep_ins_cmd(struct tegra_se *se, u32 *cpuvaddr,
+ const u32 *key, u32 keylen, u16 slot, u32 alg)
+{
+ int i = 0, j;
+
+ cpuvaddr[i++] = host1x_opcode_setpayload(1);
+ cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->op);
+ cpuvaddr[i++] = SE_AES_OP_WRSTALL | SE_AES_OP_DUMMY;
+
+ cpuvaddr[i++] = host1x_opcode_setpayload(1);
+ cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->manifest);
+ cpuvaddr[i++] = se->manifest(se->owner, alg, keylen);
+ cpuvaddr[i++] = host1x_opcode_setpayload(1);
+ cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->key_dst);
+
+ cpuvaddr[i++] = SE_AES_KEY_DST_INDEX(slot);
+
+ for (j = 0; j < keylen / 4; j++) {
+ /* Set key address */
+ cpuvaddr[i++] = host1x_opcode_setpayload(1);
+ cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->key_addr);
+ cpuvaddr[i++] = j;
+
+ /* Set key data */
+ cpuvaddr[i++] = host1x_opcode_setpayload(1);
+ cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->key_data);
+ cpuvaddr[i++] = key[j];
+ }
+
+ cpuvaddr[i++] = host1x_opcode_setpayload(1);
+ cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->config);
+ cpuvaddr[i++] = SE_CFG_INS;
+
+ cpuvaddr[i++] = host1x_opcode_setpayload(1);
+ cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->op);
+ cpuvaddr[i++] = SE_AES_OP_WRSTALL | SE_AES_OP_START |
+ SE_AES_OP_LASTBUF;
+
+ cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1);
+ cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) |
+ host1x_uclass_incr_syncpt_indx_f(se->syncpt_id);
+
+ dev_dbg(se->dev, "key-slot %u key-manifest %#x\n",
+ slot, se->manifest(se->owner, alg, keylen));
+
+ return i;
+}
+
+static bool tegra_key_in_kslt(u32 keyid)
+{
+ bool ret;
+
+ if (keyid > SE_MAX_KEYSLOT)
+ return false;
+
+ mutex_lock(&kslt_lock);
+ ret = ((BIT(keyid) & SE_KEY_VALID_MASK) &&
+ (BIT(keyid) & tegra_se_keyslots));
+ mutex_unlock(&kslt_lock);
+
+ return ret;
+}
+
+static int tegra_key_insert(struct tegra_se *se, const u8 *key,
+ u32 keylen, u16 slot, u32 alg)
+{
+ const u32 *keyval = (u32 *)key;
+ u32 *addr = se->cmdbuf->addr, size;
+
+ size = tegra_key_prep_ins_cmd(se, addr, keyval, keylen, slot, alg);
+
+ return tegra_se_host1x_submit(se, size);
+}
+
+void tegra_key_invalidate(struct tegra_se *se, u32 keyid, u32 alg)
+{
+ u8 zkey[AES_MAX_KEY_SIZE] = {0};
+
+ if (!keyid)
+ return;
+
+ /* Overwrite the key with 0s */
+ tegra_key_insert(se, zkey, AES_MAX_KEY_SIZE, keyid, alg);
+
+ tegra_keyslot_free(keyid);
+}
+
+int tegra_key_submit(struct tegra_se *se, const u8 *key, u32 keylen, u32 alg, u32 *keyid)
+{
+ int ret;
+
+ /* Use the existing slot if it is already allocated */
+ if (!tegra_key_in_kslt(*keyid)) {
+ *keyid = tegra_keyslot_alloc();
+ if (!(*keyid)) {
+ dev_err(se->dev, "failed to allocate key slot\n");
+ return -ENOMEM;
+ }
+ }
+
+ ret = tegra_key_insert(se, key, keylen, *keyid, alg);
+ if (ret)
+ return ret;
+
+ return 0;
+}
diff --git a/drivers/crypto/tegra/tegra-se-main.c b/drivers/crypto/tegra/tegra-se-main.c
new file mode 100644
index 000000000000..9955874b3dc3
--- /dev/null
+++ b/drivers/crypto/tegra/tegra-se-main.c
@@ -0,0 +1,437 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+/*
+ * Crypto driver for NVIDIA Security Engine in Tegra Chips
+ */
+
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/mod_devicetable.h>
+
+#include <crypto/engine.h>
+
+#include "tegra-se.h"
+
+static struct host1x_bo *tegra_se_cmdbuf_get(struct host1x_bo *host_bo)
+{
+ struct tegra_se_cmdbuf *cmdbuf = container_of(host_bo, struct tegra_se_cmdbuf, bo);
+
+ kref_get(&cmdbuf->ref);
+
+ return host_bo;
+}
+
+static void tegra_se_cmdbuf_release(struct kref *ref)
+{
+ struct tegra_se_cmdbuf *cmdbuf = container_of(ref, struct tegra_se_cmdbuf, ref);
+
+ dma_free_attrs(cmdbuf->dev, cmdbuf->size, cmdbuf->addr,
+ cmdbuf->iova, 0);
+
+ kfree(cmdbuf);
+}
+
+static void tegra_se_cmdbuf_put(struct host1x_bo *host_bo)
+{
+ struct tegra_se_cmdbuf *cmdbuf = container_of(host_bo, struct tegra_se_cmdbuf, bo);
+
+ kref_put(&cmdbuf->ref, tegra_se_cmdbuf_release);
+}
+
+static struct host1x_bo_mapping *
+tegra_se_cmdbuf_pin(struct device *dev, struct host1x_bo *bo, enum dma_data_direction direction)
+{
+ struct tegra_se_cmdbuf *cmdbuf = container_of(bo, struct tegra_se_cmdbuf, bo);
+ struct host1x_bo_mapping *map;
+ int err;
+
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
+ if (!map)
+ return ERR_PTR(-ENOMEM);
+
+ kref_init(&map->ref);
+ map->bo = host1x_bo_get(bo);
+ map->direction = direction;
+ map->dev = dev;
+
+ map->sgt = kzalloc(sizeof(*map->sgt), GFP_KERNEL);
+ if (!map->sgt) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ err = dma_get_sgtable(dev, map->sgt, cmdbuf->addr,
+ cmdbuf->iova, cmdbuf->words * 4);
+ if (err)
+ goto free_sgt;
+
+ err = dma_map_sgtable(dev, map->sgt, direction, 0);
+ if (err)
+ goto free_sgt;
+
+ map->phys = sg_dma_address(map->sgt->sgl);
+ map->size = cmdbuf->words * 4;
+ map->chunks = err;
+
+ return map;
+
+free_sgt:
+ sg_free_table(map->sgt);
+ kfree(map->sgt);
+free:
+ kfree(map);
+ return ERR_PTR(err);
+}
+
+static void tegra_se_cmdbuf_unpin(struct host1x_bo_mapping *map)
+{
+ if (!map)
+ return;
+
+ dma_unmap_sgtable(map->dev, map->sgt, map->direction, 0);
+ sg_free_table(map->sgt);
+ kfree(map->sgt);
+ host1x_bo_put(map->bo);
+
+ kfree(map);
+}
+
+static void *tegra_se_cmdbuf_mmap(struct host1x_bo *host_bo)
+{
+ struct tegra_se_cmdbuf *cmdbuf = container_of(host_bo, struct tegra_se_cmdbuf, bo);
+
+ return cmdbuf->addr;
+}
+
+static void tegra_se_cmdbuf_munmap(struct host1x_bo *host_bo, void *addr)
+{
+}
+
+static const struct host1x_bo_ops tegra_se_cmdbuf_ops = {
+ .get = tegra_se_cmdbuf_get,
+ .put = tegra_se_cmdbuf_put,
+ .pin = tegra_se_cmdbuf_pin,
+ .unpin = tegra_se_cmdbuf_unpin,
+ .mmap = tegra_se_cmdbuf_mmap,
+ .munmap = tegra_se_cmdbuf_munmap,
+};
+
+static struct tegra_se_cmdbuf *tegra_se_host1x_bo_alloc(struct tegra_se *se, ssize_t size)
+{
+ struct tegra_se_cmdbuf *cmdbuf;
+ struct device *dev = se->dev->parent;
+
+ cmdbuf = kzalloc(sizeof(*cmdbuf), GFP_KERNEL);
+ if (!cmdbuf)
+ return NULL;
+
+ cmdbuf->addr = dma_alloc_attrs(dev, size, &cmdbuf->iova,
+ GFP_KERNEL, 0);
+ if (!cmdbuf->addr)
+ return NULL;
+
+ cmdbuf->size = size;
+ cmdbuf->dev = dev;
+
+ host1x_bo_init(&cmdbuf->bo, &tegra_se_cmdbuf_ops);
+ kref_init(&cmdbuf->ref);
+
+ return cmdbuf;
+}
+
+int tegra_se_host1x_submit(struct tegra_se *se, u32 size)
+{
+ struct host1x_job *job;
+ int ret;
+
+ job = host1x_job_alloc(se->channel, 1, 0, true);
+ if (!job) {
+ dev_err(se->dev, "failed to allocate host1x job\n");
+ return -ENOMEM;
+ }
+
+ job->syncpt = host1x_syncpt_get(se->syncpt);
+ job->syncpt_incrs = 1;
+ job->client = &se->client;
+ job->class = se->client.class;
+ job->serialize = true;
+ job->engine_fallback_streamid = se->stream_id;
+ job->engine_streamid_offset = SE_STREAM_ID;
+
+ se->cmdbuf->words = size;
+
+ host1x_job_add_gather(job, &se->cmdbuf->bo, size, 0);
+
+ ret = host1x_job_pin(job, se->dev);
+ if (ret) {
+ dev_err(se->dev, "failed to pin host1x job\n");
+ goto job_put;
+ }
+
+ ret = host1x_job_submit(job);
+ if (ret) {
+ dev_err(se->dev, "failed to submit host1x job\n");
+ goto job_unpin;
+ }
+
+ ret = host1x_syncpt_wait(job->syncpt, job->syncpt_end,
+ MAX_SCHEDULE_TIMEOUT, NULL);
+ if (ret) {
+ dev_err(se->dev, "host1x job timed out\n");
+ return ret;
+ }
+
+ host1x_job_put(job);
+ return 0;
+
+job_unpin:
+ host1x_job_unpin(job);
+job_put:
+ host1x_job_put(job);
+
+ return ret;
+}
+
+static int tegra_se_client_init(struct host1x_client *client)
+{
+ struct tegra_se *se = container_of(client, struct tegra_se, client);
+ int ret;
+
+ se->channel = host1x_channel_request(&se->client);
+ if (!se->channel) {
+ dev_err(se->dev, "host1x channel map failed\n");
+ return -ENODEV;
+ }
+
+ se->syncpt = host1x_syncpt_request(&se->client, 0);
+ if (!se->syncpt) {
+ dev_err(se->dev, "host1x syncpt allocation failed\n");
+ ret = -EINVAL;
+ goto channel_put;
+ }
+
+ se->syncpt_id = host1x_syncpt_id(se->syncpt);
+
+ se->cmdbuf = tegra_se_host1x_bo_alloc(se, SZ_4K);
+ if (!se->cmdbuf) {
+ ret = -ENOMEM;
+ goto syncpt_put;
+ }
+
+ ret = se->hw->init_alg(se);
+ if (ret) {
+ dev_err(se->dev, "failed to register algorithms\n");
+ goto cmdbuf_put;
+ }
+
+ return 0;
+
+cmdbuf_put:
+ tegra_se_cmdbuf_put(&se->cmdbuf->bo);
+syncpt_put:
+ host1x_syncpt_put(se->syncpt);
+channel_put:
+ host1x_channel_put(se->channel);
+
+ return ret;
+}
+
+static int tegra_se_client_deinit(struct host1x_client *client)
+{
+ struct tegra_se *se = container_of(client, struct tegra_se, client);
+
+ se->hw->deinit_alg(se);
+ tegra_se_cmdbuf_put(&se->cmdbuf->bo);
+ host1x_syncpt_put(se->syncpt);
+ host1x_channel_put(se->channel);
+
+ return 0;
+}
+
+static const struct host1x_client_ops tegra_se_client_ops = {
+ .init = tegra_se_client_init,
+ .exit = tegra_se_client_deinit,
+};
+
+static int tegra_se_host1x_register(struct tegra_se *se)
+{
+ INIT_LIST_HEAD(&se->client.list);
+ se->client.dev = se->dev;
+ se->client.ops = &tegra_se_client_ops;
+ se->client.class = se->hw->host1x_class;
+ se->client.num_syncpts = 1;
+
+ host1x_client_register(&se->client);
+
+ return 0;
+}
+
+static int tegra_se_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct tegra_se *se;
+ int ret;
+
+ se = devm_kzalloc(dev, sizeof(*se), GFP_KERNEL);
+ if (!se)
+ return -ENOMEM;
+
+ se->dev = dev;
+ se->owner = TEGRA_GPSE_ID;
+ se->hw = device_get_match_data(&pdev->dev);
+
+ se->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(se->base))
+ return PTR_ERR(se->base);
+
+ dma_set_mask_and_coherent(dev, DMA_BIT_MASK(39));
+ platform_set_drvdata(pdev, se);
+
+ se->clk = devm_clk_get_enabled(se->dev, NULL);
+ if (IS_ERR(se->clk))
+ return dev_err_probe(dev, PTR_ERR(se->clk),
+ "failed to enable clocks\n");
+
+ if (!tegra_dev_iommu_get_stream_id(dev, &se->stream_id))
+ return dev_err_probe(dev, -ENODEV,
+ "failed to get IOMMU stream ID\n");
+
+ writel(se->stream_id, se->base + SE_STREAM_ID);
+
+ se->engine = crypto_engine_alloc_init(dev, 0);
+ if (!se->engine)
+ return dev_err_probe(dev, -ENOMEM, "failed to init crypto engine\n");
+
+ ret = crypto_engine_start(se->engine);
+ if (ret) {
+ crypto_engine_exit(se->engine);
+ return dev_err_probe(dev, ret, "failed to start crypto engine\n");
+ }
+
+ ret = tegra_se_host1x_register(se);
+ if (ret) {
+ crypto_engine_stop(se->engine);
+ crypto_engine_exit(se->engine);
+ return dev_err_probe(dev, ret, "failed to init host1x params\n");
+ }
+
+ return 0;
+}
+
+static void tegra_se_remove(struct platform_device *pdev)
+{
+ struct tegra_se *se = platform_get_drvdata(pdev);
+
+ crypto_engine_stop(se->engine);
+ crypto_engine_exit(se->engine);
+ iommu_fwspec_free(se->dev);
+ host1x_client_unregister(&se->client);
+}
+
+static const struct tegra_se_regs tegra234_aes1_regs = {
+ .config = SE_AES1_CFG,
+ .op = SE_AES1_OPERATION,
+ .last_blk = SE_AES1_LAST_BLOCK,
+ .linear_ctr = SE_AES1_LINEAR_CTR,
+ .aad_len = SE_AES1_AAD_LEN,
+ .cryp_msg_len = SE_AES1_CRYPTO_MSG_LEN,
+ .manifest = SE_AES1_KEYMANIFEST,
+ .key_addr = SE_AES1_KEY_ADDR,
+ .key_data = SE_AES1_KEY_DATA,
+ .key_dst = SE_AES1_KEY_DST,
+ .result = SE_AES1_CMAC_RESULT,
+};
+
+static const struct tegra_se_regs tegra234_hash_regs = {
+ .config = SE_SHA_CFG,
+ .op = SE_SHA_OPERATION,
+ .manifest = SE_SHA_KEYMANIFEST,
+ .key_addr = SE_SHA_KEY_ADDR,
+ .key_data = SE_SHA_KEY_DATA,
+ .key_dst = SE_SHA_KEY_DST,
+ .result = SE_SHA_HASH_RESULT,
+};
+
+static const struct tegra_se_hw tegra234_aes_hw = {
+ .regs = &tegra234_aes1_regs,
+ .kac_ver = 1,
+ .host1x_class = 0x3b,
+ .init_alg = tegra_init_aes,
+ .deinit_alg = tegra_deinit_aes,
+};
+
+static const struct tegra_se_hw tegra234_hash_hw = {
+ .regs = &tegra234_hash_regs,
+ .kac_ver = 1,
+ .host1x_class = 0x3d,
+ .init_alg = tegra_init_hash,
+ .deinit_alg = tegra_deinit_hash,
+};
+
+static const struct of_device_id tegra_se_of_match[] = {
+ {
+ .compatible = "nvidia,tegra234-se-aes",
+ .data = &tegra234_aes_hw
+ }, {
+ .compatible = "nvidia,tegra234-se-hash",
+ .data = &tegra234_hash_hw,
+ },
+ { },
+};
+MODULE_DEVICE_TABLE(of, tegra_se_of_match);
+
+static struct platform_driver tegra_se_driver = {
+ .driver = {
+ .name = "tegra-se",
+ .of_match_table = tegra_se_of_match,
+ },
+ .probe = tegra_se_probe,
+ .remove_new = tegra_se_remove,
+};
+
+static int tegra_se_host1x_probe(struct host1x_device *dev)
+{
+ return host1x_device_init(dev);
+}
+
+static int tegra_se_host1x_remove(struct host1x_device *dev)
+{
+ host1x_device_exit(dev);
+
+ return 0;
+}
+
+static struct host1x_driver tegra_se_host1x_driver = {
+ .driver = {
+ .name = "tegra-se-host1x",
+ },
+ .probe = tegra_se_host1x_probe,
+ .remove = tegra_se_host1x_remove,
+ .subdevs = tegra_se_of_match,
+};
+
+static int __init tegra_se_module_init(void)
+{
+ int ret;
+
+ ret = host1x_driver_register(&tegra_se_host1x_driver);
+ if (ret)
+ return ret;
+
+ return platform_driver_register(&tegra_se_driver);
+}
+
+static void __exit tegra_se_module_exit(void)
+{
+ host1x_driver_unregister(&tegra_se_host1x_driver);
+ platform_driver_unregister(&tegra_se_driver);
+}
+
+module_init(tegra_se_module_init);
+module_exit(tegra_se_module_exit);
+
+MODULE_DESCRIPTION("NVIDIA Tegra Security Engine Driver");
+MODULE_AUTHOR("Akhil R <akhilrajeev@nvidia.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/crypto/tegra/tegra-se.h b/drivers/crypto/tegra/tegra-se.h
new file mode 100644
index 000000000000..b9dd7ceb8783
--- /dev/null
+++ b/drivers/crypto/tegra/tegra-se.h
@@ -0,0 +1,560 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ *
+ * Header file for NVIDIA Security Engine driver.
+ */
+
+#ifndef _TEGRA_SE_H
+#define _TEGRA_SE_H
+
+#include <linux/bitfield.h>
+#include <linux/iommu.h>
+#include <linux/host1x.h>
+#include <crypto/aead.h>
+#include <crypto/engine.h>
+#include <crypto/hash.h>
+#include <crypto/sha1.h>
+#include <crypto/sha3.h>
+#include <crypto/skcipher.h>
+
+#define SE_OWNERSHIP 0x14
+#define SE_OWNERSHIP_UID(x) FIELD_GET(GENMASK(7, 0), x)
+#define TEGRA_GPSE_ID 3
+
+#define SE_STREAM_ID 0x90
+
+#define SE_SHA_CFG 0x4004
+#define SE_SHA_KEY_ADDR 0x4094
+#define SE_SHA_KEY_DATA 0x4098
+#define SE_SHA_KEYMANIFEST 0x409c
+#define SE_SHA_CRYPTO_CFG 0x40a4
+#define SE_SHA_KEY_DST 0x40a8
+#define SE_SHA_SRC_KSLT 0x4180
+#define SE_SHA_TGT_KSLT 0x4184
+#define SE_SHA_MSG_LENGTH 0x401c
+#define SE_SHA_OPERATION 0x407c
+#define SE_SHA_HASH_RESULT 0x40b0
+
+#define SE_SHA_ENC_MODE(x) FIELD_PREP(GENMASK(31, 24), x)
+#define SE_SHA_ENC_MODE_SHA1 SE_SHA_ENC_MODE(0)
+#define SE_SHA_ENC_MODE_SHA224 SE_SHA_ENC_MODE(4)
+#define SE_SHA_ENC_MODE_SHA256 SE_SHA_ENC_MODE(5)
+#define SE_SHA_ENC_MODE_SHA384 SE_SHA_ENC_MODE(6)
+#define SE_SHA_ENC_MODE_SHA512 SE_SHA_ENC_MODE(7)
+#define SE_SHA_ENC_MODE_SHA_CTX_INTEGRITY SE_SHA_ENC_MODE(8)
+#define SE_SHA_ENC_MODE_SHA3_224 SE_SHA_ENC_MODE(9)
+#define SE_SHA_ENC_MODE_SHA3_256 SE_SHA_ENC_MODE(10)
+#define SE_SHA_ENC_MODE_SHA3_384 SE_SHA_ENC_MODE(11)
+#define SE_SHA_ENC_MODE_SHA3_512 SE_SHA_ENC_MODE(12)
+#define SE_SHA_ENC_MODE_SHAKE128 SE_SHA_ENC_MODE(13)
+#define SE_SHA_ENC_MODE_SHAKE256 SE_SHA_ENC_MODE(14)
+#define SE_SHA_ENC_MODE_HMAC_SHA256_1KEY SE_SHA_ENC_MODE(0)
+#define SE_SHA_ENC_MODE_HMAC_SHA256_2KEY SE_SHA_ENC_MODE(1)
+#define SE_SHA_ENC_MODE_SM3_256 SE_SHA_ENC_MODE(0)
+
+#define SE_SHA_CFG_ENC_ALG(x) FIELD_PREP(GENMASK(15, 12), x)
+#define SE_SHA_ENC_ALG_NOP SE_SHA_CFG_ENC_ALG(0)
+#define SE_SHA_ENC_ALG_SHA_ENC SE_SHA_CFG_ENC_ALG(1)
+#define SE_SHA_ENC_ALG_RNG SE_SHA_CFG_ENC_ALG(2)
+#define SE_SHA_ENC_ALG_SHA SE_SHA_CFG_ENC_ALG(3)
+#define SE_SHA_ENC_ALG_SM3 SE_SHA_CFG_ENC_ALG(4)
+#define SE_SHA_ENC_ALG_HMAC SE_SHA_CFG_ENC_ALG(7)
+#define SE_SHA_ENC_ALG_KDF SE_SHA_CFG_ENC_ALG(8)
+#define SE_SHA_ENC_ALG_KEY_INVLD SE_SHA_CFG_ENC_ALG(10)
+#define SE_SHA_ENC_ALG_KEY_INQUIRE SE_SHA_CFG_ENC_ALG(12)
+#define SE_SHA_ENC_ALG_INS SE_SHA_CFG_ENC_ALG(13)
+
+#define SE_SHA_OP_LASTBUF FIELD_PREP(BIT(16), 1)
+#define SE_SHA_OP_WRSTALL FIELD_PREP(BIT(15), 1)
+
+#define SE_SHA_OP_OP(x) FIELD_PREP(GENMASK(2, 0), x)
+#define SE_SHA_OP_START SE_SHA_OP_OP(1)
+#define SE_SHA_OP_RESTART_OUT SE_SHA_OP_OP(2)
+#define SE_SHA_OP_RESTART_IN SE_SHA_OP_OP(4)
+#define SE_SHA_OP_RESTART_INOUT SE_SHA_OP_OP(5)
+#define SE_SHA_OP_DUMMY SE_SHA_OP_OP(6)
+
+#define SE_SHA_CFG_DEC_ALG(x) FIELD_PREP(GENMASK(11, 8), x)
+#define SE_SHA_DEC_ALG_NOP SE_SHA_CFG_DEC_ALG(0)
+#define SE_SHA_DEC_ALG_AES_DEC SE_SHA_CFG_DEC_ALG(1)
+#define SE_SHA_DEC_ALG_HMAC SE_SHA_CFG_DEC_ALG(7)
+#define SE_SHA_DEC_ALG_HMAC_VERIFY SE_SHA_CFG_DEC_ALG(9)
+
+#define SE_SHA_CFG_DST(x) FIELD_PREP(GENMASK(4, 2), x)
+#define SE_SHA_DST_MEMORY SE_SHA_CFG_DST(0)
+#define SE_SHA_DST_HASH_REG SE_SHA_CFG_DST(1)
+#define SE_SHA_DST_KEYTABLE SE_SHA_CFG_DST(2)
+#define SE_SHA_DST_SRK SE_SHA_CFG_DST(3)
+
+#define SE_SHA_TASK_HASH_INIT BIT(0)
+
+/* AES Configuration */
+#define SE_AES0_CFG 0x1004
+#define SE_AES0_CRYPTO_CONFIG 0x1008
+#define SE_AES0_KEY_DST 0x1030
+#define SE_AES0_OPERATION 0x1038
+#define SE_AES0_LINEAR_CTR 0x101c
+#define SE_AES0_LAST_BLOCK 0x102c
+#define SE_AES0_KEY_ADDR 0x10bc
+#define SE_AES0_KEY_DATA 0x10c0
+#define SE_AES0_CMAC_RESULT 0x10c4
+#define SE_AES0_SRC_KSLT 0x1100
+#define SE_AES0_TGT_KSLT 0x1104
+#define SE_AES0_KEYMANIFEST 0x1114
+#define SE_AES0_AAD_LEN 0x112c
+#define SE_AES0_CRYPTO_MSG_LEN 0x1134
+
+#define SE_AES1_CFG 0x2004
+#define SE_AES1_CRYPTO_CONFIG 0x2008
+#define SE_AES1_KEY_DST 0x2030
+#define SE_AES1_OPERATION 0x2038
+#define SE_AES1_LINEAR_CTR 0x201c
+#define SE_AES1_LAST_BLOCK 0x202c
+#define SE_AES1_KEY_ADDR 0x20bc
+#define SE_AES1_KEY_DATA 0x20c0
+#define SE_AES1_CMAC_RESULT 0x20c4
+#define SE_AES1_SRC_KSLT 0x2100
+#define SE_AES1_TGT_KSLT 0x2104
+#define SE_AES1_KEYMANIFEST 0x2114
+#define SE_AES1_AAD_LEN 0x212c
+#define SE_AES1_CRYPTO_MSG_LEN 0x2134
+
+#define SE_AES_CFG_ENC_MODE(x) FIELD_PREP(GENMASK(31, 24), x)
+#define SE_AES_ENC_MODE_GMAC SE_AES_CFG_ENC_MODE(3)
+#define SE_AES_ENC_MODE_GCM SE_AES_CFG_ENC_MODE(4)
+#define SE_AES_ENC_MODE_GCM_FINAL SE_AES_CFG_ENC_MODE(5)
+#define SE_AES_ENC_MODE_CMAC SE_AES_CFG_ENC_MODE(7)
+#define SE_AES_ENC_MODE_CBC_MAC SE_AES_CFG_ENC_MODE(12)
+
+#define SE_AES_CFG_DEC_MODE(x) FIELD_PREP(GENMASK(23, 16), x)
+#define SE_AES_DEC_MODE_GMAC SE_AES_CFG_DEC_MODE(3)
+#define SE_AES_DEC_MODE_GCM SE_AES_CFG_DEC_MODE(4)
+#define SE_AES_DEC_MODE_GCM_FINAL SE_AES_CFG_DEC_MODE(5)
+#define SE_AES_DEC_MODE_CBC_MAC SE_AES_CFG_DEC_MODE(12)
+
+#define SE_AES_CFG_ENC_ALG(x) FIELD_PREP(GENMASK(15, 12), x)
+#define SE_AES_ENC_ALG_NOP SE_AES_CFG_ENC_ALG(0)
+#define SE_AES_ENC_ALG_AES_ENC SE_AES_CFG_ENC_ALG(1)
+#define SE_AES_ENC_ALG_RNG SE_AES_CFG_ENC_ALG(2)
+#define SE_AES_ENC_ALG_SHA SE_AES_CFG_ENC_ALG(3)
+#define SE_AES_ENC_ALG_HMAC SE_AES_CFG_ENC_ALG(7)
+#define SE_AES_ENC_ALG_KDF SE_AES_CFG_ENC_ALG(8)
+#define SE_AES_ENC_ALG_INS SE_AES_CFG_ENC_ALG(13)
+
+#define SE_AES_CFG_DEC_ALG(x) FIELD_PREP(GENMASK(11, 8), x)
+#define SE_AES_DEC_ALG_NOP SE_AES_CFG_DEC_ALG(0)
+#define SE_AES_DEC_ALG_AES_DEC SE_AES_CFG_DEC_ALG(1)
+
+#define SE_AES_CFG_DST(x) FIELD_PREP(GENMASK(4, 2), x)
+#define SE_AES_DST_MEMORY SE_AES_CFG_DST(0)
+#define SE_AES_DST_HASH_REG SE_AES_CFG_DST(1)
+#define SE_AES_DST_KEYTABLE SE_AES_CFG_DST(2)
+#define SE_AES_DST_SRK SE_AES_CFG_DST(3)
+
+/* AES Crypto Configuration */
+#define SE_AES_KEY2_INDEX(x) FIELD_PREP(GENMASK(31, 28), x)
+#define SE_AES_KEY_INDEX(x) FIELD_PREP(GENMASK(27, 24), x)
+
+#define SE_AES_CRYPTO_CFG_SCC_DIS FIELD_PREP(BIT(20), 1)
+
+#define SE_AES_CRYPTO_CFG_CTR_CNTN(x) FIELD_PREP(GENMASK(18, 11), x)
+
+#define SE_AES_CRYPTO_CFG_IV_MODE(x) FIELD_PREP(BIT(10), x)
+#define SE_AES_IV_MODE_SWIV SE_AES_CRYPTO_CFG_IV_MODE(0)
+#define SE_AES_IV_MODE_HWIV SE_AES_CRYPTO_CFG_IV_MODE(1)
+
+#define SE_AES_CRYPTO_CFG_CORE_SEL(x) FIELD_PREP(BIT(9), x)
+#define SE_AES_CORE_SEL_DECRYPT SE_AES_CRYPTO_CFG_CORE_SEL(0)
+#define SE_AES_CORE_SEL_ENCRYPT SE_AES_CRYPTO_CFG_CORE_SEL(1)
+
+#define SE_AES_CRYPTO_CFG_IV_SEL(x) FIELD_PREP(GENMASK(8, 7), x)
+#define SE_AES_IV_SEL_UPDATED SE_AES_CRYPTO_CFG_IV_SEL(1)
+#define SE_AES_IV_SEL_REG SE_AES_CRYPTO_CFG_IV_SEL(2)
+#define SE_AES_IV_SEL_RANDOM SE_AES_CRYPTO_CFG_IV_SEL(3)
+
+#define SE_AES_CRYPTO_CFG_VCTRAM_SEL(x) FIELD_PREP(GENMASK(6, 5), x)
+#define SE_AES_VCTRAM_SEL_MEMORY SE_AES_CRYPTO_CFG_VCTRAM_SEL(0)
+#define SE_AES_VCTRAM_SEL_TWEAK SE_AES_CRYPTO_CFG_VCTRAM_SEL(1)
+#define SE_AES_VCTRAM_SEL_AESOUT SE_AES_CRYPTO_CFG_VCTRAM_SEL(2)
+#define SE_AES_VCTRAM_SEL_PREV_MEM SE_AES_CRYPTO_CFG_VCTRAM_SEL(3)
+
+#define SE_AES_CRYPTO_CFG_INPUT_SEL(x) FIELD_PREP(GENMASK(4, 3), x)
+#define SE_AES_INPUT_SEL_MEMORY SE_AES_CRYPTO_CFG_INPUT_SEL(0)
+#define SE_AES_INPUT_SEL_RANDOM SE_AES_CRYPTO_CFG_INPUT_SEL(1)
+#define SE_AES_INPUT_SEL_AESOUT SE_AES_CRYPTO_CFG_INPUT_SEL(2)
+#define SE_AES_INPUT_SEL_LINEAR_CTR SE_AES_CRYPTO_CFG_INPUT_SEL(3)
+#define SE_AES_INPUT_SEL_REG SE_AES_CRYPTO_CFG_INPUT_SEL(1)
+
+#define SE_AES_CRYPTO_CFG_XOR_POS(x) FIELD_PREP(GENMASK(2, 1), x)
+#define SE_AES_XOR_POS_BYPASS SE_AES_CRYPTO_CFG_XOR_POS(0)
+#define SE_AES_XOR_POS_BOTH SE_AES_CRYPTO_CFG_XOR_POS(1)
+#define SE_AES_XOR_POS_TOP SE_AES_CRYPTO_CFG_XOR_POS(2)
+#define SE_AES_XOR_POS_BOTTOM SE_AES_CRYPTO_CFG_XOR_POS(3)
+
+#define SE_AES_CRYPTO_CFG_HASH_EN(x) FIELD_PREP(BIT(0), x)
+#define SE_AES_HASH_DISABLE SE_AES_CRYPTO_CFG_HASH_EN(0)
+#define SE_AES_HASH_ENABLE SE_AES_CRYPTO_CFG_HASH_EN(1)
+
+#define SE_LAST_BLOCK_VAL(x) FIELD_PREP(GENMASK(19, 0), x)
+#define SE_LAST_BLOCK_RES_BITS(x) FIELD_PREP(GENMASK(26, 20), x)
+
+#define SE_AES_OP_LASTBUF FIELD_PREP(BIT(16), 1)
+#define SE_AES_OP_WRSTALL FIELD_PREP(BIT(15), 1)
+#define SE_AES_OP_FINAL FIELD_PREP(BIT(5), 1)
+#define SE_AES_OP_INIT FIELD_PREP(BIT(4), 1)
+
+#define SE_AES_OP_OP(x) FIELD_PREP(GENMASK(2, 0), x)
+#define SE_AES_OP_START SE_AES_OP_OP(1)
+#define SE_AES_OP_RESTART_OUT SE_AES_OP_OP(2)
+#define SE_AES_OP_RESTART_IN SE_AES_OP_OP(4)
+#define SE_AES_OP_RESTART_INOUT SE_AES_OP_OP(5)
+#define SE_AES_OP_DUMMY SE_AES_OP_OP(6)
+
+#define SE_KAC_SIZE(x) FIELD_PREP(GENMASK(15, 14), x)
+#define SE_KAC_SIZE_128 SE_KAC_SIZE(0)
+#define SE_KAC_SIZE_192 SE_KAC_SIZE(1)
+#define SE_KAC_SIZE_256 SE_KAC_SIZE(2)
+
+#define SE_KAC_EXPORTABLE FIELD_PREP(BIT(12), 1)
+
+#define SE_KAC_PURPOSE(x) FIELD_PREP(GENMASK(11, 8), x)
+#define SE_KAC_ENC SE_KAC_PURPOSE(0)
+#define SE_KAC_CMAC SE_KAC_PURPOSE(1)
+#define SE_KAC_HMAC SE_KAC_PURPOSE(2)
+#define SE_KAC_GCM_KW SE_KAC_PURPOSE(3)
+#define SE_KAC_HMAC_KDK SE_KAC_PURPOSE(6)
+#define SE_KAC_HMAC_KDD SE_KAC_PURPOSE(7)
+#define SE_KAC_HMAC_KDD_KUW SE_KAC_PURPOSE(8)
+#define SE_KAC_XTS SE_KAC_PURPOSE(9)
+#define SE_KAC_GCM SE_KAC_PURPOSE(10)
+
+#define SE_KAC_USER_NS FIELD_PREP(GENMASK(6, 4), 3)
+
+#define SE_AES_KEY_DST_INDEX(x) FIELD_PREP(GENMASK(11, 8), x)
+#define SE_ADDR_HI_MSB(x) FIELD_PREP(GENMASK(31, 24), x)
+#define SE_ADDR_HI_SZ(x) FIELD_PREP(GENMASK(23, 0), x)
+
+#define SE_CFG_AES_ENCRYPT (SE_AES_ENC_ALG_AES_ENC | \
+ SE_AES_DEC_ALG_NOP | \
+ SE_AES_DST_MEMORY)
+
+#define SE_CFG_AES_DECRYPT (SE_AES_ENC_ALG_NOP | \
+ SE_AES_DEC_ALG_AES_DEC | \
+ SE_AES_DST_MEMORY)
+
+#define SE_CFG_GMAC_ENCRYPT (SE_AES_ENC_ALG_AES_ENC | \
+ SE_AES_DEC_ALG_NOP | \
+ SE_AES_ENC_MODE_GMAC | \
+ SE_AES_DST_MEMORY)
+
+#define SE_CFG_GMAC_DECRYPT (SE_AES_ENC_ALG_NOP | \
+ SE_AES_DEC_ALG_AES_DEC | \
+ SE_AES_DEC_MODE_GMAC | \
+ SE_AES_DST_MEMORY)
+
+#define SE_CFG_GCM_ENCRYPT (SE_AES_ENC_ALG_AES_ENC | \
+ SE_AES_DEC_ALG_NOP | \
+ SE_AES_ENC_MODE_GCM | \
+ SE_AES_DST_MEMORY)
+
+#define SE_CFG_GCM_DECRYPT (SE_AES_ENC_ALG_NOP | \
+ SE_AES_DEC_ALG_AES_DEC | \
+ SE_AES_DEC_MODE_GCM | \
+ SE_AES_DST_MEMORY)
+
+#define SE_CFG_GCM_FINAL_ENCRYPT (SE_AES_ENC_ALG_AES_ENC | \
+ SE_AES_DEC_ALG_NOP | \
+ SE_AES_ENC_MODE_GCM_FINAL | \
+ SE_AES_DST_MEMORY)
+
+#define SE_CFG_GCM_FINAL_DECRYPT (SE_AES_ENC_ALG_NOP | \
+ SE_AES_DEC_ALG_AES_DEC | \
+ SE_AES_DEC_MODE_GCM_FINAL | \
+ SE_AES_DST_MEMORY)
+
+#define SE_CFG_CMAC (SE_AES_ENC_ALG_AES_ENC | \
+ SE_AES_ENC_MODE_CMAC | \
+ SE_AES_DST_HASH_REG)
+
+#define SE_CFG_CBC_MAC (SE_AES_ENC_ALG_AES_ENC | \
+ SE_AES_ENC_MODE_CBC_MAC)
+
+#define SE_CFG_INS (SE_AES_ENC_ALG_INS | \
+ SE_AES_DEC_ALG_NOP)
+
+#define SE_CRYPTO_CFG_ECB_ENCRYPT (SE_AES_INPUT_SEL_MEMORY | \
+ SE_AES_XOR_POS_BYPASS | \
+ SE_AES_CORE_SEL_ENCRYPT)
+
+#define SE_CRYPTO_CFG_ECB_DECRYPT (SE_AES_INPUT_SEL_MEMORY | \
+ SE_AES_XOR_POS_BYPASS | \
+ SE_AES_CORE_SEL_DECRYPT)
+
+#define SE_CRYPTO_CFG_CBC_ENCRYPT (SE_AES_INPUT_SEL_MEMORY | \
+ SE_AES_VCTRAM_SEL_AESOUT | \
+ SE_AES_XOR_POS_TOP | \
+ SE_AES_CORE_SEL_ENCRYPT | \
+ SE_AES_IV_SEL_REG)
+
+#define SE_CRYPTO_CFG_CBC_DECRYPT (SE_AES_INPUT_SEL_MEMORY | \
+ SE_AES_VCTRAM_SEL_PREV_MEM | \
+ SE_AES_XOR_POS_BOTTOM | \
+ SE_AES_CORE_SEL_DECRYPT | \
+ SE_AES_IV_SEL_REG)
+
+#define SE_CRYPTO_CFG_CTR (SE_AES_INPUT_SEL_LINEAR_CTR | \
+ SE_AES_VCTRAM_SEL_MEMORY | \
+ SE_AES_XOR_POS_BOTTOM | \
+ SE_AES_CORE_SEL_ENCRYPT | \
+ SE_AES_CRYPTO_CFG_CTR_CNTN(1) | \
+ SE_AES_IV_SEL_REG)
+
+#define SE_CRYPTO_CFG_XTS_ENCRYPT (SE_AES_INPUT_SEL_MEMORY | \
+ SE_AES_VCTRAM_SEL_TWEAK | \
+ SE_AES_XOR_POS_BOTH | \
+ SE_AES_CORE_SEL_ENCRYPT | \
+ SE_AES_IV_SEL_REG)
+
+#define SE_CRYPTO_CFG_XTS_DECRYPT (SE_AES_INPUT_SEL_MEMORY | \
+ SE_AES_VCTRAM_SEL_TWEAK | \
+ SE_AES_XOR_POS_BOTH | \
+ SE_AES_CORE_SEL_DECRYPT | \
+ SE_AES_IV_SEL_REG)
+
+#define SE_CRYPTO_CFG_XTS_DECRYPT (SE_AES_INPUT_SEL_MEMORY | \
+ SE_AES_VCTRAM_SEL_TWEAK | \
+ SE_AES_XOR_POS_BOTH | \
+ SE_AES_CORE_SEL_DECRYPT | \
+ SE_AES_IV_SEL_REG)
+
+#define SE_CRYPTO_CFG_CBC_MAC (SE_AES_INPUT_SEL_MEMORY | \
+ SE_AES_VCTRAM_SEL_AESOUT | \
+ SE_AES_XOR_POS_TOP | \
+ SE_AES_CORE_SEL_ENCRYPT | \
+ SE_AES_HASH_ENABLE | \
+ SE_AES_IV_SEL_REG)
+
+#define HASH_RESULT_REG_COUNT 50
+#define CMAC_RESULT_REG_COUNT 4
+
+#define SE_CRYPTO_CTR_REG_COUNT 4
+#define SE_MAX_KEYSLOT 15
+#define SE_MAX_MEM_ALLOC SZ_4M
+#define SE_AES_BUFLEN 0x8000
+#define SE_SHA_BUFLEN 0x2000
+
+#define SHA_FIRST BIT(0)
+#define SHA_UPDATE BIT(1)
+#define SHA_FINAL BIT(2)
+
+/* Security Engine operation modes */
+enum se_aes_alg {
+ SE_ALG_CBC, /* Cipher Block Chaining (CBC) mode */
+ SE_ALG_ECB, /* Electronic Codebook (ECB) mode */
+ SE_ALG_CTR, /* Counter (CTR) mode */
+ SE_ALG_XTS, /* XTS mode */
+ SE_ALG_GMAC, /* GMAC mode */
+ SE_ALG_GCM, /* GCM mode */
+ SE_ALG_GCM_FINAL, /* GCM FINAL mode */
+ SE_ALG_CMAC, /* Cipher-based MAC (CMAC) mode */
+ SE_ALG_CBC_MAC, /* CBC MAC mode */
+};
+
+enum se_hash_alg {
+ SE_ALG_RNG_DRBG, /* Deterministic Random Bit Generator */
+ SE_ALG_SHA1, /* Secure Hash Algorithm-1 (SHA1) mode */
+ SE_ALG_SHA224, /* Secure Hash Algorithm-224 (SHA224) mode */
+ SE_ALG_SHA256, /* Secure Hash Algorithm-256 (SHA256) mode */
+ SE_ALG_SHA384, /* Secure Hash Algorithm-384 (SHA384) mode */
+ SE_ALG_SHA512, /* Secure Hash Algorithm-512 (SHA512) mode */
+ SE_ALG_SHA3_224, /* Secure Hash Algorithm3-224 (SHA3-224) mode */
+ SE_ALG_SHA3_256, /* Secure Hash Algorithm3-256 (SHA3-256) mode */
+ SE_ALG_SHA3_384, /* Secure Hash Algorithm3-384 (SHA3-384) mode */
+ SE_ALG_SHA3_512, /* Secure Hash Algorithm3-512 (SHA3-512) mode */
+ SE_ALG_SHAKE128, /* Secure Hash Algorithm3 (SHAKE128) mode */
+ SE_ALG_SHAKE256, /* Secure Hash Algorithm3 (SHAKE256) mode */
+ SE_ALG_HMAC_SHA224, /* Hash based MAC (HMAC) - 224 */
+ SE_ALG_HMAC_SHA256, /* Hash based MAC (HMAC) - 256 */
+ SE_ALG_HMAC_SHA384, /* Hash based MAC (HMAC) - 384 */
+ SE_ALG_HMAC_SHA512, /* Hash based MAC (HMAC) - 512 */
+};
+
+struct tegra_se_alg {
+ struct tegra_se *se_dev;
+ const char *alg_base;
+
+ union {
+ struct skcipher_engine_alg skcipher;
+ struct aead_engine_alg aead;
+ struct ahash_engine_alg ahash;
+ } alg;
+};
+
+struct tegra_se_regs {
+ u32 op;
+ u32 config;
+ u32 last_blk;
+ u32 linear_ctr;
+ u32 out_addr;
+ u32 aad_len;
+ u32 cryp_msg_len;
+ u32 manifest;
+ u32 key_addr;
+ u32 key_data;
+ u32 key_dst;
+ u32 result;
+};
+
+struct tegra_se_hw {
+ const struct tegra_se_regs *regs;
+ int (*init_alg)(struct tegra_se *se);
+ void (*deinit_alg)(struct tegra_se *se);
+ bool support_sm_alg;
+ u32 host1x_class;
+ u32 kac_ver;
+};
+
+struct tegra_se {
+ int (*manifest)(u32 user, u32 alg, u32 keylen);
+ const struct tegra_se_hw *hw;
+ struct host1x_client client;
+ struct host1x_channel *channel;
+ struct tegra_se_cmdbuf *cmdbuf;
+ struct crypto_engine *engine;
+ struct host1x_syncpt *syncpt;
+ struct device *dev;
+ struct clk *clk;
+ unsigned int opcode_addr;
+ unsigned int stream_id;
+ unsigned int syncpt_id;
+ void __iomem *base;
+ u32 owner;
+};
+
+struct tegra_se_cmdbuf {
+ dma_addr_t iova;
+ u32 *addr;
+ struct device *dev;
+ struct kref ref;
+ struct host1x_bo bo;
+ ssize_t size;
+ u32 words;
+};
+
+struct tegra_se_datbuf {
+ u8 *buf;
+ dma_addr_t addr;
+ ssize_t size;
+};
+
+static inline int se_algname_to_algid(const char *name)
+{
+ if (!strcmp(name, "cbc(aes)"))
+ return SE_ALG_CBC;
+ else if (!strcmp(name, "ecb(aes)"))
+ return SE_ALG_ECB;
+ else if (!strcmp(name, "ctr(aes)"))
+ return SE_ALG_CTR;
+ else if (!strcmp(name, "xts(aes)"))
+ return SE_ALG_XTS;
+ else if (!strcmp(name, "cmac(aes)"))
+ return SE_ALG_CMAC;
+ else if (!strcmp(name, "gcm(aes)"))
+ return SE_ALG_GCM;
+ else if (!strcmp(name, "ccm(aes)"))
+ return SE_ALG_CBC_MAC;
+
+ else if (!strcmp(name, "sha1"))
+ return SE_ALG_SHA1;
+ else if (!strcmp(name, "sha224"))
+ return SE_ALG_SHA224;
+ else if (!strcmp(name, "sha256"))
+ return SE_ALG_SHA256;
+ else if (!strcmp(name, "sha384"))
+ return SE_ALG_SHA384;
+ else if (!strcmp(name, "sha512"))
+ return SE_ALG_SHA512;
+ else if (!strcmp(name, "sha3-224"))
+ return SE_ALG_SHA3_224;
+ else if (!strcmp(name, "sha3-256"))
+ return SE_ALG_SHA3_256;
+ else if (!strcmp(name, "sha3-384"))
+ return SE_ALG_SHA3_384;
+ else if (!strcmp(name, "sha3-512"))
+ return SE_ALG_SHA3_512;
+ else if (!strcmp(name, "hmac(sha224)"))
+ return SE_ALG_HMAC_SHA224;
+ else if (!strcmp(name, "hmac(sha256)"))
+ return SE_ALG_HMAC_SHA256;
+ else if (!strcmp(name, "hmac(sha384)"))
+ return SE_ALG_HMAC_SHA384;
+ else if (!strcmp(name, "hmac(sha512)"))
+ return SE_ALG_HMAC_SHA512;
+ else
+ return -EINVAL;
+}
+
+/* Functions */
+int tegra_init_aes(struct tegra_se *se);
+int tegra_init_hash(struct tegra_se *se);
+void tegra_deinit_aes(struct tegra_se *se);
+void tegra_deinit_hash(struct tegra_se *se);
+int tegra_key_submit(struct tegra_se *se, const u8 *key,
+ u32 keylen, u32 alg, u32 *keyid);
+void tegra_key_invalidate(struct tegra_se *se, u32 keyid, u32 alg);
+int tegra_se_host1x_submit(struct tegra_se *se, u32 size);
+
+/* HOST1x OPCODES */
+static inline u32 host1x_opcode_setpayload(unsigned int payload)
+{
+ return (9 << 28) | payload;
+}
+
+static inline u32 host1x_opcode_incr_w(unsigned int offset)
+{
+ /* 22-bit offset supported */
+ return (10 << 28) | offset;
+}
+
+static inline u32 host1x_opcode_nonincr_w(unsigned int offset)
+{
+ /* 22-bit offset supported */
+ return (11 << 28) | offset;
+}
+
+static inline u32 host1x_opcode_incr(unsigned int offset, unsigned int count)
+{
+ return (1 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_nonincr(unsigned int offset, unsigned int count)
+{
+ return (2 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
+{
+ return (v & 0xff) << 10;
+}
+
+static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
+{
+ return (v & 0x3ff) << 0;
+}
+
+static inline u32 host1x_uclass_wait_syncpt_r(void)
+{
+ return 0x8;
+}
+
+static inline u32 host1x_uclass_incr_syncpt_r(void)
+{
+ return 0x0;
+}
+
+#define se_host1x_opcode_incr_w(x) host1x_opcode_incr_w((x) / 4)
+#define se_host1x_opcode_nonincr_w(x) host1x_opcode_nonincr_w((x) / 4)
+#define se_host1x_opcode_incr(x, y) host1x_opcode_incr((x) / 4, y)
+#define se_host1x_opcode_nonincr(x, y) host1x_opcode_nonincr((x) / 4, y)
+
+#endif /*_TEGRA_SE_H*/
diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
index de53eddf6796..cb92b7fa99c6 100644
--- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
+++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
@@ -225,11 +225,11 @@ static int __virtio_crypto_akcipher_do_req(struct virtio_crypto_akcipher_request
struct virtio_crypto *vcrypto = ctx->vcrypto;
struct virtio_crypto_op_data_req *req_data = vc_req->req_data;
struct scatterlist *sgs[4], outhdr_sg, inhdr_sg, srcdata_sg, dstdata_sg;
- void *src_buf = NULL, *dst_buf = NULL;
+ void *src_buf, *dst_buf = NULL;
unsigned int num_out = 0, num_in = 0;
int node = dev_to_node(&vcrypto->vdev->dev);
unsigned long flags;
- int ret = -ENOMEM;
+ int ret;
bool verify = vc_akcipher_req->opcode == VIRTIO_CRYPTO_AKCIPHER_VERIFY;
unsigned int src_len = verify ? req->src_len + req->dst_len : req->src_len;
@@ -240,7 +240,7 @@ static int __virtio_crypto_akcipher_do_req(struct virtio_crypto_akcipher_request
/* src data */
src_buf = kcalloc_node(src_len, 1, GFP_KERNEL, node);
if (!src_buf)
- goto err;
+ return -ENOMEM;
if (verify) {
/* for verify operation, both src and dst data work as OUT direction */
@@ -255,7 +255,7 @@ static int __virtio_crypto_akcipher_do_req(struct virtio_crypto_akcipher_request
/* dst data */
dst_buf = kcalloc_node(req->dst_len, 1, GFP_KERNEL, node);
if (!dst_buf)
- goto err;
+ goto free_src;
sg_init_one(&dstdata_sg, dst_buf, req->dst_len);
sgs[num_out + num_in++] = &dstdata_sg;
@@ -278,9 +278,9 @@ static int __virtio_crypto_akcipher_do_req(struct virtio_crypto_akcipher_request
return 0;
err:
- kfree(src_buf);
kfree(dst_buf);
-
+free_src:
+ kfree(src_buf);
return -ENOMEM;
}
diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c
index b909c6a2bf1c..30cd040aa03b 100644
--- a/drivers/crypto/virtio/virtio_crypto_core.c
+++ b/drivers/crypto/virtio/virtio_crypto_core.c
@@ -42,8 +42,6 @@ static void virtcrypto_ctrlq_callback(struct virtqueue *vq)
virtio_crypto_ctrlq_callback(vc_ctrl_req);
spin_lock_irqsave(&vcrypto->ctrl_lock, flags);
}
- if (unlikely(virtqueue_is_broken(vq)))
- break;
} while (!virtqueue_enable_cb(vq));
spin_unlock_irqrestore(&vcrypto->ctrl_lock, flags);
}
@@ -583,7 +581,6 @@ static const struct virtio_device_id id_table[] = {
static struct virtio_driver virtio_crypto_driver = {
.driver.name = KBUILD_MODNAME,
- .driver.owner = THIS_MODULE,
.feature_table = features,
.feature_table_size = ARRAY_SIZE(features),
.id_table = id_table,
diff --git a/drivers/crypto/vmx/.gitignore b/drivers/crypto/vmx/.gitignore
deleted file mode 100644
index 7aa71d83f739..000000000000
--- a/drivers/crypto/vmx/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-aesp8-ppc.S
-ghashp8-ppc.S
diff --git a/drivers/crypto/vmx/Kconfig b/drivers/crypto/vmx/Kconfig
deleted file mode 100644
index b2c28b87f14b..000000000000
--- a/drivers/crypto/vmx/Kconfig
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-config CRYPTO_DEV_VMX_ENCRYPT
- tristate "Encryption acceleration support on P8 CPU"
- depends on CRYPTO_DEV_VMX
- select CRYPTO_AES
- select CRYPTO_CBC
- select CRYPTO_CTR
- select CRYPTO_GHASH
- select CRYPTO_XTS
- default m
- help
- Support for VMX cryptographic acceleration instructions on Power8 CPU.
- This module supports acceleration for AES and GHASH in hardware. If you
- choose 'M' here, this module will be called vmx-crypto.
diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile
deleted file mode 100644
index 7257b8c44626..000000000000
--- a/drivers/crypto/vmx/Makefile
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o
-vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o
-
-ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
-override flavour := linux-ppc64le
-else
-ifdef CONFIG_PPC64_ELF_ABI_V2
-override flavour := linux-ppc64-elfv2
-else
-override flavour := linux-ppc64
-endif
-endif
-
-quiet_cmd_perl = PERL $@
- cmd_perl = $(PERL) $< $(flavour) > $@
-
-targets += aesp8-ppc.S ghashp8-ppc.S
-
-$(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE
- $(call if_changed,perl)
-
-OBJECT_FILES_NON_STANDARD_aesp8-ppc.o := y
diff --git a/drivers/crypto/vmx/aes.c b/drivers/crypto/vmx/aes.c
deleted file mode 100644
index ec06189fbf99..000000000000
--- a/drivers/crypto/vmx/aes.c
+++ /dev/null
@@ -1,134 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * AES routines supporting VMX instructions on the Power 8
- *
- * Copyright (C) 2015 International Business Machines Inc.
- *
- * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
- */
-
-#include <linux/types.h>
-#include <linux/err.h>
-#include <linux/crypto.h>
-#include <linux/delay.h>
-#include <asm/simd.h>
-#include <asm/switch_to.h>
-#include <crypto/aes.h>
-#include <crypto/internal/cipher.h>
-#include <crypto/internal/simd.h>
-
-#include "aesp8-ppc.h"
-
-struct p8_aes_ctx {
- struct crypto_cipher *fallback;
- struct aes_key enc_key;
- struct aes_key dec_key;
-};
-
-static int p8_aes_init(struct crypto_tfm *tfm)
-{
- const char *alg = crypto_tfm_alg_name(tfm);
- struct crypto_cipher *fallback;
- struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
- fallback = crypto_alloc_cipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK);
- if (IS_ERR(fallback)) {
- printk(KERN_ERR
- "Failed to allocate transformation for '%s': %ld\n",
- alg, PTR_ERR(fallback));
- return PTR_ERR(fallback);
- }
-
- crypto_cipher_set_flags(fallback,
- crypto_cipher_get_flags((struct
- crypto_cipher *)
- tfm));
- ctx->fallback = fallback;
-
- return 0;
-}
-
-static void p8_aes_exit(struct crypto_tfm *tfm)
-{
- struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
- if (ctx->fallback) {
- crypto_free_cipher(ctx->fallback);
- ctx->fallback = NULL;
- }
-}
-
-static int p8_aes_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
-{
- int ret;
- struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
- ret |= aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
-
- ret |= crypto_cipher_setkey(ctx->fallback, key, keylen);
-
- return ret ? -EINVAL : 0;
-}
-
-static void p8_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
- if (!crypto_simd_usable()) {
- crypto_cipher_encrypt_one(ctx->fallback, dst, src);
- } else {
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- aes_p8_encrypt(src, dst, &ctx->enc_key);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
- }
-}
-
-static void p8_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
- struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-
- if (!crypto_simd_usable()) {
- crypto_cipher_decrypt_one(ctx->fallback, dst, src);
- } else {
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- aes_p8_decrypt(src, dst, &ctx->dec_key);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
- }
-}
-
-struct crypto_alg p8_aes_alg = {
- .cra_name = "aes",
- .cra_driver_name = "p8_aes",
- .cra_module = THIS_MODULE,
- .cra_priority = 1000,
- .cra_type = NULL,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_NEED_FALLBACK,
- .cra_alignmask = 0,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct p8_aes_ctx),
- .cra_init = p8_aes_init,
- .cra_exit = p8_aes_exit,
- .cra_cipher = {
- .cia_min_keysize = AES_MIN_KEY_SIZE,
- .cia_max_keysize = AES_MAX_KEY_SIZE,
- .cia_setkey = p8_aes_setkey,
- .cia_encrypt = p8_aes_encrypt,
- .cia_decrypt = p8_aes_decrypt,
- },
-};
diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c
deleted file mode 100644
index ed0debc7acb5..000000000000
--- a/drivers/crypto/vmx/aes_cbc.c
+++ /dev/null
@@ -1,133 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * AES CBC routines supporting VMX instructions on the Power 8
- *
- * Copyright (C) 2015 International Business Machines Inc.
- *
- * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
- */
-
-#include <asm/simd.h>
-#include <asm/switch_to.h>
-#include <crypto/aes.h>
-#include <crypto/internal/simd.h>
-#include <crypto/internal/skcipher.h>
-
-#include "aesp8-ppc.h"
-
-struct p8_aes_cbc_ctx {
- struct crypto_skcipher *fallback;
- struct aes_key enc_key;
- struct aes_key dec_key;
-};
-
-static int p8_aes_cbc_init(struct crypto_skcipher *tfm)
-{
- struct p8_aes_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct crypto_skcipher *fallback;
-
- fallback = crypto_alloc_skcipher("cbc(aes)", 0,
- CRYPTO_ALG_NEED_FALLBACK |
- CRYPTO_ALG_ASYNC);
- if (IS_ERR(fallback)) {
- pr_err("Failed to allocate cbc(aes) fallback: %ld\n",
- PTR_ERR(fallback));
- return PTR_ERR(fallback);
- }
-
- crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
- crypto_skcipher_reqsize(fallback));
- ctx->fallback = fallback;
- return 0;
-}
-
-static void p8_aes_cbc_exit(struct crypto_skcipher *tfm)
-{
- struct p8_aes_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- crypto_free_skcipher(ctx->fallback);
-}
-
-static int p8_aes_cbc_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct p8_aes_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
- int ret;
-
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
- ret |= aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
-
- ret |= crypto_skcipher_setkey(ctx->fallback, key, keylen);
-
- return ret ? -EINVAL : 0;
-}
-
-static int p8_aes_cbc_crypt(struct skcipher_request *req, int enc)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- const struct p8_aes_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- unsigned int nbytes;
- int ret;
-
- if (!crypto_simd_usable()) {
- struct skcipher_request *subreq = skcipher_request_ctx(req);
-
- *subreq = *req;
- skcipher_request_set_tfm(subreq, ctx->fallback);
- return enc ? crypto_skcipher_encrypt(subreq) :
- crypto_skcipher_decrypt(subreq);
- }
-
- ret = skcipher_walk_virt(&walk, req, false);
- while ((nbytes = walk.nbytes) != 0) {
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- aes_p8_cbc_encrypt(walk.src.virt.addr,
- walk.dst.virt.addr,
- round_down(nbytes, AES_BLOCK_SIZE),
- enc ? &ctx->enc_key : &ctx->dec_key,
- walk.iv, enc);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
-
- ret = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE);
- }
- return ret;
-}
-
-static int p8_aes_cbc_encrypt(struct skcipher_request *req)
-{
- return p8_aes_cbc_crypt(req, 1);
-}
-
-static int p8_aes_cbc_decrypt(struct skcipher_request *req)
-{
- return p8_aes_cbc_crypt(req, 0);
-}
-
-struct skcipher_alg p8_aes_cbc_alg = {
- .base.cra_name = "cbc(aes)",
- .base.cra_driver_name = "p8_aes_cbc",
- .base.cra_module = THIS_MODULE,
- .base.cra_priority = 2000,
- .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
- .base.cra_blocksize = AES_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct p8_aes_cbc_ctx),
- .setkey = p8_aes_cbc_setkey,
- .encrypt = p8_aes_cbc_encrypt,
- .decrypt = p8_aes_cbc_decrypt,
- .init = p8_aes_cbc_init,
- .exit = p8_aes_cbc_exit,
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
-};
diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c
deleted file mode 100644
index 9a3da8cd62f3..000000000000
--- a/drivers/crypto/vmx/aes_ctr.c
+++ /dev/null
@@ -1,149 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * AES CTR routines supporting VMX instructions on the Power 8
- *
- * Copyright (C) 2015 International Business Machines Inc.
- *
- * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
- */
-
-#include <asm/simd.h>
-#include <asm/switch_to.h>
-#include <crypto/aes.h>
-#include <crypto/internal/simd.h>
-#include <crypto/internal/skcipher.h>
-
-#include "aesp8-ppc.h"
-
-struct p8_aes_ctr_ctx {
- struct crypto_skcipher *fallback;
- struct aes_key enc_key;
-};
-
-static int p8_aes_ctr_init(struct crypto_skcipher *tfm)
-{
- struct p8_aes_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct crypto_skcipher *fallback;
-
- fallback = crypto_alloc_skcipher("ctr(aes)", 0,
- CRYPTO_ALG_NEED_FALLBACK |
- CRYPTO_ALG_ASYNC);
- if (IS_ERR(fallback)) {
- pr_err("Failed to allocate ctr(aes) fallback: %ld\n",
- PTR_ERR(fallback));
- return PTR_ERR(fallback);
- }
-
- crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
- crypto_skcipher_reqsize(fallback));
- ctx->fallback = fallback;
- return 0;
-}
-
-static void p8_aes_ctr_exit(struct crypto_skcipher *tfm)
-{
- struct p8_aes_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- crypto_free_skcipher(ctx->fallback);
-}
-
-static int p8_aes_ctr_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct p8_aes_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
- int ret;
-
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
-
- ret |= crypto_skcipher_setkey(ctx->fallback, key, keylen);
-
- return ret ? -EINVAL : 0;
-}
-
-static void p8_aes_ctr_final(const struct p8_aes_ctr_ctx *ctx,
- struct skcipher_walk *walk)
-{
- u8 *ctrblk = walk->iv;
- u8 keystream[AES_BLOCK_SIZE];
- u8 *src = walk->src.virt.addr;
- u8 *dst = walk->dst.virt.addr;
- unsigned int nbytes = walk->nbytes;
-
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- aes_p8_encrypt(ctrblk, keystream, &ctx->enc_key);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
-
- crypto_xor_cpy(dst, keystream, src, nbytes);
- crypto_inc(ctrblk, AES_BLOCK_SIZE);
-}
-
-static int p8_aes_ctr_crypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- const struct p8_aes_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- unsigned int nbytes;
- int ret;
-
- if (!crypto_simd_usable()) {
- struct skcipher_request *subreq = skcipher_request_ctx(req);
-
- *subreq = *req;
- skcipher_request_set_tfm(subreq, ctx->fallback);
- return crypto_skcipher_encrypt(subreq);
- }
-
- ret = skcipher_walk_virt(&walk, req, false);
- while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- aes_p8_ctr32_encrypt_blocks(walk.src.virt.addr,
- walk.dst.virt.addr,
- nbytes / AES_BLOCK_SIZE,
- &ctx->enc_key, walk.iv);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
-
- do {
- crypto_inc(walk.iv, AES_BLOCK_SIZE);
- } while ((nbytes -= AES_BLOCK_SIZE) >= AES_BLOCK_SIZE);
-
- ret = skcipher_walk_done(&walk, nbytes);
- }
- if (nbytes) {
- p8_aes_ctr_final(ctx, &walk);
- ret = skcipher_walk_done(&walk, 0);
- }
- return ret;
-}
-
-struct skcipher_alg p8_aes_ctr_alg = {
- .base.cra_name = "ctr(aes)",
- .base.cra_driver_name = "p8_aes_ctr",
- .base.cra_module = THIS_MODULE,
- .base.cra_priority = 2000,
- .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct p8_aes_ctr_ctx),
- .setkey = p8_aes_ctr_setkey,
- .encrypt = p8_aes_ctr_crypt,
- .decrypt = p8_aes_ctr_crypt,
- .init = p8_aes_ctr_init,
- .exit = p8_aes_ctr_exit,
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .chunksize = AES_BLOCK_SIZE,
-};
diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c
deleted file mode 100644
index dabbccb41550..000000000000
--- a/drivers/crypto/vmx/aes_xts.c
+++ /dev/null
@@ -1,162 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * AES XTS routines supporting VMX In-core instructions on Power 8
- *
- * Copyright (C) 2015 International Business Machines Inc.
- *
- * Author: Leonidas S. Barbosa <leosilva@linux.vnet.ibm.com>
- */
-
-#include <asm/simd.h>
-#include <asm/switch_to.h>
-#include <crypto/aes.h>
-#include <crypto/internal/simd.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/xts.h>
-
-#include "aesp8-ppc.h"
-
-struct p8_aes_xts_ctx {
- struct crypto_skcipher *fallback;
- struct aes_key enc_key;
- struct aes_key dec_key;
- struct aes_key tweak_key;
-};
-
-static int p8_aes_xts_init(struct crypto_skcipher *tfm)
-{
- struct p8_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct crypto_skcipher *fallback;
-
- fallback = crypto_alloc_skcipher("xts(aes)", 0,
- CRYPTO_ALG_NEED_FALLBACK |
- CRYPTO_ALG_ASYNC);
- if (IS_ERR(fallback)) {
- pr_err("Failed to allocate xts(aes) fallback: %ld\n",
- PTR_ERR(fallback));
- return PTR_ERR(fallback);
- }
-
- crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
- crypto_skcipher_reqsize(fallback));
- ctx->fallback = fallback;
- return 0;
-}
-
-static void p8_aes_xts_exit(struct crypto_skcipher *tfm)
-{
- struct p8_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- crypto_free_skcipher(ctx->fallback);
-}
-
-static int p8_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct p8_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
- int ret;
-
- ret = xts_verify_key(tfm, key, keylen);
- if (ret)
- return ret;
-
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- ret = aes_p8_set_encrypt_key(key + keylen/2, (keylen/2) * 8, &ctx->tweak_key);
- ret |= aes_p8_set_encrypt_key(key, (keylen/2) * 8, &ctx->enc_key);
- ret |= aes_p8_set_decrypt_key(key, (keylen/2) * 8, &ctx->dec_key);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
-
- ret |= crypto_skcipher_setkey(ctx->fallback, key, keylen);
-
- return ret ? -EINVAL : 0;
-}
-
-static int p8_aes_xts_crypt(struct skcipher_request *req, int enc)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- const struct p8_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- unsigned int nbytes;
- u8 tweak[AES_BLOCK_SIZE];
- int ret;
-
- if (req->cryptlen < AES_BLOCK_SIZE)
- return -EINVAL;
-
- if (!crypto_simd_usable() || (req->cryptlen % XTS_BLOCK_SIZE) != 0) {
- struct skcipher_request *subreq = skcipher_request_ctx(req);
-
- *subreq = *req;
- skcipher_request_set_tfm(subreq, ctx->fallback);
- return enc ? crypto_skcipher_encrypt(subreq) :
- crypto_skcipher_decrypt(subreq);
- }
-
- ret = skcipher_walk_virt(&walk, req, false);
- if (ret)
- return ret;
-
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
-
- aes_p8_encrypt(walk.iv, tweak, &ctx->tweak_key);
-
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
-
- while ((nbytes = walk.nbytes) != 0) {
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- if (enc)
- aes_p8_xts_encrypt(walk.src.virt.addr,
- walk.dst.virt.addr,
- round_down(nbytes, AES_BLOCK_SIZE),
- &ctx->enc_key, NULL, tweak);
- else
- aes_p8_xts_decrypt(walk.src.virt.addr,
- walk.dst.virt.addr,
- round_down(nbytes, AES_BLOCK_SIZE),
- &ctx->dec_key, NULL, tweak);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
-
- ret = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE);
- }
- return ret;
-}
-
-static int p8_aes_xts_encrypt(struct skcipher_request *req)
-{
- return p8_aes_xts_crypt(req, 1);
-}
-
-static int p8_aes_xts_decrypt(struct skcipher_request *req)
-{
- return p8_aes_xts_crypt(req, 0);
-}
-
-struct skcipher_alg p8_aes_xts_alg = {
- .base.cra_name = "xts(aes)",
- .base.cra_driver_name = "p8_aes_xts",
- .base.cra_module = THIS_MODULE,
- .base.cra_priority = 2000,
- .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
- .base.cra_blocksize = AES_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct p8_aes_xts_ctx),
- .setkey = p8_aes_xts_setkey,
- .encrypt = p8_aes_xts_encrypt,
- .decrypt = p8_aes_xts_decrypt,
- .init = p8_aes_xts_init,
- .exit = p8_aes_xts_exit,
- .min_keysize = 2 * AES_MIN_KEY_SIZE,
- .max_keysize = 2 * AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
-};
diff --git a/drivers/crypto/vmx/aesp8-ppc.h b/drivers/crypto/vmx/aesp8-ppc.h
deleted file mode 100644
index 5764d4438388..000000000000
--- a/drivers/crypto/vmx/aesp8-ppc.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <linux/types.h>
-#include <crypto/aes.h>
-
-struct aes_key {
- u8 key[AES_MAX_KEYLENGTH];
- int rounds;
-};
-
-extern struct shash_alg p8_ghash_alg;
-extern struct crypto_alg p8_aes_alg;
-extern struct skcipher_alg p8_aes_cbc_alg;
-extern struct skcipher_alg p8_aes_ctr_alg;
-extern struct skcipher_alg p8_aes_xts_alg;
-
-int aes_p8_set_encrypt_key(const u8 *userKey, const int bits,
- struct aes_key *key);
-int aes_p8_set_decrypt_key(const u8 *userKey, const int bits,
- struct aes_key *key);
-void aes_p8_encrypt(const u8 *in, u8 *out, const struct aes_key *key);
-void aes_p8_decrypt(const u8 *in, u8 *out, const struct aes_key *key);
-void aes_p8_cbc_encrypt(const u8 *in, u8 *out, size_t len,
- const struct aes_key *key, u8 *iv, const int enc);
-void aes_p8_ctr32_encrypt_blocks(const u8 *in, u8 *out,
- size_t len, const struct aes_key *key,
- const u8 *iv);
-void aes_p8_xts_encrypt(const u8 *in, u8 *out, size_t len,
- const struct aes_key *key1, const struct aes_key *key2, u8 *iv);
-void aes_p8_xts_decrypt(const u8 *in, u8 *out, size_t len,
- const struct aes_key *key1, const struct aes_key *key2, u8 *iv);
diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl
deleted file mode 100644
index f729589d792e..000000000000
--- a/drivers/crypto/vmx/aesp8-ppc.pl
+++ /dev/null
@@ -1,3889 +0,0 @@
-#! /usr/bin/env perl
-# SPDX-License-Identifier: GPL-2.0
-
-# This code is taken from CRYPTOGAMs[1] and is included here using the option
-# in the license to distribute the code under the GPL. Therefore this program
-# is free software; you can redistribute it and/or modify it under the terms of
-# the GNU General Public License version 2 as published by the Free Software
-# Foundation.
-#
-# [1] https://www.openssl.org/~appro/cryptogams/
-
-# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# * Redistributions of source code must retain copyright notices,
-# this list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above
-# copyright notice, this list of conditions and the following
-# disclaimer in the documentation and/or other materials
-# provided with the distribution.
-#
-# * Neither the name of the CRYPTOGAMS nor the names of its
-# copyright holder and contributors may be used to endorse or
-# promote products derived from this software without specific
-# prior written permission.
-#
-# ALTERNATIVELY, provided that this notice is retained in full, this
-# product may be distributed under the terms of the GNU General Public
-# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
-# those given above.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
-# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-# ====================================================================
-# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see https://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-#
-# This module implements support for AES instructions as per PowerISA
-# specification version 2.07, first implemented by POWER8 processor.
-# The module is endian-agnostic in sense that it supports both big-
-# and little-endian cases. Data alignment in parallelizable modes is
-# handled with VSX loads and stores, which implies MSR.VSX flag being
-# set. It should also be noted that ISA specification doesn't prohibit
-# alignment exceptions for these instructions on page boundaries.
-# Initially alignment was handled in pure AltiVec/VMX way [when data
-# is aligned programmatically, which in turn guarantees exception-
-# free execution], but it turned to hamper performance when vcipher
-# instructions are interleaved. It's reckoned that eventual
-# misalignment penalties at page boundaries are in average lower
-# than additional overhead in pure AltiVec approach.
-#
-# May 2016
-#
-# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
-# systems were measured.
-#
-######################################################################
-# Current large-block performance in cycles per byte processed with
-# 128-bit key (less is better).
-#
-# CBC en-/decrypt CTR XTS
-# POWER8[le] 3.96/0.72 0.74 1.1
-# POWER8[be] 3.75/0.65 0.66 1.0
-
-$flavour = shift;
-
-if ($flavour =~ /64/) {
- $SIZE_T =8;
- $LRSAVE =2*$SIZE_T;
- $STU ="stdu";
- $POP ="ld";
- $PUSH ="std";
- $UCMP ="cmpld";
- $SHL ="sldi";
-} elsif ($flavour =~ /32/) {
- $SIZE_T =4;
- $LRSAVE =$SIZE_T;
- $STU ="stwu";
- $POP ="lwz";
- $PUSH ="stw";
- $UCMP ="cmplw";
- $SHL ="slwi";
-} else { die "nonsense $flavour"; }
-
-$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
-die "can't locate ppc-xlate.pl";
-
-open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
-
-$FRAME=8*$SIZE_T;
-$prefix="aes_p8";
-
-$sp="r1";
-$vrsave="r12";
-
-#########################################################################
-{{{ # Key setup procedures #
-my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
-my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
-my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
-
-$code.=<<___;
-.machine "any"
-
-.text
-
-.align 7
-rcon:
-.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
-.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
-.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
-.long 0,0,0,0 ?asis
-.long 0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe
-Lconsts:
- mflr r0
- bcl 20,31,\$+4
- mflr $ptr #vvvvv "distance between . and rcon
- addi $ptr,$ptr,-0x58
- mtlr r0
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,0,0
-.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
-
-.globl .${prefix}_set_encrypt_key
-Lset_encrypt_key:
- mflr r11
- $PUSH r11,$LRSAVE($sp)
-
- li $ptr,-1
- ${UCMP}i $inp,0
- beq- Lenc_key_abort # if ($inp==0) return -1;
- ${UCMP}i $out,0
- beq- Lenc_key_abort # if ($out==0) return -1;
- li $ptr,-2
- cmpwi $bits,128
- blt- Lenc_key_abort
- cmpwi $bits,256
- bgt- Lenc_key_abort
- andi. r0,$bits,0x3f
- bne- Lenc_key_abort
-
- lis r0,0xfff0
- mfspr $vrsave,256
- mtspr 256,r0
-
- bl Lconsts
- mtlr r11
-
- neg r9,$inp
- lvx $in0,0,$inp
- addi $inp,$inp,15 # 15 is not typo
- lvsr $key,0,r9 # borrow $key
- li r8,0x20
- cmpwi $bits,192
- lvx $in1,0,$inp
- le?vspltisb $mask,0x0f # borrow $mask
- lvx $rcon,0,$ptr
- le?vxor $key,$key,$mask # adjust for byte swap
- lvx $mask,r8,$ptr
- addi $ptr,$ptr,0x10
- vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
- li $cnt,8
- vxor $zero,$zero,$zero
- mtctr $cnt
-
- ?lvsr $outperm,0,$out
- vspltisb $outmask,-1
- lvx $outhead,0,$out
- ?vperm $outmask,$zero,$outmask,$outperm
-
- blt Loop128
- addi $inp,$inp,8
- beq L192
- addi $inp,$inp,8
- b L256
-
-.align 4
-Loop128:
- vperm $key,$in0,$in0,$mask # rotate-n-splat
- vsldoi $tmp,$zero,$in0,12 # >>32
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- vcipherlast $key,$key,$rcon
- stvx $stage,0,$out
- addi $out,$out,16
-
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vadduwm $rcon,$rcon,$rcon
- vxor $in0,$in0,$key
- bdnz Loop128
-
- lvx $rcon,0,$ptr # last two round keys
-
- vperm $key,$in0,$in0,$mask # rotate-n-splat
- vsldoi $tmp,$zero,$in0,12 # >>32
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- vcipherlast $key,$key,$rcon
- stvx $stage,0,$out
- addi $out,$out,16
-
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vadduwm $rcon,$rcon,$rcon
- vxor $in0,$in0,$key
-
- vperm $key,$in0,$in0,$mask # rotate-n-splat
- vsldoi $tmp,$zero,$in0,12 # >>32
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- vcipherlast $key,$key,$rcon
- stvx $stage,0,$out
- addi $out,$out,16
-
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vxor $in0,$in0,$key
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- stvx $stage,0,$out
-
- addi $inp,$out,15 # 15 is not typo
- addi $out,$out,0x50
-
- li $rounds,10
- b Ldone
-
-.align 4
-L192:
- lvx $tmp,0,$inp
- li $cnt,4
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- stvx $stage,0,$out
- addi $out,$out,16
- vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
- vspltisb $key,8 # borrow $key
- mtctr $cnt
- vsububm $mask,$mask,$key # adjust the mask
-
-Loop192:
- vperm $key,$in1,$in1,$mask # roate-n-splat
- vsldoi $tmp,$zero,$in0,12 # >>32
- vcipherlast $key,$key,$rcon
-
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
-
- vsldoi $stage,$zero,$in1,8
- vspltw $tmp,$in0,3
- vxor $tmp,$tmp,$in1
- vsldoi $in1,$zero,$in1,12 # >>32
- vadduwm $rcon,$rcon,$rcon
- vxor $in1,$in1,$tmp
- vxor $in0,$in0,$key
- vxor $in1,$in1,$key
- vsldoi $stage,$stage,$in0,8
-
- vperm $key,$in1,$in1,$mask # rotate-n-splat
- vsldoi $tmp,$zero,$in0,12 # >>32
- vperm $outtail,$stage,$stage,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- vcipherlast $key,$key,$rcon
- stvx $stage,0,$out
- addi $out,$out,16
-
- vsldoi $stage,$in0,$in1,8
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vperm $outtail,$stage,$stage,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- stvx $stage,0,$out
- addi $out,$out,16
-
- vspltw $tmp,$in0,3
- vxor $tmp,$tmp,$in1
- vsldoi $in1,$zero,$in1,12 # >>32
- vadduwm $rcon,$rcon,$rcon
- vxor $in1,$in1,$tmp
- vxor $in0,$in0,$key
- vxor $in1,$in1,$key
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- stvx $stage,0,$out
- addi $inp,$out,15 # 15 is not typo
- addi $out,$out,16
- bdnz Loop192
-
- li $rounds,12
- addi $out,$out,0x20
- b Ldone
-
-.align 4
-L256:
- lvx $tmp,0,$inp
- li $cnt,7
- li $rounds,14
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- stvx $stage,0,$out
- addi $out,$out,16
- vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
- mtctr $cnt
-
-Loop256:
- vperm $key,$in1,$in1,$mask # rotate-n-splat
- vsldoi $tmp,$zero,$in0,12 # >>32
- vperm $outtail,$in1,$in1,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- vcipherlast $key,$key,$rcon
- stvx $stage,0,$out
- addi $out,$out,16
-
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in0,$in0,$tmp
- vadduwm $rcon,$rcon,$rcon
- vxor $in0,$in0,$key
- vperm $outtail,$in0,$in0,$outperm # rotate
- vsel $stage,$outhead,$outtail,$outmask
- vmr $outhead,$outtail
- stvx $stage,0,$out
- addi $inp,$out,15 # 15 is not typo
- addi $out,$out,16
- bdz Ldone
-
- vspltw $key,$in0,3 # just splat
- vsldoi $tmp,$zero,$in1,12 # >>32
- vsbox $key,$key
-
- vxor $in1,$in1,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in1,$in1,$tmp
- vsldoi $tmp,$zero,$tmp,12 # >>32
- vxor $in1,$in1,$tmp
-
- vxor $in1,$in1,$key
- b Loop256
-
-.align 4
-Ldone:
- lvx $in1,0,$inp # redundant in aligned case
- vsel $in1,$outhead,$in1,$outmask
- stvx $in1,0,$inp
- li $ptr,0
- mtspr 256,$vrsave
- stw $rounds,0($out)
-
-Lenc_key_abort:
- mr r3,$ptr
- blr
- .long 0
- .byte 0,12,0x14,1,0,0,3,0
- .long 0
-.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
-
-.globl .${prefix}_set_decrypt_key
- $STU $sp,-$FRAME($sp)
- mflr r10
- $PUSH r10,$FRAME+$LRSAVE($sp)
- bl Lset_encrypt_key
- mtlr r10
-
- cmpwi r3,0
- bne- Ldec_key_abort
-
- slwi $cnt,$rounds,4
- subi $inp,$out,240 # first round key
- srwi $rounds,$rounds,1
- add $out,$inp,$cnt # last round key
- mtctr $rounds
-
-Ldeckey:
- lwz r0, 0($inp)
- lwz r6, 4($inp)
- lwz r7, 8($inp)
- lwz r8, 12($inp)
- addi $inp,$inp,16
- lwz r9, 0($out)
- lwz r10,4($out)
- lwz r11,8($out)
- lwz r12,12($out)
- stw r0, 0($out)
- stw r6, 4($out)
- stw r7, 8($out)
- stw r8, 12($out)
- subi $out,$out,16
- stw r9, -16($inp)
- stw r10,-12($inp)
- stw r11,-8($inp)
- stw r12,-4($inp)
- bdnz Ldeckey
-
- xor r3,r3,r3 # return value
-Ldec_key_abort:
- addi $sp,$sp,$FRAME
- blr
- .long 0
- .byte 0,12,4,1,0x80,0,3,0
- .long 0
-.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
-___
-}}}
-#########################################################################
-{{{ # Single block en- and decrypt procedures #
-sub gen_block () {
-my $dir = shift;
-my $n = $dir eq "de" ? "n" : "";
-my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
-
-$code.=<<___;
-.globl .${prefix}_${dir}crypt
- lwz $rounds,240($key)
- lis r0,0xfc00
- mfspr $vrsave,256
- li $idx,15 # 15 is not typo
- mtspr 256,r0
-
- lvx v0,0,$inp
- neg r11,$out
- lvx v1,$idx,$inp
- lvsl v2,0,$inp # inpperm
- le?vspltisb v4,0x0f
- ?lvsl v3,0,r11 # outperm
- le?vxor v2,v2,v4
- li $idx,16
- vperm v0,v0,v1,v2 # align [and byte swap in LE]
- lvx v1,0,$key
- ?lvsl v5,0,$key # keyperm
- srwi $rounds,$rounds,1
- lvx v2,$idx,$key
- addi $idx,$idx,16
- subi $rounds,$rounds,1
- ?vperm v1,v1,v2,v5 # align round key
-
- vxor v0,v0,v1
- lvx v1,$idx,$key
- addi $idx,$idx,16
- mtctr $rounds
-
-Loop_${dir}c:
- ?vperm v2,v2,v1,v5
- v${n}cipher v0,v0,v2
- lvx v2,$idx,$key
- addi $idx,$idx,16
- ?vperm v1,v1,v2,v5
- v${n}cipher v0,v0,v1
- lvx v1,$idx,$key
- addi $idx,$idx,16
- bdnz Loop_${dir}c
-
- ?vperm v2,v2,v1,v5
- v${n}cipher v0,v0,v2
- lvx v2,$idx,$key
- ?vperm v1,v1,v2,v5
- v${n}cipherlast v0,v0,v1
-
- vspltisb v2,-1
- vxor v1,v1,v1
- li $idx,15 # 15 is not typo
- ?vperm v2,v1,v2,v3 # outmask
- le?vxor v3,v3,v4
- lvx v1,0,$out # outhead
- vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
- vsel v1,v1,v0,v2
- lvx v4,$idx,$out
- stvx v1,0,$out
- vsel v0,v0,v4,v2
- stvx v0,$idx,$out
-
- mtspr 256,$vrsave
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,3,0
- .long 0
-.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
-___
-}
-&gen_block("en");
-&gen_block("de");
-}}}
-#########################################################################
-{{{ # CBC en- and decrypt procedures #
-my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
-my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
-my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
- map("v$_",(4..10));
-$code.=<<___;
-.globl .${prefix}_cbc_encrypt
- ${UCMP}i $len,16
- bltlr-
-
- cmpwi $enc,0 # test direction
- lis r0,0xffe0
- mfspr $vrsave,256
- mtspr 256,r0
-
- li $idx,15
- vxor $rndkey0,$rndkey0,$rndkey0
- le?vspltisb $tmp,0x0f
-
- lvx $ivec,0,$ivp # load [unaligned] iv
- lvsl $inpperm,0,$ivp
- lvx $inptail,$idx,$ivp
- le?vxor $inpperm,$inpperm,$tmp
- vperm $ivec,$ivec,$inptail,$inpperm
-
- neg r11,$inp
- ?lvsl $keyperm,0,$key # prepare for unaligned key
- lwz $rounds,240($key)
-
- lvsr $inpperm,0,r11 # prepare for unaligned load
- lvx $inptail,0,$inp
- addi $inp,$inp,15 # 15 is not typo
- le?vxor $inpperm,$inpperm,$tmp
-
- ?lvsr $outperm,0,$out # prepare for unaligned store
- vspltisb $outmask,-1
- lvx $outhead,0,$out
- ?vperm $outmask,$rndkey0,$outmask,$outperm
- le?vxor $outperm,$outperm,$tmp
-
- srwi $rounds,$rounds,1
- li $idx,16
- subi $rounds,$rounds,1
- beq Lcbc_dec
-
-Lcbc_enc:
- vmr $inout,$inptail
- lvx $inptail,0,$inp
- addi $inp,$inp,16
- mtctr $rounds
- subi $len,$len,16 # len-=16
-
- lvx $rndkey0,0,$key
- vperm $inout,$inout,$inptail,$inpperm
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
- vxor $inout,$inout,$ivec
-
-Loop_cbc_enc:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipher $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
- bdnz Loop_cbc_enc
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key
- li $idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipherlast $ivec,$inout,$rndkey0
- ${UCMP}i $len,16
-
- vperm $tmp,$ivec,$ivec,$outperm
- vsel $inout,$outhead,$tmp,$outmask
- vmr $outhead,$tmp
- stvx $inout,0,$out
- addi $out,$out,16
- bge Lcbc_enc
-
- b Lcbc_done
-
-.align 4
-Lcbc_dec:
- ${UCMP}i $len,128
- bge _aesp8_cbc_decrypt8x
- vmr $tmp,$inptail
- lvx $inptail,0,$inp
- addi $inp,$inp,16
- mtctr $rounds
- subi $len,$len,16 # len-=16
-
- lvx $rndkey0,0,$key
- vperm $tmp,$tmp,$inptail,$inpperm
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$tmp,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
-
-Loop_cbc_dec:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vncipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vncipher $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
- bdnz Loop_cbc_dec
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vncipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key
- li $idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vncipherlast $inout,$inout,$rndkey0
- ${UCMP}i $len,16
-
- vxor $inout,$inout,$ivec
- vmr $ivec,$tmp
- vperm $tmp,$inout,$inout,$outperm
- vsel $inout,$outhead,$tmp,$outmask
- vmr $outhead,$tmp
- stvx $inout,0,$out
- addi $out,$out,16
- bge Lcbc_dec
-
-Lcbc_done:
- addi $out,$out,-1
- lvx $inout,0,$out # redundant in aligned case
- vsel $inout,$outhead,$inout,$outmask
- stvx $inout,0,$out
-
- neg $enc,$ivp # write [unaligned] iv
- li $idx,15 # 15 is not typo
- vxor $rndkey0,$rndkey0,$rndkey0
- vspltisb $outmask,-1
- le?vspltisb $tmp,0x0f
- ?lvsl $outperm,0,$enc
- ?vperm $outmask,$rndkey0,$outmask,$outperm
- le?vxor $outperm,$outperm,$tmp
- lvx $outhead,0,$ivp
- vperm $ivec,$ivec,$ivec,$outperm
- vsel $inout,$outhead,$ivec,$outmask
- lvx $inptail,$idx,$ivp
- stvx $inout,0,$ivp
- vsel $inout,$ivec,$inptail,$outmask
- stvx $inout,$idx,$ivp
-
- mtspr 256,$vrsave
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,6,0
- .long 0
-___
-#########################################################################
-{{ # Optimized CBC decrypt procedure #
-my $key_="r11";
-my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
-my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
-my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
-my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
- # v26-v31 last 6 round keys
-my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
-
-$code.=<<___;
-.align 5
-_aesp8_cbc_decrypt8x:
- $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
- li r10,`$FRAME+8*16+15`
- li r11,`$FRAME+8*16+31`
- stvx v20,r10,$sp # ABI says so
- addi r10,r10,32
- stvx v21,r11,$sp
- addi r11,r11,32
- stvx v22,r10,$sp
- addi r10,r10,32
- stvx v23,r11,$sp
- addi r11,r11,32
- stvx v24,r10,$sp
- addi r10,r10,32
- stvx v25,r11,$sp
- addi r11,r11,32
- stvx v26,r10,$sp
- addi r10,r10,32
- stvx v27,r11,$sp
- addi r11,r11,32
- stvx v28,r10,$sp
- addi r10,r10,32
- stvx v29,r11,$sp
- addi r11,r11,32
- stvx v30,r10,$sp
- stvx v31,r11,$sp
- li r0,-1
- stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
- li $x10,0x10
- $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- li $x20,0x20
- $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- li $x30,0x30
- $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- li $x40,0x40
- $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- li $x50,0x50
- $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- li $x60,0x60
- $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- li $x70,0x70
- mtspr 256,r0
-
- subi $rounds,$rounds,3 # -4 in total
- subi $len,$len,128 # bias
-
- lvx $rndkey0,$x00,$key # load key schedule
- lvx v30,$x10,$key
- addi $key,$key,0x20
- lvx v31,$x00,$key
- ?vperm $rndkey0,$rndkey0,v30,$keyperm
- addi $key_,$sp,$FRAME+15
- mtctr $rounds
-
-Load_cbc_dec_key:
- ?vperm v24,v30,v31,$keyperm
- lvx v30,$x10,$key
- addi $key,$key,0x20
- stvx v24,$x00,$key_ # off-load round[1]
- ?vperm v25,v31,v30,$keyperm
- lvx v31,$x00,$key
- stvx v25,$x10,$key_ # off-load round[2]
- addi $key_,$key_,0x20
- bdnz Load_cbc_dec_key
-
- lvx v26,$x10,$key
- ?vperm v24,v30,v31,$keyperm
- lvx v27,$x20,$key
- stvx v24,$x00,$key_ # off-load round[3]
- ?vperm v25,v31,v26,$keyperm
- lvx v28,$x30,$key
- stvx v25,$x10,$key_ # off-load round[4]
- addi $key_,$sp,$FRAME+15 # rewind $key_
- ?vperm v26,v26,v27,$keyperm
- lvx v29,$x40,$key
- ?vperm v27,v27,v28,$keyperm
- lvx v30,$x50,$key
- ?vperm v28,v28,v29,$keyperm
- lvx v31,$x60,$key
- ?vperm v29,v29,v30,$keyperm
- lvx $out0,$x70,$key # borrow $out0
- ?vperm v30,v30,v31,$keyperm
- lvx v24,$x00,$key_ # pre-load round[1]
- ?vperm v31,v31,$out0,$keyperm
- lvx v25,$x10,$key_ # pre-load round[2]
-
- #lvx $inptail,0,$inp # "caller" already did this
- #addi $inp,$inp,15 # 15 is not typo
- subi $inp,$inp,15 # undo "caller"
-
- le?li $idx,8
- lvx_u $in0,$x00,$inp # load first 8 "words"
- le?lvsl $inpperm,0,$idx
- le?vspltisb $tmp,0x0f
- lvx_u $in1,$x10,$inp
- le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
- lvx_u $in2,$x20,$inp
- le?vperm $in0,$in0,$in0,$inpperm
- lvx_u $in3,$x30,$inp
- le?vperm $in1,$in1,$in1,$inpperm
- lvx_u $in4,$x40,$inp
- le?vperm $in2,$in2,$in2,$inpperm
- vxor $out0,$in0,$rndkey0
- lvx_u $in5,$x50,$inp
- le?vperm $in3,$in3,$in3,$inpperm
- vxor $out1,$in1,$rndkey0
- lvx_u $in6,$x60,$inp
- le?vperm $in4,$in4,$in4,$inpperm
- vxor $out2,$in2,$rndkey0
- lvx_u $in7,$x70,$inp
- addi $inp,$inp,0x80
- le?vperm $in5,$in5,$in5,$inpperm
- vxor $out3,$in3,$rndkey0
- le?vperm $in6,$in6,$in6,$inpperm
- vxor $out4,$in4,$rndkey0
- le?vperm $in7,$in7,$in7,$inpperm
- vxor $out5,$in5,$rndkey0
- vxor $out6,$in6,$rndkey0
- vxor $out7,$in7,$rndkey0
-
- mtctr $rounds
- b Loop_cbc_dec8x
-.align 5
-Loop_cbc_dec8x:
- vncipher $out0,$out0,v24
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- vncipher $out5,$out5,v24
- vncipher $out6,$out6,v24
- vncipher $out7,$out7,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vncipher $out0,$out0,v25
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- vncipher $out5,$out5,v25
- vncipher $out6,$out6,v25
- vncipher $out7,$out7,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_cbc_dec8x
-
- subic $len,$len,128 # $len-=128
- vncipher $out0,$out0,v24
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- vncipher $out5,$out5,v24
- vncipher $out6,$out6,v24
- vncipher $out7,$out7,v24
-
- subfe. r0,r0,r0 # borrow?-1:0
- vncipher $out0,$out0,v25
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- vncipher $out5,$out5,v25
- vncipher $out6,$out6,v25
- vncipher $out7,$out7,v25
-
- and r0,r0,$len
- vncipher $out0,$out0,v26
- vncipher $out1,$out1,v26
- vncipher $out2,$out2,v26
- vncipher $out3,$out3,v26
- vncipher $out4,$out4,v26
- vncipher $out5,$out5,v26
- vncipher $out6,$out6,v26
- vncipher $out7,$out7,v26
-
- add $inp,$inp,r0 # $inp is adjusted in such
- # way that at exit from the
- # loop inX-in7 are loaded
- # with last "words"
- vncipher $out0,$out0,v27
- vncipher $out1,$out1,v27
- vncipher $out2,$out2,v27
- vncipher $out3,$out3,v27
- vncipher $out4,$out4,v27
- vncipher $out5,$out5,v27
- vncipher $out6,$out6,v27
- vncipher $out7,$out7,v27
-
- addi $key_,$sp,$FRAME+15 # rewind $key_
- vncipher $out0,$out0,v28
- vncipher $out1,$out1,v28
- vncipher $out2,$out2,v28
- vncipher $out3,$out3,v28
- vncipher $out4,$out4,v28
- vncipher $out5,$out5,v28
- vncipher $out6,$out6,v28
- vncipher $out7,$out7,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
-
- vncipher $out0,$out0,v29
- vncipher $out1,$out1,v29
- vncipher $out2,$out2,v29
- vncipher $out3,$out3,v29
- vncipher $out4,$out4,v29
- vncipher $out5,$out5,v29
- vncipher $out6,$out6,v29
- vncipher $out7,$out7,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
-
- vncipher $out0,$out0,v30
- vxor $ivec,$ivec,v31 # xor with last round key
- vncipher $out1,$out1,v30
- vxor $in0,$in0,v31
- vncipher $out2,$out2,v30
- vxor $in1,$in1,v31
- vncipher $out3,$out3,v30
- vxor $in2,$in2,v31
- vncipher $out4,$out4,v30
- vxor $in3,$in3,v31
- vncipher $out5,$out5,v30
- vxor $in4,$in4,v31
- vncipher $out6,$out6,v30
- vxor $in5,$in5,v31
- vncipher $out7,$out7,v30
- vxor $in6,$in6,v31
-
- vncipherlast $out0,$out0,$ivec
- vncipherlast $out1,$out1,$in0
- lvx_u $in0,$x00,$inp # load next input block
- vncipherlast $out2,$out2,$in1
- lvx_u $in1,$x10,$inp
- vncipherlast $out3,$out3,$in2
- le?vperm $in0,$in0,$in0,$inpperm
- lvx_u $in2,$x20,$inp
- vncipherlast $out4,$out4,$in3
- le?vperm $in1,$in1,$in1,$inpperm
- lvx_u $in3,$x30,$inp
- vncipherlast $out5,$out5,$in4
- le?vperm $in2,$in2,$in2,$inpperm
- lvx_u $in4,$x40,$inp
- vncipherlast $out6,$out6,$in5
- le?vperm $in3,$in3,$in3,$inpperm
- lvx_u $in5,$x50,$inp
- vncipherlast $out7,$out7,$in6
- le?vperm $in4,$in4,$in4,$inpperm
- lvx_u $in6,$x60,$inp
- vmr $ivec,$in7
- le?vperm $in5,$in5,$in5,$inpperm
- lvx_u $in7,$x70,$inp
- addi $inp,$inp,0x80
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $in6,$in6,$in6,$inpperm
- vxor $out0,$in0,$rndkey0
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- le?vperm $in7,$in7,$in7,$inpperm
- vxor $out1,$in1,$rndkey0
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x20,$out
- vxor $out2,$in2,$rndkey0
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x30,$out
- vxor $out3,$in3,$rndkey0
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x40,$out
- vxor $out4,$in4,$rndkey0
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x50,$out
- vxor $out5,$in5,$rndkey0
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x60,$out
- vxor $out6,$in6,$rndkey0
- stvx_u $out7,$x70,$out
- addi $out,$out,0x80
- vxor $out7,$in7,$rndkey0
-
- mtctr $rounds
- beq Loop_cbc_dec8x # did $len-=128 borrow?
-
- addic. $len,$len,128
- beq Lcbc_dec8x_done
- nop
- nop
-
-Loop_cbc_dec8x_tail: # up to 7 "words" tail...
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- vncipher $out5,$out5,v24
- vncipher $out6,$out6,v24
- vncipher $out7,$out7,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- vncipher $out5,$out5,v25
- vncipher $out6,$out6,v25
- vncipher $out7,$out7,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_cbc_dec8x_tail
-
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- vncipher $out5,$out5,v24
- vncipher $out6,$out6,v24
- vncipher $out7,$out7,v24
-
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- vncipher $out5,$out5,v25
- vncipher $out6,$out6,v25
- vncipher $out7,$out7,v25
-
- vncipher $out1,$out1,v26
- vncipher $out2,$out2,v26
- vncipher $out3,$out3,v26
- vncipher $out4,$out4,v26
- vncipher $out5,$out5,v26
- vncipher $out6,$out6,v26
- vncipher $out7,$out7,v26
-
- vncipher $out1,$out1,v27
- vncipher $out2,$out2,v27
- vncipher $out3,$out3,v27
- vncipher $out4,$out4,v27
- vncipher $out5,$out5,v27
- vncipher $out6,$out6,v27
- vncipher $out7,$out7,v27
-
- vncipher $out1,$out1,v28
- vncipher $out2,$out2,v28
- vncipher $out3,$out3,v28
- vncipher $out4,$out4,v28
- vncipher $out5,$out5,v28
- vncipher $out6,$out6,v28
- vncipher $out7,$out7,v28
-
- vncipher $out1,$out1,v29
- vncipher $out2,$out2,v29
- vncipher $out3,$out3,v29
- vncipher $out4,$out4,v29
- vncipher $out5,$out5,v29
- vncipher $out6,$out6,v29
- vncipher $out7,$out7,v29
-
- vncipher $out1,$out1,v30
- vxor $ivec,$ivec,v31 # last round key
- vncipher $out2,$out2,v30
- vxor $in1,$in1,v31
- vncipher $out3,$out3,v30
- vxor $in2,$in2,v31
- vncipher $out4,$out4,v30
- vxor $in3,$in3,v31
- vncipher $out5,$out5,v30
- vxor $in4,$in4,v31
- vncipher $out6,$out6,v30
- vxor $in5,$in5,v31
- vncipher $out7,$out7,v30
- vxor $in6,$in6,v31
-
- cmplwi $len,32 # switch($len)
- blt Lcbc_dec8x_one
- nop
- beq Lcbc_dec8x_two
- cmplwi $len,64
- blt Lcbc_dec8x_three
- nop
- beq Lcbc_dec8x_four
- cmplwi $len,96
- blt Lcbc_dec8x_five
- nop
- beq Lcbc_dec8x_six
-
-Lcbc_dec8x_seven:
- vncipherlast $out1,$out1,$ivec
- vncipherlast $out2,$out2,$in1
- vncipherlast $out3,$out3,$in2
- vncipherlast $out4,$out4,$in3
- vncipherlast $out5,$out5,$in4
- vncipherlast $out6,$out6,$in5
- vncipherlast $out7,$out7,$in6
- vmr $ivec,$in7
-
- le?vperm $out1,$out1,$out1,$inpperm
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x00,$out
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x10,$out
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x20,$out
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x30,$out
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x40,$out
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x50,$out
- stvx_u $out7,$x60,$out
- addi $out,$out,0x70
- b Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_six:
- vncipherlast $out2,$out2,$ivec
- vncipherlast $out3,$out3,$in2
- vncipherlast $out4,$out4,$in3
- vncipherlast $out5,$out5,$in4
- vncipherlast $out6,$out6,$in5
- vncipherlast $out7,$out7,$in6
- vmr $ivec,$in7
-
- le?vperm $out2,$out2,$out2,$inpperm
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x00,$out
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x10,$out
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x20,$out
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x30,$out
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x40,$out
- stvx_u $out7,$x50,$out
- addi $out,$out,0x60
- b Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_five:
- vncipherlast $out3,$out3,$ivec
- vncipherlast $out4,$out4,$in3
- vncipherlast $out5,$out5,$in4
- vncipherlast $out6,$out6,$in5
- vncipherlast $out7,$out7,$in6
- vmr $ivec,$in7
-
- le?vperm $out3,$out3,$out3,$inpperm
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x00,$out
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x10,$out
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x20,$out
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x30,$out
- stvx_u $out7,$x40,$out
- addi $out,$out,0x50
- b Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_four:
- vncipherlast $out4,$out4,$ivec
- vncipherlast $out5,$out5,$in4
- vncipherlast $out6,$out6,$in5
- vncipherlast $out7,$out7,$in6
- vmr $ivec,$in7
-
- le?vperm $out4,$out4,$out4,$inpperm
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x00,$out
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x10,$out
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x20,$out
- stvx_u $out7,$x30,$out
- addi $out,$out,0x40
- b Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_three:
- vncipherlast $out5,$out5,$ivec
- vncipherlast $out6,$out6,$in5
- vncipherlast $out7,$out7,$in6
- vmr $ivec,$in7
-
- le?vperm $out5,$out5,$out5,$inpperm
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x00,$out
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x10,$out
- stvx_u $out7,$x20,$out
- addi $out,$out,0x30
- b Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_two:
- vncipherlast $out6,$out6,$ivec
- vncipherlast $out7,$out7,$in6
- vmr $ivec,$in7
-
- le?vperm $out6,$out6,$out6,$inpperm
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x00,$out
- stvx_u $out7,$x10,$out
- addi $out,$out,0x20
- b Lcbc_dec8x_done
-
-.align 5
-Lcbc_dec8x_one:
- vncipherlast $out7,$out7,$ivec
- vmr $ivec,$in7
-
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out7,0,$out
- addi $out,$out,0x10
-
-Lcbc_dec8x_done:
- le?vperm $ivec,$ivec,$ivec,$inpperm
- stvx_u $ivec,0,$ivp # write [unaligned] iv
-
- li r10,`$FRAME+15`
- li r11,`$FRAME+31`
- stvx $inpperm,r10,$sp # wipe copies of round keys
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
- stvx $inpperm,r10,$sp
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
- stvx $inpperm,r10,$sp
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
- stvx $inpperm,r10,$sp
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
-
- mtspr 256,$vrsave
- lvx v20,r10,$sp # ABI says so
- addi r10,r10,32
- lvx v21,r11,$sp
- addi r11,r11,32
- lvx v22,r10,$sp
- addi r10,r10,32
- lvx v23,r11,$sp
- addi r11,r11,32
- lvx v24,r10,$sp
- addi r10,r10,32
- lvx v25,r11,$sp
- addi r11,r11,32
- lvx v26,r10,$sp
- addi r10,r10,32
- lvx v27,r11,$sp
- addi r11,r11,32
- lvx v28,r10,$sp
- addi r10,r10,32
- lvx v29,r11,$sp
- addi r11,r11,32
- lvx v30,r10,$sp
- lvx v31,r11,$sp
- $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
- blr
- .long 0
- .byte 0,12,0x14,0,0x80,6,6,0
- .long 0
-.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
-___
-}} }}}
-
-#########################################################################
-{{{ # CTR procedure[s] #
-
-####################### WARNING: Here be dragons! #######################
-#
-# This code is written as 'ctr32', based on a 32-bit counter used
-# upstream. The kernel does *not* use a 32-bit counter. The kernel uses
-# a 128-bit counter.
-#
-# This leads to subtle changes from the upstream code: the counter
-# is incremented with vaddu_q_m rather than vaddu_w_m. This occurs in
-# both the bulk (8 blocks at a time) path, and in the individual block
-# path. Be aware of this when doing updates.
-#
-# See:
-# 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
-# 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
-# https://github.com/openssl/openssl/pull/8942
-#
-#########################################################################
-my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
-my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
-my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
- map("v$_",(4..11));
-my $dat=$tmp;
-
-$code.=<<___;
-.globl .${prefix}_ctr32_encrypt_blocks
- ${UCMP}i $len,1
- bltlr-
-
- lis r0,0xfff0
- mfspr $vrsave,256
- mtspr 256,r0
-
- li $idx,15
- vxor $rndkey0,$rndkey0,$rndkey0
- le?vspltisb $tmp,0x0f
-
- lvx $ivec,0,$ivp # load [unaligned] iv
- lvsl $inpperm,0,$ivp
- lvx $inptail,$idx,$ivp
- vspltisb $one,1
- le?vxor $inpperm,$inpperm,$tmp
- vperm $ivec,$ivec,$inptail,$inpperm
- vsldoi $one,$rndkey0,$one,1
-
- neg r11,$inp
- ?lvsl $keyperm,0,$key # prepare for unaligned key
- lwz $rounds,240($key)
-
- lvsr $inpperm,0,r11 # prepare for unaligned load
- lvx $inptail,0,$inp
- addi $inp,$inp,15 # 15 is not typo
- le?vxor $inpperm,$inpperm,$tmp
-
- srwi $rounds,$rounds,1
- li $idx,16
- subi $rounds,$rounds,1
-
- ${UCMP}i $len,8
- bge _aesp8_ctr32_encrypt8x
-
- ?lvsr $outperm,0,$out # prepare for unaligned store
- vspltisb $outmask,-1
- lvx $outhead,0,$out
- ?vperm $outmask,$rndkey0,$outmask,$outperm
- le?vxor $outperm,$outperm,$tmp
-
- lvx $rndkey0,0,$key
- mtctr $rounds
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$ivec,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
- b Loop_ctr32_enc
-
-.align 5
-Loop_ctr32_enc:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipher $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
- bdnz Loop_ctr32_enc
-
- vadduqm $ivec,$ivec,$one # Kernel change for 128-bit
- vmr $dat,$inptail
- lvx $inptail,0,$inp
- addi $inp,$inp,16
- subic. $len,$len,1 # blocks--
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key
- vperm $dat,$dat,$inptail,$inpperm
- li $idx,16
- ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
- lvx $rndkey0,0,$key
- vxor $dat,$dat,$rndkey1 # last round key
- vcipherlast $inout,$inout,$dat
-
- lvx $rndkey1,$idx,$key
- addi $idx,$idx,16
- vperm $inout,$inout,$inout,$outperm
- vsel $dat,$outhead,$inout,$outmask
- mtctr $rounds
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vmr $outhead,$inout
- vxor $inout,$ivec,$rndkey0
- lvx $rndkey0,$idx,$key
- addi $idx,$idx,16
- stvx $dat,0,$out
- addi $out,$out,16
- bne Loop_ctr32_enc
-
- addi $out,$out,-1
- lvx $inout,0,$out # redundant in aligned case
- vsel $inout,$outhead,$inout,$outmask
- stvx $inout,0,$out
-
- mtspr 256,$vrsave
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,6,0
- .long 0
-___
-#########################################################################
-{{ # Optimized CTR procedure #
-my $key_="r11";
-my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
-my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
-my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
-my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
- # v26-v31 last 6 round keys
-my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
-my ($two,$three,$four)=($outhead,$outperm,$outmask);
-
-$code.=<<___;
-.align 5
-_aesp8_ctr32_encrypt8x:
- $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
- li r10,`$FRAME+8*16+15`
- li r11,`$FRAME+8*16+31`
- stvx v20,r10,$sp # ABI says so
- addi r10,r10,32
- stvx v21,r11,$sp
- addi r11,r11,32
- stvx v22,r10,$sp
- addi r10,r10,32
- stvx v23,r11,$sp
- addi r11,r11,32
- stvx v24,r10,$sp
- addi r10,r10,32
- stvx v25,r11,$sp
- addi r11,r11,32
- stvx v26,r10,$sp
- addi r10,r10,32
- stvx v27,r11,$sp
- addi r11,r11,32
- stvx v28,r10,$sp
- addi r10,r10,32
- stvx v29,r11,$sp
- addi r11,r11,32
- stvx v30,r10,$sp
- stvx v31,r11,$sp
- li r0,-1
- stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
- li $x10,0x10
- $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- li $x20,0x20
- $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- li $x30,0x30
- $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- li $x40,0x40
- $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- li $x50,0x50
- $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- li $x60,0x60
- $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- li $x70,0x70
- mtspr 256,r0
-
- subi $rounds,$rounds,3 # -4 in total
-
- lvx $rndkey0,$x00,$key # load key schedule
- lvx v30,$x10,$key
- addi $key,$key,0x20
- lvx v31,$x00,$key
- ?vperm $rndkey0,$rndkey0,v30,$keyperm
- addi $key_,$sp,$FRAME+15
- mtctr $rounds
-
-Load_ctr32_enc_key:
- ?vperm v24,v30,v31,$keyperm
- lvx v30,$x10,$key
- addi $key,$key,0x20
- stvx v24,$x00,$key_ # off-load round[1]
- ?vperm v25,v31,v30,$keyperm
- lvx v31,$x00,$key
- stvx v25,$x10,$key_ # off-load round[2]
- addi $key_,$key_,0x20
- bdnz Load_ctr32_enc_key
-
- lvx v26,$x10,$key
- ?vperm v24,v30,v31,$keyperm
- lvx v27,$x20,$key
- stvx v24,$x00,$key_ # off-load round[3]
- ?vperm v25,v31,v26,$keyperm
- lvx v28,$x30,$key
- stvx v25,$x10,$key_ # off-load round[4]
- addi $key_,$sp,$FRAME+15 # rewind $key_
- ?vperm v26,v26,v27,$keyperm
- lvx v29,$x40,$key
- ?vperm v27,v27,v28,$keyperm
- lvx v30,$x50,$key
- ?vperm v28,v28,v29,$keyperm
- lvx v31,$x60,$key
- ?vperm v29,v29,v30,$keyperm
- lvx $out0,$x70,$key # borrow $out0
- ?vperm v30,v30,v31,$keyperm
- lvx v24,$x00,$key_ # pre-load round[1]
- ?vperm v31,v31,$out0,$keyperm
- lvx v25,$x10,$key_ # pre-load round[2]
-
- vadduqm $two,$one,$one
- subi $inp,$inp,15 # undo "caller"
- $SHL $len,$len,4
-
- vadduqm $out1,$ivec,$one # counter values ...
- vadduqm $out2,$ivec,$two # (do all ctr adds as 128-bit)
- vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
- le?li $idx,8
- vadduqm $out3,$out1,$two
- vxor $out1,$out1,$rndkey0
- le?lvsl $inpperm,0,$idx
- vadduqm $out4,$out2,$two
- vxor $out2,$out2,$rndkey0
- le?vspltisb $tmp,0x0f
- vadduqm $out5,$out3,$two
- vxor $out3,$out3,$rndkey0
- le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
- vadduqm $out6,$out4,$two
- vxor $out4,$out4,$rndkey0
- vadduqm $out7,$out5,$two
- vxor $out5,$out5,$rndkey0
- vadduqm $ivec,$out6,$two # next counter value
- vxor $out6,$out6,$rndkey0
- vxor $out7,$out7,$rndkey0
-
- mtctr $rounds
- b Loop_ctr32_enc8x
-.align 5
-Loop_ctr32_enc8x:
- vcipher $out0,$out0,v24
- vcipher $out1,$out1,v24
- vcipher $out2,$out2,v24
- vcipher $out3,$out3,v24
- vcipher $out4,$out4,v24
- vcipher $out5,$out5,v24
- vcipher $out6,$out6,v24
- vcipher $out7,$out7,v24
-Loop_ctr32_enc8x_middle:
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vcipher $out0,$out0,v25
- vcipher $out1,$out1,v25
- vcipher $out2,$out2,v25
- vcipher $out3,$out3,v25
- vcipher $out4,$out4,v25
- vcipher $out5,$out5,v25
- vcipher $out6,$out6,v25
- vcipher $out7,$out7,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_ctr32_enc8x
-
- subic r11,$len,256 # $len-256, borrow $key_
- vcipher $out0,$out0,v24
- vcipher $out1,$out1,v24
- vcipher $out2,$out2,v24
- vcipher $out3,$out3,v24
- vcipher $out4,$out4,v24
- vcipher $out5,$out5,v24
- vcipher $out6,$out6,v24
- vcipher $out7,$out7,v24
-
- subfe r0,r0,r0 # borrow?-1:0
- vcipher $out0,$out0,v25
- vcipher $out1,$out1,v25
- vcipher $out2,$out2,v25
- vcipher $out3,$out3,v25
- vcipher $out4,$out4,v25
- vcipher $out5,$out5,v25
- vcipher $out6,$out6,v25
- vcipher $out7,$out7,v25
-
- and r0,r0,r11
- addi $key_,$sp,$FRAME+15 # rewind $key_
- vcipher $out0,$out0,v26
- vcipher $out1,$out1,v26
- vcipher $out2,$out2,v26
- vcipher $out3,$out3,v26
- vcipher $out4,$out4,v26
- vcipher $out5,$out5,v26
- vcipher $out6,$out6,v26
- vcipher $out7,$out7,v26
- lvx v24,$x00,$key_ # re-pre-load round[1]
-
- subic $len,$len,129 # $len-=129
- vcipher $out0,$out0,v27
- addi $len,$len,1 # $len-=128 really
- vcipher $out1,$out1,v27
- vcipher $out2,$out2,v27
- vcipher $out3,$out3,v27
- vcipher $out4,$out4,v27
- vcipher $out5,$out5,v27
- vcipher $out6,$out6,v27
- vcipher $out7,$out7,v27
- lvx v25,$x10,$key_ # re-pre-load round[2]
-
- vcipher $out0,$out0,v28
- lvx_u $in0,$x00,$inp # load input
- vcipher $out1,$out1,v28
- lvx_u $in1,$x10,$inp
- vcipher $out2,$out2,v28
- lvx_u $in2,$x20,$inp
- vcipher $out3,$out3,v28
- lvx_u $in3,$x30,$inp
- vcipher $out4,$out4,v28
- lvx_u $in4,$x40,$inp
- vcipher $out5,$out5,v28
- lvx_u $in5,$x50,$inp
- vcipher $out6,$out6,v28
- lvx_u $in6,$x60,$inp
- vcipher $out7,$out7,v28
- lvx_u $in7,$x70,$inp
- addi $inp,$inp,0x80
-
- vcipher $out0,$out0,v29
- le?vperm $in0,$in0,$in0,$inpperm
- vcipher $out1,$out1,v29
- le?vperm $in1,$in1,$in1,$inpperm
- vcipher $out2,$out2,v29
- le?vperm $in2,$in2,$in2,$inpperm
- vcipher $out3,$out3,v29
- le?vperm $in3,$in3,$in3,$inpperm
- vcipher $out4,$out4,v29
- le?vperm $in4,$in4,$in4,$inpperm
- vcipher $out5,$out5,v29
- le?vperm $in5,$in5,$in5,$inpperm
- vcipher $out6,$out6,v29
- le?vperm $in6,$in6,$in6,$inpperm
- vcipher $out7,$out7,v29
- le?vperm $in7,$in7,$in7,$inpperm
-
- add $inp,$inp,r0 # $inp is adjusted in such
- # way that at exit from the
- # loop inX-in7 are loaded
- # with last "words"
- subfe. r0,r0,r0 # borrow?-1:0
- vcipher $out0,$out0,v30
- vxor $in0,$in0,v31 # xor with last round key
- vcipher $out1,$out1,v30
- vxor $in1,$in1,v31
- vcipher $out2,$out2,v30
- vxor $in2,$in2,v31
- vcipher $out3,$out3,v30
- vxor $in3,$in3,v31
- vcipher $out4,$out4,v30
- vxor $in4,$in4,v31
- vcipher $out5,$out5,v30
- vxor $in5,$in5,v31
- vcipher $out6,$out6,v30
- vxor $in6,$in6,v31
- vcipher $out7,$out7,v30
- vxor $in7,$in7,v31
-
- bne Lctr32_enc8x_break # did $len-129 borrow?
-
- vcipherlast $in0,$out0,$in0
- vcipherlast $in1,$out1,$in1
- vadduqm $out1,$ivec,$one # counter values ...
- vcipherlast $in2,$out2,$in2
- vadduqm $out2,$ivec,$two
- vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
- vcipherlast $in3,$out3,$in3
- vadduqm $out3,$out1,$two
- vxor $out1,$out1,$rndkey0
- vcipherlast $in4,$out4,$in4
- vadduqm $out4,$out2,$two
- vxor $out2,$out2,$rndkey0
- vcipherlast $in5,$out5,$in5
- vadduqm $out5,$out3,$two
- vxor $out3,$out3,$rndkey0
- vcipherlast $in6,$out6,$in6
- vadduqm $out6,$out4,$two
- vxor $out4,$out4,$rndkey0
- vcipherlast $in7,$out7,$in7
- vadduqm $out7,$out5,$two
- vxor $out5,$out5,$rndkey0
- le?vperm $in0,$in0,$in0,$inpperm
- vadduqm $ivec,$out6,$two # next counter value
- vxor $out6,$out6,$rndkey0
- le?vperm $in1,$in1,$in1,$inpperm
- vxor $out7,$out7,$rndkey0
- mtctr $rounds
-
- vcipher $out0,$out0,v24
- stvx_u $in0,$x00,$out
- le?vperm $in2,$in2,$in2,$inpperm
- vcipher $out1,$out1,v24
- stvx_u $in1,$x10,$out
- le?vperm $in3,$in3,$in3,$inpperm
- vcipher $out2,$out2,v24
- stvx_u $in2,$x20,$out
- le?vperm $in4,$in4,$in4,$inpperm
- vcipher $out3,$out3,v24
- stvx_u $in3,$x30,$out
- le?vperm $in5,$in5,$in5,$inpperm
- vcipher $out4,$out4,v24
- stvx_u $in4,$x40,$out
- le?vperm $in6,$in6,$in6,$inpperm
- vcipher $out5,$out5,v24
- stvx_u $in5,$x50,$out
- le?vperm $in7,$in7,$in7,$inpperm
- vcipher $out6,$out6,v24
- stvx_u $in6,$x60,$out
- vcipher $out7,$out7,v24
- stvx_u $in7,$x70,$out
- addi $out,$out,0x80
-
- b Loop_ctr32_enc8x_middle
-
-.align 5
-Lctr32_enc8x_break:
- cmpwi $len,-0x60
- blt Lctr32_enc8x_one
- nop
- beq Lctr32_enc8x_two
- cmpwi $len,-0x40
- blt Lctr32_enc8x_three
- nop
- beq Lctr32_enc8x_four
- cmpwi $len,-0x20
- blt Lctr32_enc8x_five
- nop
- beq Lctr32_enc8x_six
- cmpwi $len,0x00
- blt Lctr32_enc8x_seven
-
-Lctr32_enc8x_eight:
- vcipherlast $out0,$out0,$in0
- vcipherlast $out1,$out1,$in1
- vcipherlast $out2,$out2,$in2
- vcipherlast $out3,$out3,$in3
- vcipherlast $out4,$out4,$in4
- vcipherlast $out5,$out5,$in5
- vcipherlast $out6,$out6,$in6
- vcipherlast $out7,$out7,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x20,$out
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x30,$out
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x40,$out
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x50,$out
- le?vperm $out7,$out7,$out7,$inpperm
- stvx_u $out6,$x60,$out
- stvx_u $out7,$x70,$out
- addi $out,$out,0x80
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_seven:
- vcipherlast $out0,$out0,$in1
- vcipherlast $out1,$out1,$in2
- vcipherlast $out2,$out2,$in3
- vcipherlast $out3,$out3,$in4
- vcipherlast $out4,$out4,$in5
- vcipherlast $out5,$out5,$in6
- vcipherlast $out6,$out6,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x20,$out
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x30,$out
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x40,$out
- le?vperm $out6,$out6,$out6,$inpperm
- stvx_u $out5,$x50,$out
- stvx_u $out6,$x60,$out
- addi $out,$out,0x70
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_six:
- vcipherlast $out0,$out0,$in2
- vcipherlast $out1,$out1,$in3
- vcipherlast $out2,$out2,$in4
- vcipherlast $out3,$out3,$in5
- vcipherlast $out4,$out4,$in6
- vcipherlast $out5,$out5,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x20,$out
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x30,$out
- le?vperm $out5,$out5,$out5,$inpperm
- stvx_u $out4,$x40,$out
- stvx_u $out5,$x50,$out
- addi $out,$out,0x60
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_five:
- vcipherlast $out0,$out0,$in3
- vcipherlast $out1,$out1,$in4
- vcipherlast $out2,$out2,$in5
- vcipherlast $out3,$out3,$in6
- vcipherlast $out4,$out4,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x20,$out
- le?vperm $out4,$out4,$out4,$inpperm
- stvx_u $out3,$x30,$out
- stvx_u $out4,$x40,$out
- addi $out,$out,0x50
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_four:
- vcipherlast $out0,$out0,$in4
- vcipherlast $out1,$out1,$in5
- vcipherlast $out2,$out2,$in6
- vcipherlast $out3,$out3,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$inpperm
- stvx_u $out2,$x20,$out
- stvx_u $out3,$x30,$out
- addi $out,$out,0x40
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_three:
- vcipherlast $out0,$out0,$in5
- vcipherlast $out1,$out1,$in6
- vcipherlast $out2,$out2,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- le?vperm $out2,$out2,$out2,$inpperm
- stvx_u $out1,$x10,$out
- stvx_u $out2,$x20,$out
- addi $out,$out,0x30
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_two:
- vcipherlast $out0,$out0,$in6
- vcipherlast $out1,$out1,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- le?vperm $out1,$out1,$out1,$inpperm
- stvx_u $out0,$x00,$out
- stvx_u $out1,$x10,$out
- addi $out,$out,0x20
- b Lctr32_enc8x_done
-
-.align 5
-Lctr32_enc8x_one:
- vcipherlast $out0,$out0,$in7
-
- le?vperm $out0,$out0,$out0,$inpperm
- stvx_u $out0,0,$out
- addi $out,$out,0x10
-
-Lctr32_enc8x_done:
- li r10,`$FRAME+15`
- li r11,`$FRAME+31`
- stvx $inpperm,r10,$sp # wipe copies of round keys
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
- stvx $inpperm,r10,$sp
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
- stvx $inpperm,r10,$sp
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
- stvx $inpperm,r10,$sp
- addi r10,r10,32
- stvx $inpperm,r11,$sp
- addi r11,r11,32
-
- mtspr 256,$vrsave
- lvx v20,r10,$sp # ABI says so
- addi r10,r10,32
- lvx v21,r11,$sp
- addi r11,r11,32
- lvx v22,r10,$sp
- addi r10,r10,32
- lvx v23,r11,$sp
- addi r11,r11,32
- lvx v24,r10,$sp
- addi r10,r10,32
- lvx v25,r11,$sp
- addi r11,r11,32
- lvx v26,r10,$sp
- addi r10,r10,32
- lvx v27,r11,$sp
- addi r11,r11,32
- lvx v28,r10,$sp
- addi r10,r10,32
- lvx v29,r11,$sp
- addi r11,r11,32
- lvx v30,r10,$sp
- lvx v31,r11,$sp
- $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
- blr
- .long 0
- .byte 0,12,0x14,0,0x80,6,6,0
- .long 0
-.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
-___
-}} }}}
-
-#########################################################################
-{{{ # XTS procedures #
-# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
-# const AES_KEY *key1, const AES_KEY *key2, #
-# [const] unsigned char iv[16]); #
-# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which #
-# input tweak value is assumed to be encrypted already, and last tweak #
-# value, one suitable for consecutive call on same chunk of data, is #
-# written back to original buffer. In addition, in "tweak chaining" #
-# mode only complete input blocks are processed. #
-
-my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
-my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
-my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
-my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
-my $taillen = $key2;
-
- ($inp,$idx) = ($idx,$inp); # reassign
-
-$code.=<<___;
-.globl .${prefix}_xts_encrypt
- mr $inp,r3 # reassign
- li r3,-1
- ${UCMP}i $len,16
- bltlr-
-
- lis r0,0xfff0
- mfspr r12,256 # save vrsave
- li r11,0
- mtspr 256,r0
-
- vspltisb $seven,0x07 # 0x070707..07
- le?lvsl $leperm,r11,r11
- le?vspltisb $tmp,0x0f
- le?vxor $leperm,$leperm,$seven
-
- li $idx,15
- lvx $tweak,0,$ivp # load [unaligned] iv
- lvsl $inpperm,0,$ivp
- lvx $inptail,$idx,$ivp
- le?vxor $inpperm,$inpperm,$tmp
- vperm $tweak,$tweak,$inptail,$inpperm
-
- neg r11,$inp
- lvsr $inpperm,0,r11 # prepare for unaligned load
- lvx $inout,0,$inp
- addi $inp,$inp,15 # 15 is not typo
- le?vxor $inpperm,$inpperm,$tmp
-
- ${UCMP}i $key2,0 # key2==NULL?
- beq Lxts_enc_no_key2
-
- ?lvsl $keyperm,0,$key2 # prepare for unaligned key
- lwz $rounds,240($key2)
- srwi $rounds,$rounds,1
- subi $rounds,$rounds,1
- li $idx,16
-
- lvx $rndkey0,0,$key2
- lvx $rndkey1,$idx,$key2
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $tweak,$tweak,$rndkey0
- lvx $rndkey0,$idx,$key2
- addi $idx,$idx,16
- mtctr $rounds
-
-Ltweak_xts_enc:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $tweak,$tweak,$rndkey1
- lvx $rndkey1,$idx,$key2
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipher $tweak,$tweak,$rndkey0
- lvx $rndkey0,$idx,$key2
- addi $idx,$idx,16
- bdnz Ltweak_xts_enc
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $tweak,$tweak,$rndkey1
- lvx $rndkey1,$idx,$key2
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipherlast $tweak,$tweak,$rndkey0
-
- li $ivp,0 # don't chain the tweak
- b Lxts_enc
-
-Lxts_enc_no_key2:
- li $idx,-16
- and $len,$len,$idx # in "tweak chaining"
- # mode only complete
- # blocks are processed
-Lxts_enc:
- lvx $inptail,0,$inp
- addi $inp,$inp,16
-
- ?lvsl $keyperm,0,$key1 # prepare for unaligned key
- lwz $rounds,240($key1)
- srwi $rounds,$rounds,1
- subi $rounds,$rounds,1
- li $idx,16
-
- vslb $eighty7,$seven,$seven # 0x808080..80
- vor $eighty7,$eighty7,$seven # 0x878787..87
- vspltisb $tmp,1 # 0x010101..01
- vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
-
- ${UCMP}i $len,96
- bge _aesp8_xts_encrypt6x
-
- andi. $taillen,$len,15
- subic r0,$len,32
- subi $taillen,$taillen,16
- subfe r0,r0,r0
- and r0,r0,$taillen
- add $inp,$inp,r0
-
- lvx $rndkey0,0,$key1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
- vperm $inout,$inout,$inptail,$inpperm
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$inout,$tweak
- vxor $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
- mtctr $rounds
- b Loop_xts_enc
-
-.align 5
-Loop_xts_enc:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipher $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
- bdnz Loop_xts_enc
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key1
- li $idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $rndkey0,$rndkey0,$tweak
- vcipherlast $output,$inout,$rndkey0
-
- le?vperm $tmp,$output,$output,$leperm
- be?nop
- le?stvx_u $tmp,0,$out
- be?stvx_u $output,0,$out
- addi $out,$out,16
-
- subic. $len,$len,16
- beq Lxts_enc_done
-
- vmr $inout,$inptail
- lvx $inptail,0,$inp
- addi $inp,$inp,16
- lvx $rndkey0,0,$key1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
-
- subic r0,$len,32
- subfe r0,r0,r0
- and r0,r0,$taillen
- add $inp,$inp,r0
-
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vand $tmp,$tmp,$eighty7
- vxor $tweak,$tweak,$tmp
-
- vperm $inout,$inout,$inptail,$inpperm
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$inout,$tweak
- vxor $output,$output,$rndkey0 # just in case $len<16
- vxor $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
-
- mtctr $rounds
- ${UCMP}i $len,16
- bge Loop_xts_enc
-
- vxor $output,$output,$tweak
- lvsr $inpperm,0,$len # $inpperm is no longer needed
- vxor $inptail,$inptail,$inptail # $inptail is no longer needed
- vspltisb $tmp,-1
- vperm $inptail,$inptail,$tmp,$inpperm
- vsel $inout,$inout,$output,$inptail
-
- subi r11,$out,17
- subi $out,$out,16
- mtctr $len
- li $len,16
-Loop_xts_enc_steal:
- lbzu r0,1(r11)
- stb r0,16(r11)
- bdnz Loop_xts_enc_steal
-
- mtctr $rounds
- b Loop_xts_enc # one more time...
-
-Lxts_enc_done:
- ${UCMP}i $ivp,0
- beq Lxts_enc_ret
-
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vand $tmp,$tmp,$eighty7
- vxor $tweak,$tweak,$tmp
-
- le?vperm $tweak,$tweak,$tweak,$leperm
- stvx_u $tweak,0,$ivp
-
-Lxts_enc_ret:
- mtspr 256,r12 # restore vrsave
- li r3,0
- blr
- .long 0
- .byte 0,12,0x04,0,0x80,6,6,0
- .long 0
-.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
-
-.globl .${prefix}_xts_decrypt
- mr $inp,r3 # reassign
- li r3,-1
- ${UCMP}i $len,16
- bltlr-
-
- lis r0,0xfff8
- mfspr r12,256 # save vrsave
- li r11,0
- mtspr 256,r0
-
- andi. r0,$len,15
- neg r0,r0
- andi. r0,r0,16
- sub $len,$len,r0
-
- vspltisb $seven,0x07 # 0x070707..07
- le?lvsl $leperm,r11,r11
- le?vspltisb $tmp,0x0f
- le?vxor $leperm,$leperm,$seven
-
- li $idx,15
- lvx $tweak,0,$ivp # load [unaligned] iv
- lvsl $inpperm,0,$ivp
- lvx $inptail,$idx,$ivp
- le?vxor $inpperm,$inpperm,$tmp
- vperm $tweak,$tweak,$inptail,$inpperm
-
- neg r11,$inp
- lvsr $inpperm,0,r11 # prepare for unaligned load
- lvx $inout,0,$inp
- addi $inp,$inp,15 # 15 is not typo
- le?vxor $inpperm,$inpperm,$tmp
-
- ${UCMP}i $key2,0 # key2==NULL?
- beq Lxts_dec_no_key2
-
- ?lvsl $keyperm,0,$key2 # prepare for unaligned key
- lwz $rounds,240($key2)
- srwi $rounds,$rounds,1
- subi $rounds,$rounds,1
- li $idx,16
-
- lvx $rndkey0,0,$key2
- lvx $rndkey1,$idx,$key2
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $tweak,$tweak,$rndkey0
- lvx $rndkey0,$idx,$key2
- addi $idx,$idx,16
- mtctr $rounds
-
-Ltweak_xts_dec:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $tweak,$tweak,$rndkey1
- lvx $rndkey1,$idx,$key2
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipher $tweak,$tweak,$rndkey0
- lvx $rndkey0,$idx,$key2
- addi $idx,$idx,16
- bdnz Ltweak_xts_dec
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vcipher $tweak,$tweak,$rndkey1
- lvx $rndkey1,$idx,$key2
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vcipherlast $tweak,$tweak,$rndkey0
-
- li $ivp,0 # don't chain the tweak
- b Lxts_dec
-
-Lxts_dec_no_key2:
- neg $idx,$len
- andi. $idx,$idx,15
- add $len,$len,$idx # in "tweak chaining"
- # mode only complete
- # blocks are processed
-Lxts_dec:
- lvx $inptail,0,$inp
- addi $inp,$inp,16
-
- ?lvsl $keyperm,0,$key1 # prepare for unaligned key
- lwz $rounds,240($key1)
- srwi $rounds,$rounds,1
- subi $rounds,$rounds,1
- li $idx,16
-
- vslb $eighty7,$seven,$seven # 0x808080..80
- vor $eighty7,$eighty7,$seven # 0x878787..87
- vspltisb $tmp,1 # 0x010101..01
- vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
-
- ${UCMP}i $len,96
- bge _aesp8_xts_decrypt6x
-
- lvx $rndkey0,0,$key1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
- vperm $inout,$inout,$inptail,$inpperm
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$inout,$tweak
- vxor $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
- mtctr $rounds
-
- ${UCMP}i $len,16
- blt Ltail_xts_dec
- be?b Loop_xts_dec
-
-.align 5
-Loop_xts_dec:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vncipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vncipher $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
- bdnz Loop_xts_dec
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vncipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key1
- li $idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $rndkey0,$rndkey0,$tweak
- vncipherlast $output,$inout,$rndkey0
-
- le?vperm $tmp,$output,$output,$leperm
- be?nop
- le?stvx_u $tmp,0,$out
- be?stvx_u $output,0,$out
- addi $out,$out,16
-
- subic. $len,$len,16
- beq Lxts_dec_done
-
- vmr $inout,$inptail
- lvx $inptail,0,$inp
- addi $inp,$inp,16
- lvx $rndkey0,0,$key1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
-
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vand $tmp,$tmp,$eighty7
- vxor $tweak,$tweak,$tmp
-
- vperm $inout,$inout,$inptail,$inpperm
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $inout,$inout,$tweak
- vxor $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
-
- mtctr $rounds
- ${UCMP}i $len,16
- bge Loop_xts_dec
-
-Ltail_xts_dec:
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak1,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vand $tmp,$tmp,$eighty7
- vxor $tweak1,$tweak1,$tmp
-
- subi $inp,$inp,16
- add $inp,$inp,$len
-
- vxor $inout,$inout,$tweak # :-(
- vxor $inout,$inout,$tweak1 # :-)
-
-Loop_xts_dec_short:
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vncipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vncipher $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
- bdnz Loop_xts_dec_short
-
- ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
- vncipher $inout,$inout,$rndkey1
- lvx $rndkey1,$idx,$key1
- li $idx,16
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
- vxor $rndkey0,$rndkey0,$tweak1
- vncipherlast $output,$inout,$rndkey0
-
- le?vperm $tmp,$output,$output,$leperm
- be?nop
- le?stvx_u $tmp,0,$out
- be?stvx_u $output,0,$out
-
- vmr $inout,$inptail
- lvx $inptail,0,$inp
- #addi $inp,$inp,16
- lvx $rndkey0,0,$key1
- lvx $rndkey1,$idx,$key1
- addi $idx,$idx,16
- vperm $inout,$inout,$inptail,$inpperm
- ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
-
- lvsr $inpperm,0,$len # $inpperm is no longer needed
- vxor $inptail,$inptail,$inptail # $inptail is no longer needed
- vspltisb $tmp,-1
- vperm $inptail,$inptail,$tmp,$inpperm
- vsel $inout,$inout,$output,$inptail
-
- vxor $rndkey0,$rndkey0,$tweak
- vxor $inout,$inout,$rndkey0
- lvx $rndkey0,$idx,$key1
- addi $idx,$idx,16
-
- subi r11,$out,1
- mtctr $len
- li $len,16
-Loop_xts_dec_steal:
- lbzu r0,1(r11)
- stb r0,16(r11)
- bdnz Loop_xts_dec_steal
-
- mtctr $rounds
- b Loop_xts_dec # one more time...
-
-Lxts_dec_done:
- ${UCMP}i $ivp,0
- beq Lxts_dec_ret
-
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
- vand $tmp,$tmp,$eighty7
- vxor $tweak,$tweak,$tmp
-
- le?vperm $tweak,$tweak,$tweak,$leperm
- stvx_u $tweak,0,$ivp
-
-Lxts_dec_ret:
- mtspr 256,r12 # restore vrsave
- li r3,0
- blr
- .long 0
- .byte 0,12,0x04,0,0x80,6,6,0
- .long 0
-.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
-___
-#########################################################################
-{{ # Optimized XTS procedures #
-my $key_=$key2;
-my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
- $x00=0 if ($flavour =~ /osx/);
-my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
-my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
-my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
-my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
- # v26-v31 last 6 round keys
-my ($keyperm)=($out0); # aliases with "caller", redundant assignment
-my $taillen=$x70;
-
-$code.=<<___;
-.align 5
-_aesp8_xts_encrypt6x:
- $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
- mflr r11
- li r7,`$FRAME+8*16+15`
- li r3,`$FRAME+8*16+31`
- $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
- stvx v20,r7,$sp # ABI says so
- addi r7,r7,32
- stvx v21,r3,$sp
- addi r3,r3,32
- stvx v22,r7,$sp
- addi r7,r7,32
- stvx v23,r3,$sp
- addi r3,r3,32
- stvx v24,r7,$sp
- addi r7,r7,32
- stvx v25,r3,$sp
- addi r3,r3,32
- stvx v26,r7,$sp
- addi r7,r7,32
- stvx v27,r3,$sp
- addi r3,r3,32
- stvx v28,r7,$sp
- addi r7,r7,32
- stvx v29,r3,$sp
- addi r3,r3,32
- stvx v30,r7,$sp
- stvx v31,r3,$sp
- li r0,-1
- stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
- li $x10,0x10
- $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- li $x20,0x20
- $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- li $x30,0x30
- $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- li $x40,0x40
- $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- li $x50,0x50
- $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- li $x60,0x60
- $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- li $x70,0x70
- mtspr 256,r0
-
- xxlor 2, 32+$eighty7, 32+$eighty7
- vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87
- xxlor 1, 32+$eighty7, 32+$eighty7
-
- # Load XOR Lconsts.
- mr $x70, r6
- bl Lconsts
- lxvw4x 0, $x40, r6 # load XOR contents
- mr r6, $x70
- li $x70,0x70
-
- subi $rounds,$rounds,3 # -4 in total
-
- lvx $rndkey0,$x00,$key1 # load key schedule
- lvx v30,$x10,$key1
- addi $key1,$key1,0x20
- lvx v31,$x00,$key1
- ?vperm $rndkey0,$rndkey0,v30,$keyperm
- addi $key_,$sp,$FRAME+15
- mtctr $rounds
-
-Load_xts_enc_key:
- ?vperm v24,v30,v31,$keyperm
- lvx v30,$x10,$key1
- addi $key1,$key1,0x20
- stvx v24,$x00,$key_ # off-load round[1]
- ?vperm v25,v31,v30,$keyperm
- lvx v31,$x00,$key1
- stvx v25,$x10,$key_ # off-load round[2]
- addi $key_,$key_,0x20
- bdnz Load_xts_enc_key
-
- lvx v26,$x10,$key1
- ?vperm v24,v30,v31,$keyperm
- lvx v27,$x20,$key1
- stvx v24,$x00,$key_ # off-load round[3]
- ?vperm v25,v31,v26,$keyperm
- lvx v28,$x30,$key1
- stvx v25,$x10,$key_ # off-load round[4]
- addi $key_,$sp,$FRAME+15 # rewind $key_
- ?vperm v26,v26,v27,$keyperm
- lvx v29,$x40,$key1
- ?vperm v27,v27,v28,$keyperm
- lvx v30,$x50,$key1
- ?vperm v28,v28,v29,$keyperm
- lvx v31,$x60,$key1
- ?vperm v29,v29,v30,$keyperm
- lvx $twk5,$x70,$key1 # borrow $twk5
- ?vperm v30,v30,v31,$keyperm
- lvx v24,$x00,$key_ # pre-load round[1]
- ?vperm v31,v31,$twk5,$keyperm
- lvx v25,$x10,$key_ # pre-load round[2]
-
- # Switch to use the following codes with 0x010101..87 to generate tweak.
- # eighty7 = 0x010101..87
- # vsrab tmp, tweak, seven # next tweak value, right shift 7 bits
- # vand tmp, tmp, eighty7 # last byte with carry
- # vaddubm tweak, tweak, tweak # left shift 1 bit (x2)
- # xxlor vsx, 0, 0
- # vpermxor tweak, tweak, tmp, vsx
-
- vperm $in0,$inout,$inptail,$inpperm
- subi $inp,$inp,31 # undo "caller"
- vxor $twk0,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vand $tmp,$tmp,$eighty7
- vxor $out0,$in0,$twk0
- xxlor 32+$in1, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in1
-
- lvx_u $in1,$x10,$inp
- vxor $twk1,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- le?vperm $in1,$in1,$in1,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out1,$in1,$twk1
- xxlor 32+$in2, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in2
-
- lvx_u $in2,$x20,$inp
- andi. $taillen,$len,15
- vxor $twk2,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- le?vperm $in2,$in2,$in2,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out2,$in2,$twk2
- xxlor 32+$in3, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in3
-
- lvx_u $in3,$x30,$inp
- sub $len,$len,$taillen
- vxor $twk3,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- le?vperm $in3,$in3,$in3,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out3,$in3,$twk3
- xxlor 32+$in4, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in4
-
- lvx_u $in4,$x40,$inp
- subi $len,$len,0x60
- vxor $twk4,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- le?vperm $in4,$in4,$in4,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out4,$in4,$twk4
- xxlor 32+$in5, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in5
-
- lvx_u $in5,$x50,$inp
- addi $inp,$inp,0x60
- vxor $twk5,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- le?vperm $in5,$in5,$in5,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out5,$in5,$twk5
- xxlor 32+$in0, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in0
-
- vxor v31,v31,$rndkey0
- mtctr $rounds
- b Loop_xts_enc6x
-
-.align 5
-Loop_xts_enc6x:
- vcipher $out0,$out0,v24
- vcipher $out1,$out1,v24
- vcipher $out2,$out2,v24
- vcipher $out3,$out3,v24
- vcipher $out4,$out4,v24
- vcipher $out5,$out5,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vcipher $out0,$out0,v25
- vcipher $out1,$out1,v25
- vcipher $out2,$out2,v25
- vcipher $out3,$out3,v25
- vcipher $out4,$out4,v25
- vcipher $out5,$out5,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_xts_enc6x
-
- xxlor 32+$eighty7, 1, 1 # 0x010101..87
-
- subic $len,$len,96 # $len-=96
- vxor $in0,$twk0,v31 # xor with last round key
- vcipher $out0,$out0,v24
- vcipher $out1,$out1,v24
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk0,$tweak,$rndkey0
- vaddubm $tweak,$tweak,$tweak
- vcipher $out2,$out2,v24
- vcipher $out3,$out3,v24
- vcipher $out4,$out4,v24
- vcipher $out5,$out5,v24
-
- subfe. r0,r0,r0 # borrow?-1:0
- vand $tmp,$tmp,$eighty7
- vcipher $out0,$out0,v25
- vcipher $out1,$out1,v25
- xxlor 32+$in1, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in1
- vcipher $out2,$out2,v25
- vcipher $out3,$out3,v25
- vxor $in1,$twk1,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk1,$tweak,$rndkey0
- vcipher $out4,$out4,v25
- vcipher $out5,$out5,v25
-
- and r0,r0,$len
- vaddubm $tweak,$tweak,$tweak
- vcipher $out0,$out0,v26
- vcipher $out1,$out1,v26
- vand $tmp,$tmp,$eighty7
- vcipher $out2,$out2,v26
- vcipher $out3,$out3,v26
- xxlor 32+$in2, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in2
- vcipher $out4,$out4,v26
- vcipher $out5,$out5,v26
-
- add $inp,$inp,r0 # $inp is adjusted in such
- # way that at exit from the
- # loop inX-in5 are loaded
- # with last "words"
- vxor $in2,$twk2,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk2,$tweak,$rndkey0
- vaddubm $tweak,$tweak,$tweak
- vcipher $out0,$out0,v27
- vcipher $out1,$out1,v27
- vcipher $out2,$out2,v27
- vcipher $out3,$out3,v27
- vand $tmp,$tmp,$eighty7
- vcipher $out4,$out4,v27
- vcipher $out5,$out5,v27
-
- addi $key_,$sp,$FRAME+15 # rewind $key_
- xxlor 32+$in3, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in3
- vcipher $out0,$out0,v28
- vcipher $out1,$out1,v28
- vxor $in3,$twk3,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk3,$tweak,$rndkey0
- vcipher $out2,$out2,v28
- vcipher $out3,$out3,v28
- vaddubm $tweak,$tweak,$tweak
- vcipher $out4,$out4,v28
- vcipher $out5,$out5,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
- vand $tmp,$tmp,$eighty7
-
- vcipher $out0,$out0,v29
- vcipher $out1,$out1,v29
- xxlor 32+$in4, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in4
- vcipher $out2,$out2,v29
- vcipher $out3,$out3,v29
- vxor $in4,$twk4,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk4,$tweak,$rndkey0
- vcipher $out4,$out4,v29
- vcipher $out5,$out5,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vaddubm $tweak,$tweak,$tweak
-
- vcipher $out0,$out0,v30
- vcipher $out1,$out1,v30
- vand $tmp,$tmp,$eighty7
- vcipher $out2,$out2,v30
- vcipher $out3,$out3,v30
- xxlor 32+$in5, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in5
- vcipher $out4,$out4,v30
- vcipher $out5,$out5,v30
- vxor $in5,$twk5,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk5,$tweak,$rndkey0
-
- vcipherlast $out0,$out0,$in0
- lvx_u $in0,$x00,$inp # load next input block
- vaddubm $tweak,$tweak,$tweak
- vcipherlast $out1,$out1,$in1
- lvx_u $in1,$x10,$inp
- vcipherlast $out2,$out2,$in2
- le?vperm $in0,$in0,$in0,$leperm
- lvx_u $in2,$x20,$inp
- vand $tmp,$tmp,$eighty7
- vcipherlast $out3,$out3,$in3
- le?vperm $in1,$in1,$in1,$leperm
- lvx_u $in3,$x30,$inp
- vcipherlast $out4,$out4,$in4
- le?vperm $in2,$in2,$in2,$leperm
- lvx_u $in4,$x40,$inp
- xxlor 10, 32+$in0, 32+$in0
- xxlor 32+$in0, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in0
- xxlor 32+$in0, 10, 10
- vcipherlast $tmp,$out5,$in5 # last block might be needed
- # in stealing mode
- le?vperm $in3,$in3,$in3,$leperm
- lvx_u $in5,$x50,$inp
- addi $inp,$inp,0x60
- le?vperm $in4,$in4,$in4,$leperm
- le?vperm $in5,$in5,$in5,$leperm
-
- le?vperm $out0,$out0,$out0,$leperm
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $out0,$in0,$twk0
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- vxor $out1,$in1,$twk1
- le?vperm $out3,$out3,$out3,$leperm
- stvx_u $out2,$x20,$out
- vxor $out2,$in2,$twk2
- le?vperm $out4,$out4,$out4,$leperm
- stvx_u $out3,$x30,$out
- vxor $out3,$in3,$twk3
- le?vperm $out5,$tmp,$tmp,$leperm
- stvx_u $out4,$x40,$out
- vxor $out4,$in4,$twk4
- le?stvx_u $out5,$x50,$out
- be?stvx_u $tmp, $x50,$out
- vxor $out5,$in5,$twk5
- addi $out,$out,0x60
-
- mtctr $rounds
- beq Loop_xts_enc6x # did $len-=96 borrow?
-
- xxlor 32+$eighty7, 2, 2 # 0x010101..87
-
- addic. $len,$len,0x60
- beq Lxts_enc6x_zero
- cmpwi $len,0x20
- blt Lxts_enc6x_one
- nop
- beq Lxts_enc6x_two
- cmpwi $len,0x40
- blt Lxts_enc6x_three
- nop
- beq Lxts_enc6x_four
-
-Lxts_enc6x_five:
- vxor $out0,$in1,$twk0
- vxor $out1,$in2,$twk1
- vxor $out2,$in3,$twk2
- vxor $out3,$in4,$twk3
- vxor $out4,$in5,$twk4
-
- bl _aesp8_xts_enc5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk5 # unused tweak
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$leperm
- stvx_u $out2,$x20,$out
- vxor $tmp,$out4,$twk5 # last block prep for stealing
- le?vperm $out4,$out4,$out4,$leperm
- stvx_u $out3,$x30,$out
- stvx_u $out4,$x40,$out
- addi $out,$out,0x50
- bne Lxts_enc6x_steal
- b Lxts_enc6x_done
-
-.align 4
-Lxts_enc6x_four:
- vxor $out0,$in2,$twk0
- vxor $out1,$in3,$twk1
- vxor $out2,$in4,$twk2
- vxor $out3,$in5,$twk3
- vxor $out4,$out4,$out4
-
- bl _aesp8_xts_enc5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk4 # unused tweak
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- vxor $tmp,$out3,$twk4 # last block prep for stealing
- le?vperm $out3,$out3,$out3,$leperm
- stvx_u $out2,$x20,$out
- stvx_u $out3,$x30,$out
- addi $out,$out,0x40
- bne Lxts_enc6x_steal
- b Lxts_enc6x_done
-
-.align 4
-Lxts_enc6x_three:
- vxor $out0,$in3,$twk0
- vxor $out1,$in4,$twk1
- vxor $out2,$in5,$twk2
- vxor $out3,$out3,$out3
- vxor $out4,$out4,$out4
-
- bl _aesp8_xts_enc5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk3 # unused tweak
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $tmp,$out2,$twk3 # last block prep for stealing
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- stvx_u $out2,$x20,$out
- addi $out,$out,0x30
- bne Lxts_enc6x_steal
- b Lxts_enc6x_done
-
-.align 4
-Lxts_enc6x_two:
- vxor $out0,$in4,$twk0
- vxor $out1,$in5,$twk1
- vxor $out2,$out2,$out2
- vxor $out3,$out3,$out3
- vxor $out4,$out4,$out4
-
- bl _aesp8_xts_enc5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk2 # unused tweak
- vxor $tmp,$out1,$twk2 # last block prep for stealing
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- stvx_u $out1,$x10,$out
- addi $out,$out,0x20
- bne Lxts_enc6x_steal
- b Lxts_enc6x_done
-
-.align 4
-Lxts_enc6x_one:
- vxor $out0,$in5,$twk0
- nop
-Loop_xts_enc1x:
- vcipher $out0,$out0,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vcipher $out0,$out0,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_xts_enc1x
-
- add $inp,$inp,$taillen
- cmpwi $taillen,0
- vcipher $out0,$out0,v24
-
- subi $inp,$inp,16
- vcipher $out0,$out0,v25
-
- lvsr $inpperm,0,$taillen
- vcipher $out0,$out0,v26
-
- lvx_u $in0,0,$inp
- vcipher $out0,$out0,v27
-
- addi $key_,$sp,$FRAME+15 # rewind $key_
- vcipher $out0,$out0,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
-
- vcipher $out0,$out0,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vxor $twk0,$twk0,v31
-
- le?vperm $in0,$in0,$in0,$leperm
- vcipher $out0,$out0,v30
-
- vperm $in0,$in0,$in0,$inpperm
- vcipherlast $out0,$out0,$twk0
-
- vmr $twk0,$twk1 # unused tweak
- vxor $tmp,$out0,$twk1 # last block prep for stealing
- le?vperm $out0,$out0,$out0,$leperm
- stvx_u $out0,$x00,$out # store output
- addi $out,$out,0x10
- bne Lxts_enc6x_steal
- b Lxts_enc6x_done
-
-.align 4
-Lxts_enc6x_zero:
- cmpwi $taillen,0
- beq Lxts_enc6x_done
-
- add $inp,$inp,$taillen
- subi $inp,$inp,16
- lvx_u $in0,0,$inp
- lvsr $inpperm,0,$taillen # $in5 is no more
- le?vperm $in0,$in0,$in0,$leperm
- vperm $in0,$in0,$in0,$inpperm
- vxor $tmp,$tmp,$twk0
-Lxts_enc6x_steal:
- vxor $in0,$in0,$twk0
- vxor $out0,$out0,$out0
- vspltisb $out1,-1
- vperm $out0,$out0,$out1,$inpperm
- vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
-
- subi r30,$out,17
- subi $out,$out,16
- mtctr $taillen
-Loop_xts_enc6x_steal:
- lbzu r0,1(r30)
- stb r0,16(r30)
- bdnz Loop_xts_enc6x_steal
-
- li $taillen,0
- mtctr $rounds
- b Loop_xts_enc1x # one more time...
-
-.align 4
-Lxts_enc6x_done:
- ${UCMP}i $ivp,0
- beq Lxts_enc6x_ret
-
- vxor $tweak,$twk0,$rndkey0
- le?vperm $tweak,$tweak,$tweak,$leperm
- stvx_u $tweak,0,$ivp
-
-Lxts_enc6x_ret:
- mtlr r11
- li r10,`$FRAME+15`
- li r11,`$FRAME+31`
- stvx $seven,r10,$sp # wipe copies of round keys
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
- stvx $seven,r10,$sp
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
- stvx $seven,r10,$sp
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
- stvx $seven,r10,$sp
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
-
- mtspr 256,$vrsave
- lvx v20,r10,$sp # ABI says so
- addi r10,r10,32
- lvx v21,r11,$sp
- addi r11,r11,32
- lvx v22,r10,$sp
- addi r10,r10,32
- lvx v23,r11,$sp
- addi r11,r11,32
- lvx v24,r10,$sp
- addi r10,r10,32
- lvx v25,r11,$sp
- addi r11,r11,32
- lvx v26,r10,$sp
- addi r10,r10,32
- lvx v27,r11,$sp
- addi r11,r11,32
- lvx v28,r10,$sp
- addi r10,r10,32
- lvx v29,r11,$sp
- addi r11,r11,32
- lvx v30,r10,$sp
- lvx v31,r11,$sp
- $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
- blr
- .long 0
- .byte 0,12,0x04,1,0x80,6,6,0
- .long 0
-
-.align 5
-_aesp8_xts_enc5x:
- vcipher $out0,$out0,v24
- vcipher $out1,$out1,v24
- vcipher $out2,$out2,v24
- vcipher $out3,$out3,v24
- vcipher $out4,$out4,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vcipher $out0,$out0,v25
- vcipher $out1,$out1,v25
- vcipher $out2,$out2,v25
- vcipher $out3,$out3,v25
- vcipher $out4,$out4,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz _aesp8_xts_enc5x
-
- add $inp,$inp,$taillen
- cmpwi $taillen,0
- vcipher $out0,$out0,v24
- vcipher $out1,$out1,v24
- vcipher $out2,$out2,v24
- vcipher $out3,$out3,v24
- vcipher $out4,$out4,v24
-
- subi $inp,$inp,16
- vcipher $out0,$out0,v25
- vcipher $out1,$out1,v25
- vcipher $out2,$out2,v25
- vcipher $out3,$out3,v25
- vcipher $out4,$out4,v25
- vxor $twk0,$twk0,v31
-
- vcipher $out0,$out0,v26
- lvsr $inpperm,r0,$taillen # $in5 is no more
- vcipher $out1,$out1,v26
- vcipher $out2,$out2,v26
- vcipher $out3,$out3,v26
- vcipher $out4,$out4,v26
- vxor $in1,$twk1,v31
-
- vcipher $out0,$out0,v27
- lvx_u $in0,0,$inp
- vcipher $out1,$out1,v27
- vcipher $out2,$out2,v27
- vcipher $out3,$out3,v27
- vcipher $out4,$out4,v27
- vxor $in2,$twk2,v31
-
- addi $key_,$sp,$FRAME+15 # rewind $key_
- vcipher $out0,$out0,v28
- vcipher $out1,$out1,v28
- vcipher $out2,$out2,v28
- vcipher $out3,$out3,v28
- vcipher $out4,$out4,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
- vxor $in3,$twk3,v31
-
- vcipher $out0,$out0,v29
- le?vperm $in0,$in0,$in0,$leperm
- vcipher $out1,$out1,v29
- vcipher $out2,$out2,v29
- vcipher $out3,$out3,v29
- vcipher $out4,$out4,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vxor $in4,$twk4,v31
-
- vcipher $out0,$out0,v30
- vperm $in0,$in0,$in0,$inpperm
- vcipher $out1,$out1,v30
- vcipher $out2,$out2,v30
- vcipher $out3,$out3,v30
- vcipher $out4,$out4,v30
-
- vcipherlast $out0,$out0,$twk0
- vcipherlast $out1,$out1,$in1
- vcipherlast $out2,$out2,$in2
- vcipherlast $out3,$out3,$in3
- vcipherlast $out4,$out4,$in4
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,0,0
-
-.align 5
-_aesp8_xts_decrypt6x:
- $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
- mflr r11
- li r7,`$FRAME+8*16+15`
- li r3,`$FRAME+8*16+31`
- $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
- stvx v20,r7,$sp # ABI says so
- addi r7,r7,32
- stvx v21,r3,$sp
- addi r3,r3,32
- stvx v22,r7,$sp
- addi r7,r7,32
- stvx v23,r3,$sp
- addi r3,r3,32
- stvx v24,r7,$sp
- addi r7,r7,32
- stvx v25,r3,$sp
- addi r3,r3,32
- stvx v26,r7,$sp
- addi r7,r7,32
- stvx v27,r3,$sp
- addi r3,r3,32
- stvx v28,r7,$sp
- addi r7,r7,32
- stvx v29,r3,$sp
- addi r3,r3,32
- stvx v30,r7,$sp
- stvx v31,r3,$sp
- li r0,-1
- stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
- li $x10,0x10
- $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- li $x20,0x20
- $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- li $x30,0x30
- $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- li $x40,0x40
- $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- li $x50,0x50
- $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- li $x60,0x60
- $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- li $x70,0x70
- mtspr 256,r0
-
- xxlor 2, 32+$eighty7, 32+$eighty7
- vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87
- xxlor 1, 32+$eighty7, 32+$eighty7
-
- # Load XOR Lconsts.
- mr $x70, r6
- bl Lconsts
- lxvw4x 0, $x40, r6 # load XOR contents
- mr r6, $x70
- li $x70,0x70
-
- subi $rounds,$rounds,3 # -4 in total
-
- lvx $rndkey0,$x00,$key1 # load key schedule
- lvx v30,$x10,$key1
- addi $key1,$key1,0x20
- lvx v31,$x00,$key1
- ?vperm $rndkey0,$rndkey0,v30,$keyperm
- addi $key_,$sp,$FRAME+15
- mtctr $rounds
-
-Load_xts_dec_key:
- ?vperm v24,v30,v31,$keyperm
- lvx v30,$x10,$key1
- addi $key1,$key1,0x20
- stvx v24,$x00,$key_ # off-load round[1]
- ?vperm v25,v31,v30,$keyperm
- lvx v31,$x00,$key1
- stvx v25,$x10,$key_ # off-load round[2]
- addi $key_,$key_,0x20
- bdnz Load_xts_dec_key
-
- lvx v26,$x10,$key1
- ?vperm v24,v30,v31,$keyperm
- lvx v27,$x20,$key1
- stvx v24,$x00,$key_ # off-load round[3]
- ?vperm v25,v31,v26,$keyperm
- lvx v28,$x30,$key1
- stvx v25,$x10,$key_ # off-load round[4]
- addi $key_,$sp,$FRAME+15 # rewind $key_
- ?vperm v26,v26,v27,$keyperm
- lvx v29,$x40,$key1
- ?vperm v27,v27,v28,$keyperm
- lvx v30,$x50,$key1
- ?vperm v28,v28,v29,$keyperm
- lvx v31,$x60,$key1
- ?vperm v29,v29,v30,$keyperm
- lvx $twk5,$x70,$key1 # borrow $twk5
- ?vperm v30,v30,v31,$keyperm
- lvx v24,$x00,$key_ # pre-load round[1]
- ?vperm v31,v31,$twk5,$keyperm
- lvx v25,$x10,$key_ # pre-load round[2]
-
- vperm $in0,$inout,$inptail,$inpperm
- subi $inp,$inp,31 # undo "caller"
- vxor $twk0,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- vand $tmp,$tmp,$eighty7
- vxor $out0,$in0,$twk0
- xxlor 32+$in1, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in1
-
- lvx_u $in1,$x10,$inp
- vxor $twk1,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- le?vperm $in1,$in1,$in1,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out1,$in1,$twk1
- xxlor 32+$in2, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in2
-
- lvx_u $in2,$x20,$inp
- andi. $taillen,$len,15
- vxor $twk2,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- le?vperm $in2,$in2,$in2,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out2,$in2,$twk2
- xxlor 32+$in3, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in3
-
- lvx_u $in3,$x30,$inp
- sub $len,$len,$taillen
- vxor $twk3,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- le?vperm $in3,$in3,$in3,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out3,$in3,$twk3
- xxlor 32+$in4, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in4
-
- lvx_u $in4,$x40,$inp
- subi $len,$len,0x60
- vxor $twk4,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- le?vperm $in4,$in4,$in4,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out4,$in4,$twk4
- xxlor 32+$in5, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in5
-
- lvx_u $in5,$x50,$inp
- addi $inp,$inp,0x60
- vxor $twk5,$tweak,$rndkey0
- vsrab $tmp,$tweak,$seven # next tweak value
- vaddubm $tweak,$tweak,$tweak
- le?vperm $in5,$in5,$in5,$leperm
- vand $tmp,$tmp,$eighty7
- vxor $out5,$in5,$twk5
- xxlor 32+$in0, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in0
-
- vxor v31,v31,$rndkey0
- mtctr $rounds
- b Loop_xts_dec6x
-
-.align 5
-Loop_xts_dec6x:
- vncipher $out0,$out0,v24
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- vncipher $out5,$out5,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vncipher $out0,$out0,v25
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- vncipher $out5,$out5,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_xts_dec6x
-
- xxlor 32+$eighty7, 1, 1 # 0x010101..87
-
- subic $len,$len,96 # $len-=96
- vxor $in0,$twk0,v31 # xor with last round key
- vncipher $out0,$out0,v24
- vncipher $out1,$out1,v24
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk0,$tweak,$rndkey0
- vaddubm $tweak,$tweak,$tweak
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- vncipher $out5,$out5,v24
-
- subfe. r0,r0,r0 # borrow?-1:0
- vand $tmp,$tmp,$eighty7
- vncipher $out0,$out0,v25
- vncipher $out1,$out1,v25
- xxlor 32+$in1, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in1
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vxor $in1,$twk1,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk1,$tweak,$rndkey0
- vncipher $out4,$out4,v25
- vncipher $out5,$out5,v25
-
- and r0,r0,$len
- vaddubm $tweak,$tweak,$tweak
- vncipher $out0,$out0,v26
- vncipher $out1,$out1,v26
- vand $tmp,$tmp,$eighty7
- vncipher $out2,$out2,v26
- vncipher $out3,$out3,v26
- xxlor 32+$in2, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in2
- vncipher $out4,$out4,v26
- vncipher $out5,$out5,v26
-
- add $inp,$inp,r0 # $inp is adjusted in such
- # way that at exit from the
- # loop inX-in5 are loaded
- # with last "words"
- vxor $in2,$twk2,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk2,$tweak,$rndkey0
- vaddubm $tweak,$tweak,$tweak
- vncipher $out0,$out0,v27
- vncipher $out1,$out1,v27
- vncipher $out2,$out2,v27
- vncipher $out3,$out3,v27
- vand $tmp,$tmp,$eighty7
- vncipher $out4,$out4,v27
- vncipher $out5,$out5,v27
-
- addi $key_,$sp,$FRAME+15 # rewind $key_
- xxlor 32+$in3, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in3
- vncipher $out0,$out0,v28
- vncipher $out1,$out1,v28
- vxor $in3,$twk3,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk3,$tweak,$rndkey0
- vncipher $out2,$out2,v28
- vncipher $out3,$out3,v28
- vaddubm $tweak,$tweak,$tweak
- vncipher $out4,$out4,v28
- vncipher $out5,$out5,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
- vand $tmp,$tmp,$eighty7
-
- vncipher $out0,$out0,v29
- vncipher $out1,$out1,v29
- xxlor 32+$in4, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in4
- vncipher $out2,$out2,v29
- vncipher $out3,$out3,v29
- vxor $in4,$twk4,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk4,$tweak,$rndkey0
- vncipher $out4,$out4,v29
- vncipher $out5,$out5,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vaddubm $tweak,$tweak,$tweak
-
- vncipher $out0,$out0,v30
- vncipher $out1,$out1,v30
- vand $tmp,$tmp,$eighty7
- vncipher $out2,$out2,v30
- vncipher $out3,$out3,v30
- xxlor 32+$in5, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in5
- vncipher $out4,$out4,v30
- vncipher $out5,$out5,v30
- vxor $in5,$twk5,v31
- vsrab $tmp,$tweak,$seven # next tweak value
- vxor $twk5,$tweak,$rndkey0
-
- vncipherlast $out0,$out0,$in0
- lvx_u $in0,$x00,$inp # load next input block
- vaddubm $tweak,$tweak,$tweak
- vncipherlast $out1,$out1,$in1
- lvx_u $in1,$x10,$inp
- vncipherlast $out2,$out2,$in2
- le?vperm $in0,$in0,$in0,$leperm
- lvx_u $in2,$x20,$inp
- vand $tmp,$tmp,$eighty7
- vncipherlast $out3,$out3,$in3
- le?vperm $in1,$in1,$in1,$leperm
- lvx_u $in3,$x30,$inp
- vncipherlast $out4,$out4,$in4
- le?vperm $in2,$in2,$in2,$leperm
- lvx_u $in4,$x40,$inp
- xxlor 10, 32+$in0, 32+$in0
- xxlor 32+$in0, 0, 0
- vpermxor $tweak, $tweak, $tmp, $in0
- xxlor 32+$in0, 10, 10
- vncipherlast $out5,$out5,$in5
- le?vperm $in3,$in3,$in3,$leperm
- lvx_u $in5,$x50,$inp
- addi $inp,$inp,0x60
- le?vperm $in4,$in4,$in4,$leperm
- le?vperm $in5,$in5,$in5,$leperm
-
- le?vperm $out0,$out0,$out0,$leperm
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $out0,$in0,$twk0
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- vxor $out1,$in1,$twk1
- le?vperm $out3,$out3,$out3,$leperm
- stvx_u $out2,$x20,$out
- vxor $out2,$in2,$twk2
- le?vperm $out4,$out4,$out4,$leperm
- stvx_u $out3,$x30,$out
- vxor $out3,$in3,$twk3
- le?vperm $out5,$out5,$out5,$leperm
- stvx_u $out4,$x40,$out
- vxor $out4,$in4,$twk4
- stvx_u $out5,$x50,$out
- vxor $out5,$in5,$twk5
- addi $out,$out,0x60
-
- mtctr $rounds
- beq Loop_xts_dec6x # did $len-=96 borrow?
-
- xxlor 32+$eighty7, 2, 2 # 0x010101..87
-
- addic. $len,$len,0x60
- beq Lxts_dec6x_zero
- cmpwi $len,0x20
- blt Lxts_dec6x_one
- nop
- beq Lxts_dec6x_two
- cmpwi $len,0x40
- blt Lxts_dec6x_three
- nop
- beq Lxts_dec6x_four
-
-Lxts_dec6x_five:
- vxor $out0,$in1,$twk0
- vxor $out1,$in2,$twk1
- vxor $out2,$in3,$twk2
- vxor $out3,$in4,$twk3
- vxor $out4,$in5,$twk4
-
- bl _aesp8_xts_dec5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk5 # unused tweak
- vxor $twk1,$tweak,$rndkey0
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $out0,$in0,$twk1
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$leperm
- stvx_u $out2,$x20,$out
- le?vperm $out4,$out4,$out4,$leperm
- stvx_u $out3,$x30,$out
- stvx_u $out4,$x40,$out
- addi $out,$out,0x50
- bne Lxts_dec6x_steal
- b Lxts_dec6x_done
-
-.align 4
-Lxts_dec6x_four:
- vxor $out0,$in2,$twk0
- vxor $out1,$in3,$twk1
- vxor $out2,$in4,$twk2
- vxor $out3,$in5,$twk3
- vxor $out4,$out4,$out4
-
- bl _aesp8_xts_dec5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk4 # unused tweak
- vmr $twk1,$twk5
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $out0,$in0,$twk5
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- le?vperm $out3,$out3,$out3,$leperm
- stvx_u $out2,$x20,$out
- stvx_u $out3,$x30,$out
- addi $out,$out,0x40
- bne Lxts_dec6x_steal
- b Lxts_dec6x_done
-
-.align 4
-Lxts_dec6x_three:
- vxor $out0,$in3,$twk0
- vxor $out1,$in4,$twk1
- vxor $out2,$in5,$twk2
- vxor $out3,$out3,$out3
- vxor $out4,$out4,$out4
-
- bl _aesp8_xts_dec5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk3 # unused tweak
- vmr $twk1,$twk4
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $out0,$in0,$twk4
- le?vperm $out2,$out2,$out2,$leperm
- stvx_u $out1,$x10,$out
- stvx_u $out2,$x20,$out
- addi $out,$out,0x30
- bne Lxts_dec6x_steal
- b Lxts_dec6x_done
-
-.align 4
-Lxts_dec6x_two:
- vxor $out0,$in4,$twk0
- vxor $out1,$in5,$twk1
- vxor $out2,$out2,$out2
- vxor $out3,$out3,$out3
- vxor $out4,$out4,$out4
-
- bl _aesp8_xts_dec5x
-
- le?vperm $out0,$out0,$out0,$leperm
- vmr $twk0,$twk2 # unused tweak
- vmr $twk1,$twk3
- le?vperm $out1,$out1,$out1,$leperm
- stvx_u $out0,$x00,$out # store output
- vxor $out0,$in0,$twk3
- stvx_u $out1,$x10,$out
- addi $out,$out,0x20
- bne Lxts_dec6x_steal
- b Lxts_dec6x_done
-
-.align 4
-Lxts_dec6x_one:
- vxor $out0,$in5,$twk0
- nop
-Loop_xts_dec1x:
- vncipher $out0,$out0,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vncipher $out0,$out0,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Loop_xts_dec1x
-
- subi r0,$taillen,1
- vncipher $out0,$out0,v24
-
- andi. r0,r0,16
- cmpwi $taillen,0
- vncipher $out0,$out0,v25
-
- sub $inp,$inp,r0
- vncipher $out0,$out0,v26
-
- lvx_u $in0,0,$inp
- vncipher $out0,$out0,v27
-
- addi $key_,$sp,$FRAME+15 # rewind $key_
- vncipher $out0,$out0,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
-
- vncipher $out0,$out0,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vxor $twk0,$twk0,v31
-
- le?vperm $in0,$in0,$in0,$leperm
- vncipher $out0,$out0,v30
-
- mtctr $rounds
- vncipherlast $out0,$out0,$twk0
-
- vmr $twk0,$twk1 # unused tweak
- vmr $twk1,$twk2
- le?vperm $out0,$out0,$out0,$leperm
- stvx_u $out0,$x00,$out # store output
- addi $out,$out,0x10
- vxor $out0,$in0,$twk2
- bne Lxts_dec6x_steal
- b Lxts_dec6x_done
-
-.align 4
-Lxts_dec6x_zero:
- cmpwi $taillen,0
- beq Lxts_dec6x_done
-
- lvx_u $in0,0,$inp
- le?vperm $in0,$in0,$in0,$leperm
- vxor $out0,$in0,$twk1
-Lxts_dec6x_steal:
- vncipher $out0,$out0,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vncipher $out0,$out0,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz Lxts_dec6x_steal
-
- add $inp,$inp,$taillen
- vncipher $out0,$out0,v24
-
- cmpwi $taillen,0
- vncipher $out0,$out0,v25
-
- lvx_u $in0,0,$inp
- vncipher $out0,$out0,v26
-
- lvsr $inpperm,0,$taillen # $in5 is no more
- vncipher $out0,$out0,v27
-
- addi $key_,$sp,$FRAME+15 # rewind $key_
- vncipher $out0,$out0,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
-
- vncipher $out0,$out0,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vxor $twk1,$twk1,v31
-
- le?vperm $in0,$in0,$in0,$leperm
- vncipher $out0,$out0,v30
-
- vperm $in0,$in0,$in0,$inpperm
- vncipherlast $tmp,$out0,$twk1
-
- le?vperm $out0,$tmp,$tmp,$leperm
- le?stvx_u $out0,0,$out
- be?stvx_u $tmp,0,$out
-
- vxor $out0,$out0,$out0
- vspltisb $out1,-1
- vperm $out0,$out0,$out1,$inpperm
- vsel $out0,$in0,$tmp,$out0
- vxor $out0,$out0,$twk0
-
- subi r30,$out,1
- mtctr $taillen
-Loop_xts_dec6x_steal:
- lbzu r0,1(r30)
- stb r0,16(r30)
- bdnz Loop_xts_dec6x_steal
-
- li $taillen,0
- mtctr $rounds
- b Loop_xts_dec1x # one more time...
-
-.align 4
-Lxts_dec6x_done:
- ${UCMP}i $ivp,0
- beq Lxts_dec6x_ret
-
- vxor $tweak,$twk0,$rndkey0
- le?vperm $tweak,$tweak,$tweak,$leperm
- stvx_u $tweak,0,$ivp
-
-Lxts_dec6x_ret:
- mtlr r11
- li r10,`$FRAME+15`
- li r11,`$FRAME+31`
- stvx $seven,r10,$sp # wipe copies of round keys
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
- stvx $seven,r10,$sp
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
- stvx $seven,r10,$sp
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
- stvx $seven,r10,$sp
- addi r10,r10,32
- stvx $seven,r11,$sp
- addi r11,r11,32
-
- mtspr 256,$vrsave
- lvx v20,r10,$sp # ABI says so
- addi r10,r10,32
- lvx v21,r11,$sp
- addi r11,r11,32
- lvx v22,r10,$sp
- addi r10,r10,32
- lvx v23,r11,$sp
- addi r11,r11,32
- lvx v24,r10,$sp
- addi r10,r10,32
- lvx v25,r11,$sp
- addi r11,r11,32
- lvx v26,r10,$sp
- addi r10,r10,32
- lvx v27,r11,$sp
- addi r11,r11,32
- lvx v28,r10,$sp
- addi r10,r10,32
- lvx v29,r11,$sp
- addi r11,r11,32
- lvx v30,r10,$sp
- lvx v31,r11,$sp
- $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
- $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
- $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
- $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
- $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
- $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
- addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
- blr
- .long 0
- .byte 0,12,0x04,1,0x80,6,6,0
- .long 0
-
-.align 5
-_aesp8_xts_dec5x:
- vncipher $out0,$out0,v24
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
- lvx v24,$x20,$key_ # round[3]
- addi $key_,$key_,0x20
-
- vncipher $out0,$out0,v25
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- lvx v25,$x10,$key_ # round[4]
- bdnz _aesp8_xts_dec5x
-
- subi r0,$taillen,1
- vncipher $out0,$out0,v24
- vncipher $out1,$out1,v24
- vncipher $out2,$out2,v24
- vncipher $out3,$out3,v24
- vncipher $out4,$out4,v24
-
- andi. r0,r0,16
- cmpwi $taillen,0
- vncipher $out0,$out0,v25
- vncipher $out1,$out1,v25
- vncipher $out2,$out2,v25
- vncipher $out3,$out3,v25
- vncipher $out4,$out4,v25
- vxor $twk0,$twk0,v31
-
- sub $inp,$inp,r0
- vncipher $out0,$out0,v26
- vncipher $out1,$out1,v26
- vncipher $out2,$out2,v26
- vncipher $out3,$out3,v26
- vncipher $out4,$out4,v26
- vxor $in1,$twk1,v31
-
- vncipher $out0,$out0,v27
- lvx_u $in0,0,$inp
- vncipher $out1,$out1,v27
- vncipher $out2,$out2,v27
- vncipher $out3,$out3,v27
- vncipher $out4,$out4,v27
- vxor $in2,$twk2,v31
-
- addi $key_,$sp,$FRAME+15 # rewind $key_
- vncipher $out0,$out0,v28
- vncipher $out1,$out1,v28
- vncipher $out2,$out2,v28
- vncipher $out3,$out3,v28
- vncipher $out4,$out4,v28
- lvx v24,$x00,$key_ # re-pre-load round[1]
- vxor $in3,$twk3,v31
-
- vncipher $out0,$out0,v29
- le?vperm $in0,$in0,$in0,$leperm
- vncipher $out1,$out1,v29
- vncipher $out2,$out2,v29
- vncipher $out3,$out3,v29
- vncipher $out4,$out4,v29
- lvx v25,$x10,$key_ # re-pre-load round[2]
- vxor $in4,$twk4,v31
-
- vncipher $out0,$out0,v30
- vncipher $out1,$out1,v30
- vncipher $out2,$out2,v30
- vncipher $out3,$out3,v30
- vncipher $out4,$out4,v30
-
- vncipherlast $out0,$out0,$twk0
- vncipherlast $out1,$out1,$in1
- vncipherlast $out2,$out2,$in2
- vncipherlast $out3,$out3,$in3
- vncipherlast $out4,$out4,$in4
- mtctr $rounds
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,0,0
-___
-}} }}}
-
-my $consts=1;
-foreach(split("\n",$code)) {
- s/\`([^\`]*)\`/eval($1)/geo;
-
- # constants table endian-specific conversion
- if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
- my $conv=$3;
- my @bytes=();
-
- # convert to endian-agnostic format
- if ($1 eq "long") {
- foreach (split(/,\s*/,$2)) {
- my $l = /^0/?oct:int;
- push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
- }
- } else {
- @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
- }
-
- # little-endian conversion
- if ($flavour =~ /le$/o) {
- SWITCH: for($conv) {
- /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
- /\?rev/ && do { @bytes=reverse(@bytes); last; };
- }
- }
-
- #emit
- print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
- next;
- }
- $consts=0 if (m/Lconsts:/o); # end of table
-
- # instructions prefixed with '?' are endian-specific and need
- # to be adjusted accordingly...
- if ($flavour =~ /le$/o) { # little-endian
- s/le\?//o or
- s/be\?/#be#/o or
- s/\?lvsr/lvsl/o or
- s/\?lvsl/lvsr/o or
- s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
- s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
- s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
- } else { # big-endian
- s/le\?/#le#/o or
- s/be\?//o or
- s/\?([a-z]+)/$1/o;
- }
-
- print $_,"\n";
-}
-
-close STDOUT;
diff --git a/drivers/crypto/vmx/ghash.c b/drivers/crypto/vmx/ghash.c
deleted file mode 100644
index 77eca20bc7ac..000000000000
--- a/drivers/crypto/vmx/ghash.c
+++ /dev/null
@@ -1,185 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GHASH routines supporting VMX instructions on the Power 8
- *
- * Copyright (C) 2015, 2019 International Business Machines Inc.
- *
- * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
- *
- * Extended by Daniel Axtens <dja@axtens.net> to replace the fallback
- * mechanism. The new approach is based on arm64 code, which is:
- * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
- */
-
-#include <linux/types.h>
-#include <linux/err.h>
-#include <linux/crypto.h>
-#include <linux/delay.h>
-#include <asm/simd.h>
-#include <asm/switch_to.h>
-#include <crypto/aes.h>
-#include <crypto/ghash.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
-#include <crypto/b128ops.h>
-#include "aesp8-ppc.h"
-
-void gcm_init_p8(u128 htable[16], const u64 Xi[2]);
-void gcm_gmult_p8(u64 Xi[2], const u128 htable[16]);
-void gcm_ghash_p8(u64 Xi[2], const u128 htable[16],
- const u8 *in, size_t len);
-
-struct p8_ghash_ctx {
- /* key used by vector asm */
- u128 htable[16];
- /* key used by software fallback */
- be128 key;
-};
-
-struct p8_ghash_desc_ctx {
- u64 shash[2];
- u8 buffer[GHASH_DIGEST_SIZE];
- int bytes;
-};
-
-static int p8_ghash_init(struct shash_desc *desc)
-{
- struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
-
- dctx->bytes = 0;
- memset(dctx->shash, 0, GHASH_DIGEST_SIZE);
- return 0;
-}
-
-static int p8_ghash_setkey(struct crypto_shash *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(tfm));
-
- if (keylen != GHASH_BLOCK_SIZE)
- return -EINVAL;
-
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- gcm_init_p8(ctx->htable, (const u64 *) key);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
-
- memcpy(&ctx->key, key, GHASH_BLOCK_SIZE);
-
- return 0;
-}
-
-static inline void __ghash_block(struct p8_ghash_ctx *ctx,
- struct p8_ghash_desc_ctx *dctx)
-{
- if (crypto_simd_usable()) {
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- gcm_ghash_p8(dctx->shash, ctx->htable,
- dctx->buffer, GHASH_DIGEST_SIZE);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
- } else {
- crypto_xor((u8 *)dctx->shash, dctx->buffer, GHASH_BLOCK_SIZE);
- gf128mul_lle((be128 *)dctx->shash, &ctx->key);
- }
-}
-
-static inline void __ghash_blocks(struct p8_ghash_ctx *ctx,
- struct p8_ghash_desc_ctx *dctx,
- const u8 *src, unsigned int srclen)
-{
- if (crypto_simd_usable()) {
- preempt_disable();
- pagefault_disable();
- enable_kernel_vsx();
- gcm_ghash_p8(dctx->shash, ctx->htable,
- src, srclen);
- disable_kernel_vsx();
- pagefault_enable();
- preempt_enable();
- } else {
- while (srclen >= GHASH_BLOCK_SIZE) {
- crypto_xor((u8 *)dctx->shash, src, GHASH_BLOCK_SIZE);
- gf128mul_lle((be128 *)dctx->shash, &ctx->key);
- srclen -= GHASH_BLOCK_SIZE;
- src += GHASH_BLOCK_SIZE;
- }
- }
-}
-
-static int p8_ghash_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen)
-{
- unsigned int len;
- struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm));
- struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
-
- if (dctx->bytes) {
- if (dctx->bytes + srclen < GHASH_DIGEST_SIZE) {
- memcpy(dctx->buffer + dctx->bytes, src,
- srclen);
- dctx->bytes += srclen;
- return 0;
- }
- memcpy(dctx->buffer + dctx->bytes, src,
- GHASH_DIGEST_SIZE - dctx->bytes);
-
- __ghash_block(ctx, dctx);
-
- src += GHASH_DIGEST_SIZE - dctx->bytes;
- srclen -= GHASH_DIGEST_SIZE - dctx->bytes;
- dctx->bytes = 0;
- }
- len = srclen & ~(GHASH_DIGEST_SIZE - 1);
- if (len) {
- __ghash_blocks(ctx, dctx, src, len);
- src += len;
- srclen -= len;
- }
- if (srclen) {
- memcpy(dctx->buffer, src, srclen);
- dctx->bytes = srclen;
- }
- return 0;
-}
-
-static int p8_ghash_final(struct shash_desc *desc, u8 *out)
-{
- int i;
- struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm));
- struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc);
-
- if (dctx->bytes) {
- for (i = dctx->bytes; i < GHASH_DIGEST_SIZE; i++)
- dctx->buffer[i] = 0;
- __ghash_block(ctx, dctx);
- dctx->bytes = 0;
- }
- memcpy(out, dctx->shash, GHASH_DIGEST_SIZE);
- return 0;
-}
-
-struct shash_alg p8_ghash_alg = {
- .digestsize = GHASH_DIGEST_SIZE,
- .init = p8_ghash_init,
- .update = p8_ghash_update,
- .final = p8_ghash_final,
- .setkey = p8_ghash_setkey,
- .descsize = sizeof(struct p8_ghash_desc_ctx)
- + sizeof(struct ghash_desc_ctx),
- .base = {
- .cra_name = "ghash",
- .cra_driver_name = "p8_ghash",
- .cra_priority = 1000,
- .cra_blocksize = GHASH_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct p8_ghash_ctx),
- .cra_module = THIS_MODULE,
- },
-};
diff --git a/drivers/crypto/vmx/ghashp8-ppc.pl b/drivers/crypto/vmx/ghashp8-ppc.pl
deleted file mode 100644
index 041e633c214f..000000000000
--- a/drivers/crypto/vmx/ghashp8-ppc.pl
+++ /dev/null
@@ -1,243 +0,0 @@
-#!/usr/bin/env perl
-# SPDX-License-Identifier: GPL-2.0
-
-# This code is taken from the OpenSSL project but the author (Andy Polyakov)
-# has relicensed it under the GPLv2. Therefore this program is free software;
-# you can redistribute it and/or modify it under the terms of the GNU General
-# Public License version 2 as published by the Free Software Foundation.
-#
-# The original headers, including the original license headers, are
-# included below for completeness.
-
-# ====================================================================
-# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see https://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-#
-# GHASH for PowerISA v2.07.
-#
-# July 2014
-#
-# Accurate performance measurements are problematic, because it's
-# always virtualized setup with possibly throttled processor.
-# Relative comparison is therefore more informative. This initial
-# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
-# faster than "4-bit" integer-only compiler-generated 64-bit code.
-# "Initial version" means that there is room for futher improvement.
-
-$flavour=shift;
-$output =shift;
-
-if ($flavour =~ /64/) {
- $SIZE_T=8;
- $LRSAVE=2*$SIZE_T;
- $STU="stdu";
- $POP="ld";
- $PUSH="std";
-} elsif ($flavour =~ /32/) {
- $SIZE_T=4;
- $LRSAVE=$SIZE_T;
- $STU="stwu";
- $POP="lwz";
- $PUSH="stw";
-} else { die "nonsense $flavour"; }
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
-die "can't locate ppc-xlate.pl";
-
-open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!";
-
-my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block
-
-my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
-my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
-my $vrsave="r12";
-
-$code=<<___;
-.machine "any"
-
-.text
-
-.globl .gcm_init_p8
- lis r0,0xfff0
- li r8,0x10
- mfspr $vrsave,256
- li r9,0x20
- mtspr 256,r0
- li r10,0x30
- lvx_u $H,0,r4 # load H
- le?xor r7,r7,r7
- le?addi r7,r7,0x8 # need a vperm start with 08
- le?lvsr 5,0,r7
- le?vspltisb 6,0x0f
- le?vxor 5,5,6 # set a b-endian mask
- le?vperm $H,$H,$H,5
-
- vspltisb $xC2,-16 # 0xf0
- vspltisb $t0,1 # one
- vaddubm $xC2,$xC2,$xC2 # 0xe0
- vxor $zero,$zero,$zero
- vor $xC2,$xC2,$t0 # 0xe1
- vsldoi $xC2,$xC2,$zero,15 # 0xe1...
- vsldoi $t1,$zero,$t0,1 # ...1
- vaddubm $xC2,$xC2,$xC2 # 0xc2...
- vspltisb $t2,7
- vor $xC2,$xC2,$t1 # 0xc2....01
- vspltb $t1,$H,0 # most significant byte
- vsl $H,$H,$t0 # H<<=1
- vsrab $t1,$t1,$t2 # broadcast carry bit
- vand $t1,$t1,$xC2
- vxor $H,$H,$t1 # twisted H
-
- vsldoi $H,$H,$H,8 # twist even more ...
- vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
- vsldoi $Hl,$zero,$H,8 # ... and split
- vsldoi $Hh,$H,$zero,8
-
- stvx_u $xC2,0,r3 # save pre-computed table
- stvx_u $Hl,r8,r3
- stvx_u $H, r9,r3
- stvx_u $Hh,r10,r3
-
- mtspr 256,$vrsave
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,2,0
- .long 0
-.size .gcm_init_p8,.-.gcm_init_p8
-
-.globl .gcm_gmult_p8
- lis r0,0xfff8
- li r8,0x10
- mfspr $vrsave,256
- li r9,0x20
- mtspr 256,r0
- li r10,0x30
- lvx_u $IN,0,$Xip # load Xi
-
- lvx_u $Hl,r8,$Htbl # load pre-computed table
- le?lvsl $lemask,r0,r0
- lvx_u $H, r9,$Htbl
- le?vspltisb $t0,0x07
- lvx_u $Hh,r10,$Htbl
- le?vxor $lemask,$lemask,$t0
- lvx_u $xC2,0,$Htbl
- le?vperm $IN,$IN,$IN,$lemask
- vxor $zero,$zero,$zero
-
- vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
- vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
- vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
-
- vpmsumd $t2,$Xl,$xC2 # 1st phase
-
- vsldoi $t0,$Xm,$zero,8
- vsldoi $t1,$zero,$Xm,8
- vxor $Xl,$Xl,$t0
- vxor $Xh,$Xh,$t1
-
- vsldoi $Xl,$Xl,$Xl,8
- vxor $Xl,$Xl,$t2
-
- vsldoi $t1,$Xl,$Xl,8 # 2nd phase
- vpmsumd $Xl,$Xl,$xC2
- vxor $t1,$t1,$Xh
- vxor $Xl,$Xl,$t1
-
- le?vperm $Xl,$Xl,$Xl,$lemask
- stvx_u $Xl,0,$Xip # write out Xi
-
- mtspr 256,$vrsave
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,2,0
- .long 0
-.size .gcm_gmult_p8,.-.gcm_gmult_p8
-
-.globl .gcm_ghash_p8
- lis r0,0xfff8
- li r8,0x10
- mfspr $vrsave,256
- li r9,0x20
- mtspr 256,r0
- li r10,0x30
- lvx_u $Xl,0,$Xip # load Xi
-
- lvx_u $Hl,r8,$Htbl # load pre-computed table
- le?lvsl $lemask,r0,r0
- lvx_u $H, r9,$Htbl
- le?vspltisb $t0,0x07
- lvx_u $Hh,r10,$Htbl
- le?vxor $lemask,$lemask,$t0
- lvx_u $xC2,0,$Htbl
- le?vperm $Xl,$Xl,$Xl,$lemask
- vxor $zero,$zero,$zero
-
- lvx_u $IN,0,$inp
- addi $inp,$inp,16
- subi $len,$len,16
- le?vperm $IN,$IN,$IN,$lemask
- vxor $IN,$IN,$Xl
- b Loop
-
-.align 5
-Loop:
- subic $len,$len,16
- vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
- subfe. r0,r0,r0 # borrow?-1:0
- vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
- and r0,r0,$len
- vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
- add $inp,$inp,r0
-
- vpmsumd $t2,$Xl,$xC2 # 1st phase
-
- vsldoi $t0,$Xm,$zero,8
- vsldoi $t1,$zero,$Xm,8
- vxor $Xl,$Xl,$t0
- vxor $Xh,$Xh,$t1
-
- vsldoi $Xl,$Xl,$Xl,8
- vxor $Xl,$Xl,$t2
- lvx_u $IN,0,$inp
- addi $inp,$inp,16
-
- vsldoi $t1,$Xl,$Xl,8 # 2nd phase
- vpmsumd $Xl,$Xl,$xC2
- le?vperm $IN,$IN,$IN,$lemask
- vxor $t1,$t1,$Xh
- vxor $IN,$IN,$t1
- vxor $IN,$IN,$Xl
- beq Loop # did $len-=16 borrow?
-
- vxor $Xl,$Xl,$t1
- le?vperm $Xl,$Xl,$Xl,$lemask
- stvx_u $Xl,0,$Xip # write out Xi
-
- mtspr 256,$vrsave
- blr
- .long 0
- .byte 0,12,0x14,0,0,0,4,0
- .long 0
-.size .gcm_ghash_p8,.-.gcm_ghash_p8
-
-.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
-.align 2
-___
-
-foreach (split("\n",$code)) {
- if ($flavour =~ /le$/o) { # little-endian
- s/le\?//o or
- s/be\?/#be#/o;
- } else {
- s/le\?/#le#/o or
- s/be\?//o;
- }
- print $_,"\n";
-}
-
-close STDOUT; # enforce flush
diff --git a/drivers/crypto/vmx/ppc-xlate.pl b/drivers/crypto/vmx/ppc-xlate.pl
deleted file mode 100644
index b583898c11ae..000000000000
--- a/drivers/crypto/vmx/ppc-xlate.pl
+++ /dev/null
@@ -1,231 +0,0 @@
-#!/usr/bin/env perl
-# SPDX-License-Identifier: GPL-2.0
-
-# PowerPC assembler distiller by <appro>.
-
-my $flavour = shift;
-my $output = shift;
-open STDOUT,">$output" || die "can't open $output: $!";
-
-my %GLOBALS;
-my $dotinlocallabels=($flavour=~/linux/)?1:0;
-my $elfv2abi=(($flavour =~ /linux-ppc64le/) or ($flavour =~ /linux-ppc64-elfv2/))?1:0;
-my $dotfunctions=($elfv2abi=~1)?0:1;
-
-################################################################
-# directives which need special treatment on different platforms
-################################################################
-my $globl = sub {
- my $junk = shift;
- my $name = shift;
- my $global = \$GLOBALS{$name};
- my $ret;
-
- $name =~ s|^[\.\_]||;
-
- SWITCH: for ($flavour) {
- /aix/ && do { $name = ".$name";
- last;
- };
- /osx/ && do { $name = "_$name";
- last;
- };
- /linux/
- && do { $ret = "_GLOBAL($name)";
- last;
- };
- }
-
- $ret = ".globl $name\nalign 5\n$name:" if (!$ret);
- $$global = $name;
- $ret;
-};
-my $text = sub {
- my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";
- $ret = ".abiversion 2\n".$ret if ($elfv2abi);
- $ret;
-};
-my $machine = sub {
- my $junk = shift;
- my $arch = shift;
- if ($flavour =~ /osx/)
- { $arch =~ s/\"//g;
- $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any");
- }
- ".machine $arch";
-};
-my $size = sub {
- if ($flavour =~ /linux/)
- { shift;
- my $name = shift; $name =~ s|^[\.\_]||;
- my $ret = ".size $name,.-".($dotfunctions?".":"").$name;
- $ret .= "\n.size .$name,.-.$name" if ($dotfunctions);
- $ret;
- }
- else
- { ""; }
-};
-my $asciz = sub {
- shift;
- my $line = join(",",@_);
- if ($line =~ /^"(.*)"$/)
- { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; }
- else
- { ""; }
-};
-my $quad = sub {
- shift;
- my @ret;
- my ($hi,$lo);
- for (@_) {
- if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io)
- { $hi=$1?"0x$1":"0"; $lo="0x$2"; }
- elsif (/^([0-9]+)$/o)
- { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl
- else
- { $hi=undef; $lo=$_; }
-
- if (defined($hi))
- { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); }
- else
- { push(@ret,".quad $lo"); }
- }
- join("\n",@ret);
-};
-
-################################################################
-# simplified mnemonics not handled by at least one assembler
-################################################################
-my $cmplw = sub {
- my $f = shift;
- my $cr = 0; $cr = shift if ($#_>1);
- # Some out-of-date 32-bit GNU assembler just can't handle cmplw...
- ($flavour =~ /linux.*32/) ?
- " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 :
- " cmplw ".join(',',$cr,@_);
-};
-my $bdnz = sub {
- my $f = shift;
- my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint
- " bc $bo,0,".shift;
-} if ($flavour!~/linux/);
-my $bltlr = sub {
- my $f = shift;
- my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint
- ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
- " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 :
- " bclr $bo,0";
-};
-my $bnelr = sub {
- my $f = shift;
- my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint
- ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
- " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 :
- " bclr $bo,2";
-};
-my $beqlr = sub {
- my $f = shift;
- my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint
- ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
- " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 :
- " bclr $bo,2";
-};
-# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two
-# arguments is 64, with "operand out of range" error.
-my $extrdi = sub {
- my ($f,$ra,$rs,$n,$b) = @_;
- $b = ($b+$n)&63; $n = 64-$n;
- " rldicl $ra,$rs,$b,$n";
-};
-my $vmr = sub {
- my ($f,$vx,$vy) = @_;
- " vor $vx,$vy,$vy";
-};
-
-# Some ABIs specify vrsave, special-purpose register #256, as reserved
-# for system use.
-my $no_vrsave = ($elfv2abi);
-my $mtspr = sub {
- my ($f,$idx,$ra) = @_;
- if ($idx == 256 && $no_vrsave) {
- " or $ra,$ra,$ra";
- } else {
- " mtspr $idx,$ra";
- }
-};
-my $mfspr = sub {
- my ($f,$rd,$idx) = @_;
- if ($idx == 256 && $no_vrsave) {
- " li $rd,-1";
- } else {
- " mfspr $rd,$idx";
- }
-};
-
-# PowerISA 2.06 stuff
-sub vsxmem_op {
- my ($f, $vrt, $ra, $rb, $op) = @_;
- " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1);
-}
-# made-up unaligned memory reference AltiVec/VMX instructions
-my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x
-my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x
-my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx
-my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx
-my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x
-my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x
-
-# PowerISA 2.07 stuff
-sub vcrypto_op {
- my ($f, $vrt, $vra, $vrb, $op) = @_;
- " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op;
-}
-my $vcipher = sub { vcrypto_op(@_, 1288); };
-my $vcipherlast = sub { vcrypto_op(@_, 1289); };
-my $vncipher = sub { vcrypto_op(@_, 1352); };
-my $vncipherlast= sub { vcrypto_op(@_, 1353); };
-my $vsbox = sub { vcrypto_op(@_, 0, 1480); };
-my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); };
-my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); };
-my $vpmsumb = sub { vcrypto_op(@_, 1032); };
-my $vpmsumd = sub { vcrypto_op(@_, 1224); };
-my $vpmsubh = sub { vcrypto_op(@_, 1096); };
-my $vpmsumw = sub { vcrypto_op(@_, 1160); };
-my $vaddudm = sub { vcrypto_op(@_, 192); };
-my $vadduqm = sub { vcrypto_op(@_, 256); };
-
-my $mtsle = sub {
- my ($f, $arg) = @_;
- " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2);
-};
-
-print "#include <asm/ppc_asm.h>\n" if $flavour =~ /linux/;
-
-while($line=<>) {
-
- $line =~ s|[#!;].*$||; # get rid of asm-style comments...
- $line =~ s|/\*.*\*/||; # ... and C-style comments...
- $line =~ s|^\s+||; # ... and skip white spaces in beginning...
- $line =~ s|\s+$||; # ... and at the end
-
- {
- $line =~ s|\b\.L(\w+)|L$1|g; # common denominator for Locallabel
- $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels);
- }
-
- {
- $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||;
- my $c = $1; $c = "\t" if ($c eq "");
- my $mnemonic = $2;
- my $f = $3;
- my $opcode = eval("\$$mnemonic");
- $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/);
- if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); }
- elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; }
- }
-
- print $line if ($line);
- print "\n";
-}
-
-close STDOUT;
diff --git a/drivers/crypto/vmx/vmx.c b/drivers/crypto/vmx/vmx.c
deleted file mode 100644
index 7eb713cc87c8..000000000000
--- a/drivers/crypto/vmx/vmx.c
+++ /dev/null
@@ -1,77 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Routines supporting VMX instructions on the Power 8
- *
- * Copyright (C) 2015 International Business Machines Inc.
- *
- * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/types.h>
-#include <linux/err.h>
-#include <linux/cpufeature.h>
-#include <linux/crypto.h>
-#include <asm/cputable.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/skcipher.h>
-
-#include "aesp8-ppc.h"
-
-static int __init p8_init(void)
-{
- int ret;
-
- ret = crypto_register_shash(&p8_ghash_alg);
- if (ret)
- goto err;
-
- ret = crypto_register_alg(&p8_aes_alg);
- if (ret)
- goto err_unregister_ghash;
-
- ret = crypto_register_skcipher(&p8_aes_cbc_alg);
- if (ret)
- goto err_unregister_aes;
-
- ret = crypto_register_skcipher(&p8_aes_ctr_alg);
- if (ret)
- goto err_unregister_aes_cbc;
-
- ret = crypto_register_skcipher(&p8_aes_xts_alg);
- if (ret)
- goto err_unregister_aes_ctr;
-
- return 0;
-
-err_unregister_aes_ctr:
- crypto_unregister_skcipher(&p8_aes_ctr_alg);
-err_unregister_aes_cbc:
- crypto_unregister_skcipher(&p8_aes_cbc_alg);
-err_unregister_aes:
- crypto_unregister_alg(&p8_aes_alg);
-err_unregister_ghash:
- crypto_unregister_shash(&p8_ghash_alg);
-err:
- return ret;
-}
-
-static void __exit p8_exit(void)
-{
- crypto_unregister_skcipher(&p8_aes_xts_alg);
- crypto_unregister_skcipher(&p8_aes_ctr_alg);
- crypto_unregister_skcipher(&p8_aes_cbc_alg);
- crypto_unregister_alg(&p8_aes_alg);
- crypto_unregister_shash(&p8_ghash_alg);
-}
-
-module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, p8_init);
-module_exit(p8_exit);
-
-MODULE_AUTHOR("Marcelo Cerri<mhcerri@br.ibm.com>");
-MODULE_DESCRIPTION("IBM VMX cryptographic acceleration instructions "
- "support on Power 8");
-MODULE_LICENSE("GPL");
-MODULE_VERSION("1.0.0");
-MODULE_IMPORT_NS(CRYPTO_INTERNAL);
diff --git a/drivers/crypto/xilinx/zynqmp-aes-gcm.c b/drivers/crypto/xilinx/zynqmp-aes-gcm.c
index 3c205324b22b..e61405718840 100644
--- a/drivers/crypto/xilinx/zynqmp-aes-gcm.c
+++ b/drivers/crypto/xilinx/zynqmp-aes-gcm.c
@@ -231,7 +231,10 @@ static int zynqmp_handle_aes_req(struct crypto_engine *engine,
err = zynqmp_aes_aead_cipher(areq);
}
+ local_bh_disable();
crypto_finalize_aead_request(engine, areq, err);
+ local_bh_enable();
+
return 0;
}