diff options
Diffstat (limited to 'drivers/mmc')
-rw-r--r-- | drivers/mmc/host/cavium-octeon.c   |   10 |
-rw-r--r-- | drivers/mmc/host/cavium-thunderx.c |  148 |
-rw-r--r-- | drivers/mmc/host/cavium.c          | 1023 |
-rw-r--r-- | drivers/mmc/host/cavium.h          |  137 |
4 files changed, 1176 insertions(+), 142 deletions(-)
diff --git a/drivers/mmc/host/cavium-octeon.c b/drivers/mmc/host/cavium-octeon.c index 22aded1065ae..302c052b8d93 100644 --- a/drivers/mmc/host/cavium-octeon.c +++ b/drivers/mmc/host/cavium-octeon.c @@ -247,8 +247,8 @@ static int octeon_mmc_probe(struct platform_device *pdev) /* Only CMD_DONE, DMA_DONE, CMD_ERR, DMA_ERR */ for (i = 1; i <= 4; i++) { ret = devm_request_irq(&pdev->dev, mmc_irq[i], - cvm_mmc_interrupt, - 0, cvm_mmc_irq_names[i], host); + cvm_mmc_interrupt, IRQF_NO_THREAD, + cvm_mmc_irq_names[i], host); if (ret < 0) { dev_err(&pdev->dev, "Error: devm_request_irq %d\n", mmc_irq[i]); @@ -257,8 +257,8 @@ static int octeon_mmc_probe(struct platform_device *pdev) } } else { ret = devm_request_irq(&pdev->dev, mmc_irq[0], - cvm_mmc_interrupt, 0, KBUILD_MODNAME, - host); + cvm_mmc_interrupt, IRQF_NO_THREAD, + KBUILD_MODNAME, host); if (ret < 0) { dev_err(&pdev->dev, "Error: devm_request_irq %d\n", mmc_irq[0]); @@ -277,7 +277,7 @@ static int octeon_mmc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, host); i = 0; - for_each_child_of_node(node, cn) { + for_each_available_child_of_node(node, cn) { host->slot_pdev[i] = of_platform_device_create(cn, NULL, &pdev->dev); if (!host->slot_pdev[i]) { diff --git a/drivers/mmc/host/cavium-thunderx.c b/drivers/mmc/host/cavium-thunderx.c index eee08d81b242..7fbf33e34217 100644 --- a/drivers/mmc/host/cavium-thunderx.c +++ b/drivers/mmc/host/cavium-thunderx.c @@ -15,6 +15,8 @@ #include <linux/of.h> #include <linux/of_platform.h> #include <linux/pci.h> +#include <linux/delay.h> +#include <linux/bitfield.h> #include "cavium.h" static void thunder_mmc_acquire_bus(struct cvm_mmc_host *host) @@ -31,6 +33,8 @@ static void thunder_mmc_int_enable(struct cvm_mmc_host *host, u64 val) { writeq(val, host->base + MIO_EMM_INT(host)); writeq(val, host->base + MIO_EMM_INT_EN_SET(host)); + writeq(MIO_EMM_DMA_INT_DMA, + host->dma_base + MIO_EMM_DMA_INT(host)); } static int thunder_mmc_register_interrupts(struct cvm_mmc_host 
*host, @@ -45,14 +49,125 @@ static int thunder_mmc_register_interrupts(struct cvm_mmc_host *host, /* register interrupts */ for (i = 0; i < nvec; i++) { ret = devm_request_irq(&pdev->dev, pci_irq_vector(pdev, i), - cvm_mmc_interrupt, - 0, cvm_mmc_irq_names[i], host); + cvm_mmc_interrupt, IRQF_NO_THREAD, + cvm_mmc_irq_names[i], host); if (ret) return ret; } return 0; } +/* calibration evaluates the per tap delay */ +static void thunder_calibrate_mmc(struct cvm_mmc_host *host) +{ + u32 retries = 10; + u32 delay = 4; + unsigned int ps; + const char *how = "default"; + + if (is_mmc_8xxx(host)) + return; + + /* set _DEBUG[CLK_ON]=1 as workaround for clock issue */ + if (is_mmc_otx2_A0(host) || is_mmc_95xx(host)) + writeq(1, host->base + MIO_EMM_DEBUG(host)); + + if (is_mmc_otx2_A0(host)) { + /* + * Operation of up to 100 MHz may be achieved by skipping the + * steps that establish the tap delays and instead assuming + * that MIO_EMM_TAP[DELAY] returns 0x4 indicating 78 pS/tap. + */ + } else { + u64 tap; + u64 emm_cfg = readq(host->base + MIO_EMM_CFG(host)); + u64 tcfg; + u64 emm_io_ctl; + u64 emm_switch; + u64 emm_wdog; + u64 emm_sts_mask; + u64 emm_debug; + u64 emm_timing; + u64 emm_rca; + + /* + * MIO_EMM_CFG[BUS_ENA] must be zero for calibration, + * but that resets whole host, so save state. 
+ */ + emm_io_ctl = readq(host->base + MIO_EMM_IO_CTL(host)); + emm_switch = readq(host->base + MIO_EMM_SWITCH(host)); + emm_wdog = readq(host->base + MIO_EMM_WDOG(host)); + emm_sts_mask = + readq(host->base + MIO_EMM_STS_MASK(host)); + emm_debug = readq(host->base + MIO_EMM_DEBUG(host)); + emm_timing = readq(host->base + MIO_EMM_TIMING(host)); + emm_rca = readq(host->base + MIO_EMM_RCA(host)); + + /* reset controller */ + tcfg = emm_cfg; + tcfg &= ~MIO_EMM_CFG_BUS_ENA; + writeq(tcfg, host->base + MIO_EMM_CFG(host)); + udelay(1); + + /* restart with phantom slot 3 */ + tcfg |= FIELD_PREP(MIO_EMM_CFG_BUS_ENA, 1ull << 3); + writeq(tcfg, host->base + MIO_EMM_CFG(host)); + mdelay(1); + + /* Start calibration */ + writeq(0, host->base + MIO_EMM_CALB(host)); + udelay(5); + writeq(START_CALIBRATION, host->base + MIO_EMM_CALB(host)); + udelay(5); + + do { + /* wait for approximately 300 coprocessor clock */ + udelay(5); + tap = readq(host->base + MIO_EMM_TAP(host)); + } while (!tap && retries--); + + /* leave calibration mode */ + writeq(0, host->base + MIO_EMM_CALB(host)); + udelay(5); + + if (retries <= 0 || !tap) { + how = "fallback"; + } else { + /* calculate the per-tap delay */ + delay = tap & MIO_EMM_TAP_DELAY; + how = "calibrated"; + } + + /* restore old state */ + writeq(emm_cfg, host->base + MIO_EMM_CFG(host)); + mdelay(1); + writeq(emm_rca, host->base + MIO_EMM_RCA(host)); + writeq(emm_timing, host->base + MIO_EMM_TIMING(host)); + writeq(emm_debug, host->base + MIO_EMM_DEBUG(host)); + writeq(emm_sts_mask, + host->base + MIO_EMM_STS_MASK(host)); + writeq(emm_wdog, host->base + MIO_EMM_WDOG(host)); + writeq(emm_switch, host->base + MIO_EMM_SWITCH(host)); + writeq(emm_io_ctl, host->base + MIO_EMM_IO_CTL(host)); + mdelay(1); + + } + + /* + * Scale measured/guessed calibration value to pS: + * The delay value should be multiplied by 10 ns(or 10000 ps) + * and then divided by no of taps to determine the estimated + * delay in pico second. 
The nominal value is 125 ps per tap. + */ + ps = (delay * PS_10000) / TOTAL_NO_OF_TAPS; + if (host->per_tap_delay != ps) { + dev_info(host->dev, "%s delay:%d per_tap_delay:%dpS\n", + how, delay, ps); + host->per_tap_delay = ps; + host->delay_logged = 0; + } +} + static int thunder_mmc_probe(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -81,6 +196,7 @@ static int thunder_mmc_probe(struct pci_dev *pdev, /* On ThunderX these are identical */ host->dma_base = host->base; + host->pdev = pdev; host->reg_off = 0x2000; host->reg_off_dma = 0x160; @@ -107,24 +223,32 @@ static int thunder_mmc_probe(struct pci_dev *pdev, host->need_irq_handler_lock = true; host->last_slot = -1; - ret = dma_set_mask(dev, DMA_BIT_MASK(48)); if (ret) goto error; /* * Clear out any pending interrupts that may be left over from * bootloader. Writing 1 to the bits clears them. + * Clear DMA FIFO after IRQ disable, then stub any dangling events */ - writeq(127, host->base + MIO_EMM_INT_EN(host)); - writeq(3, host->base + MIO_EMM_DMA_INT_ENA_W1C(host)); - /* Clear DMA FIFO */ - writeq(BIT_ULL(16), host->base + MIO_EMM_DMA_FIFO_CFG(host)); + writeq(~0, host->base + MIO_EMM_INT(host)); + writeq(~0, host->dma_base + MIO_EMM_DMA_INT_ENA_W1C(host)); + writeq(~0, host->base + MIO_EMM_INT_EN_CLR(host)); + writeq(MIO_EMM_DMA_FIFO_CFG_CLR, + host->dma_base + MIO_EMM_DMA_FIFO_CFG(host)); + writeq(~0, host->dma_base + MIO_EMM_DMA_INT(host)); ret = thunder_mmc_register_interrupts(host, pdev); if (ret) goto error; - for_each_child_of_node(node, child_node) { + /* Run the calibration to calculate per tap delay that would be + * used to evaluate values. These values would be programmed in + * MIO_EMM_TIMING. + */ + thunder_calibrate_mmc(host); + + for_each_available_child_of_node(node, child_node) { /* * mmc_of_parse and devm* require one device per slot. 
* Create a dummy device per slot and set the node pointer to @@ -137,12 +261,15 @@ static int thunder_mmc_probe(struct pci_dev *pdev, if (!host->slot_pdev[i]) continue; + dev_info(dev, "Probing slot %d\n", i); + ret = cvm_mmc_of_slot_probe(&host->slot_pdev[i]->dev, host); if (ret) goto error; } i++; } + dev_info(dev, "probed\n"); return 0; @@ -171,8 +298,11 @@ static void thunder_mmc_remove(struct pci_dev *pdev) cvm_mmc_of_slot_remove(host->slot[i]); dma_cfg = readq(host->dma_base + MIO_EMM_DMA_CFG(host)); - dma_cfg &= ~MIO_EMM_DMA_CFG_EN; + dma_cfg |= MIO_EMM_DMA_CFG_CLR; writeq(dma_cfg, host->dma_base + MIO_EMM_DMA_CFG(host)); + do { + dma_cfg = readq(host->dma_base + MIO_EMM_DMA_CFG(host)); + } while (dma_cfg & MIO_EMM_DMA_CFG_EN); clk_disable_unprepare(host->clk); } diff --git a/drivers/mmc/host/cavium.c b/drivers/mmc/host/cavium.c index 89deb451e0ac..5005efd113ee 100644 --- a/drivers/mmc/host/cavium.c +++ b/drivers/mmc/host/cavium.c @@ -25,6 +25,8 @@ #include <linux/regulator/consumer.h> #include <linux/scatterlist.h> #include <linux/time.h> +#include <linux/iommu.h> +#include <linux/swiotlb.h> #include "cavium.h" @@ -38,6 +40,8 @@ const char *cvm_mmc_irq_names[] = { "MMC Switch Error", "MMC DMA int Fifo", "MMC DMA int", + "MMC NCB Fault", + "MMC RAS", }; /* @@ -71,7 +75,7 @@ static struct cvm_mmc_cr_type cvm_mmc_cr_types[] = { {0, 1}, /* CMD16 */ {1, 1}, /* CMD17 */ {1, 1}, /* CMD18 */ - {3, 1}, /* CMD19 */ + {2, 1}, /* CMD19 */ {2, 1}, /* CMD20 */ {0, 0}, /* CMD21 */ {0, 0}, /* CMD22 */ @@ -118,6 +122,156 @@ static struct cvm_mmc_cr_type cvm_mmc_cr_types[] = { {0, 0} /* CMD63 */ }; +static int tapdance; +module_param(tapdance, int, 0644); +MODULE_PARM_DESC(tapdance, "adjust bus-timing: (0=mid-eye, positive=Nth_fastest_tap)"); + +static int clk_scale = 100; +module_param(clk_scale, int, 0644); +MODULE_PARM_DESC(clk_scale, "percent scale data_/cmd_out taps (default 100)"); + +static bool fixed_timing; +module_param(fixed_timing, bool, 0444); 
+MODULE_PARM_DESC(fixed_timing, "use fixed data_/cmd_out taps"); + +static bool ddr_cmd_taps; +module_param(ddr_cmd_taps, bool, 0644); +MODULE_PARM_DESC(ddr_cmd_taps, "reduce cmd_out_taps in DDR modes, as before"); + +static bool cvm_is_mmc_timing_ddr(struct cvm_mmc_slot *slot) +{ + if ((slot->mmc->ios.timing == MMC_TIMING_UHS_DDR50) || + (slot->mmc->ios.timing == MMC_TIMING_MMC_DDR52) || + (slot->mmc->ios.timing == MMC_TIMING_MMC_HS400)) + return true; + else + return false; +} + +static void cvm_mmc_set_timing(struct cvm_mmc_slot *slot) +{ + if (!is_mmc_otx2(slot->host)) + return; + + writeq(slot->taps, slot->host->base + MIO_EMM_TIMING(slot->host)); +} + +static int tout(struct cvm_mmc_slot *slot, int ps, int hint) +{ + struct cvm_mmc_host *host = slot->host; + struct mmc_host *mmc = slot->mmc; + int tap_ps = host->per_tap_delay; + int timing = mmc->ios.timing; + static int old_scale; + int taps; + + if (fixed_timing) + return hint; + + if (!hint) + hint = 63; + + if (!tap_ps) + return hint; + + taps = min((int)(ps * clk_scale) / (tap_ps * 100), 63); + + /* when modparam is adjusted, re-announce timing */ + if (old_scale != clk_scale) { + host->delay_logged = 0; + old_scale = clk_scale; + } + + if (!test_and_set_bit(timing, + &host->delay_logged)) + dev_info(host->dev, "mmc%d.ios_timing:%d %dpS hint:%d taps:%d\n", + mmc->index, timing, ps, hint, taps); + + return taps; +} + +static int cvm_mmc_configure_delay(struct cvm_mmc_slot *slot) +{ + struct cvm_mmc_host *host = slot->host; + struct mmc_host *mmc = slot->mmc; + + pr_debug("slot%d.configure_delay\n", slot->bus_id); + + if (is_mmc_8xxx(host)) { + /* MIO_EMM_SAMPLE is till T83XX */ + u64 emm_sample = + FIELD_PREP(MIO_EMM_SAMPLE_CMD_CNT, slot->cmd_cnt) | + FIELD_PREP(MIO_EMM_SAMPLE_DAT_CNT, slot->data_cnt); + writeq(emm_sample, host->base + MIO_EMM_SAMPLE(host)); + } else { + int half = MAX_NO_OF_TAPS / 2; + int cin = FIELD_GET(MIO_EMM_TIMING_CMD_IN, slot->taps); + int din = FIELD_GET(MIO_EMM_TIMING_DATA_IN, 
slot->taps); + int cout, dout; + + if (!slot->taps) + cin = din = half; + /* + * EMM_CMD hold time from rising edge of EMMC_CLK. + * Typically 3.0 ns at frequencies < 26 MHz. + * Typically 3.0 ns at frequencies <= 52 MHz SDR. + * Typically 2.5 ns at frequencies <= 52 MHz DDR. + * Typically 0.8 ns at frequencies > 52 MHz SDR. + * Typically 0.4 ns at frequencies > 52 MHz DDR. + */ + switch (mmc->ios.timing) { + case MMC_TIMING_LEGACY: + default: + if (mmc->card && mmc_card_mmc(mmc->card)) + cout = tout(slot, 5000, 39); + else + cout = tout(slot, 8000, 63); + break; + case MMC_TIMING_UHS_SDR12: + cout = tout(slot, 3000, 39); + break; + case MMC_TIMING_MMC_HS: + cout = tout(slot, 2500, 32); + break; + case MMC_TIMING_SD_HS: + case MMC_TIMING_UHS_SDR25: + case MMC_TIMING_UHS_SDR50: + cout = tout(slot, 2000, 26); + break; + case MMC_TIMING_UHS_DDR50: + case MMC_TIMING_MMC_DDR52: + cout = tout(slot, 1500, 20); + break; + case MMC_TIMING_UHS_SDR104: + case MMC_TIMING_MMC_HS200: + case MMC_TIMING_MMC_HS400: + cout = tout(slot, 800, 10); + break; + } + + if (!is_mmc_95xx(host)) { + if (!cvm_is_mmc_timing_ddr(slot)) + dout = cout; + else if (ddr_cmd_taps) + cout = dout = cout / 2; + else + dout = cout / 2; + } else + dout = tout(slot, 800, 10); + + slot->taps = + FIELD_PREP(MIO_EMM_TIMING_CMD_IN, cin) | + FIELD_PREP(MIO_EMM_TIMING_CMD_OUT, cout) | + FIELD_PREP(MIO_EMM_TIMING_DATA_IN, din) | + FIELD_PREP(MIO_EMM_TIMING_DATA_OUT, dout); + + pr_debug("slot%d.taps %llx\n", slot->bus_id, slot->taps); + cvm_mmc_set_timing(slot); + } + + return 0; +} + static struct cvm_mmc_cr_mods cvm_mmc_get_cr_mods(struct mmc_command *cmd) { struct cvm_mmc_cr_type *cr; @@ -175,14 +329,14 @@ static void check_switch_errors(struct cvm_mmc_host *host) dev_err(host->dev, "Switch bus width error\n"); } -static void clear_bus_id(u64 *reg) +static inline void clear_bus_id(u64 *reg) { u64 bus_id_mask = GENMASK_ULL(61, 60); *reg &= ~bus_id_mask; } -static void set_bus_id(u64 *reg, int bus_id) +static 
inline void set_bus_id(u64 *reg, int bus_id) { clear_bus_id(reg); *reg |= FIELD_PREP(GENMASK(61, 60), bus_id); @@ -193,25 +347,69 @@ static int get_bus_id(u64 reg) return FIELD_GET(GENMASK_ULL(61, 60), reg); } -/* - * We never set the switch_exe bit since that would interfere - * with the commands send by the MMC core. - */ -static void do_switch(struct cvm_mmc_host *host, u64 emm_switch) +/* save old slot details, switch power */ +static bool pre_switch(struct cvm_mmc_host *host, u64 emm_switch) { - int retries = 100; - u64 rsp_sts; - int bus_id; + int bus_id = get_bus_id(emm_switch); + struct cvm_mmc_slot *slot = host->slot[bus_id]; + struct cvm_mmc_slot *old_slot; + bool same_vqmmc = false; - /* - * Modes setting only taken from slot 0. Work around that hardware - * issue by first switching to slot 0. + if (host->last_slot == bus_id) + return false; + + /* when VQMMC is switched, tri-state CMDn over any slot change + * to avoid transient states on D0-7 or CLK from level-shifters */ - bus_id = get_bus_id(emm_switch); - clear_bus_id(&emm_switch); - writeq(emm_switch, host->base + MIO_EMM_SWITCH(host)); + if (host->use_vqmmc) { + writeq(1ull << 3, host->base + MIO_EMM_CFG(host)); + udelay(10); + } + + if (host->last_slot >= 0 && host->slot[host->last_slot]) { + old_slot = host->slot[host->last_slot]; + old_slot->cached_switch = + readq(host->base + MIO_EMM_SWITCH(host)); + old_slot->cached_rca = readq(host->base + MIO_EMM_RCA(host)); + + same_vqmmc = (slot->mmc->supply.vqmmc == + old_slot->mmc->supply.vqmmc); + if (!same_vqmmc && !IS_ERR_OR_NULL(old_slot->mmc->supply.vqmmc)) + regulator_disable(old_slot->mmc->supply.vqmmc); + } + + if (!same_vqmmc && !IS_ERR_OR_NULL(slot->mmc->supply.vqmmc)) { + int e = regulator_enable(slot->mmc->supply.vqmmc); + + if (e) + dev_err(host->dev, "mmc-slot@%d.vqmmc err %d\n", + bus_id, e); + } + + host->last_slot = slot->bus_id; + + return true; +} + +static void post_switch(struct cvm_mmc_host *host, u64 emm_switch) +{ + int bus_id = 
get_bus_id(emm_switch); + struct cvm_mmc_slot *slot = host->slot[bus_id]; + + if (host->use_vqmmc) { + /* enable new CMDn */ + writeq(1ull << bus_id, host->base + MIO_EMM_CFG(host)); + udelay(10); + } + + writeq(slot->cached_rca, host->base + MIO_EMM_RCA(host)); +} + +static inline void mode_switch(struct cvm_mmc_host *host, u64 emm_switch) +{ + u64 rsp_sts; + int retries = 100; - set_bus_id(&emm_switch, bus_id); writeq(emm_switch, host->base + MIO_EMM_SWITCH(host)); /* wait for the switch to finish */ @@ -221,15 +419,49 @@ static void do_switch(struct cvm_mmc_host *host, u64 emm_switch) break; udelay(10); } while (--retries); +} + +/* + * We never set the switch_exe bit since that would interfere + * with the commands send by the MMC core. + */ +static void do_switch(struct cvm_mmc_host *host, u64 emm_switch) +{ + int bus_id = get_bus_id(emm_switch); + struct cvm_mmc_slot *slot = host->slot[bus_id]; + bool slot_changed = pre_switch(host, emm_switch); + + /* + * Modes setting only taken from slot 0. Work around that hardware + * issue by first switching to slot 0. + */ + if (bus_id) { + u64 switch0 = emm_switch; + + clear_bus_id(&switch0); + mode_switch(host, switch0); + } + + mode_switch(host, emm_switch); check_switch_errors(host); + if (slot_changed) + post_switch(host, emm_switch); + slot->cached_switch = emm_switch; + if (emm_switch & MIO_EMM_SWITCH_CLK) + slot->cmd6_pending = false; } +/* need to change hardware state to match software requirements? */ static bool switch_val_changed(struct cvm_mmc_slot *slot, u64 new_val) { /* Match BUS_ID, HS_TIMING, BUS_WIDTH, POWER_CLASS, CLK_HI, CLK_LO */ - u64 match = 0x3001070fffffffffull; + /* For 9xxx add HS200_TIMING and HS400_TIMING */ + u64 match = (is_mmc_otx2(slot->host)) ? 
+ 0x3007070fffffffffull : 0x3001070fffffffffull; + if (!slot->host->powered) + return true; return (slot->cached_switch & match) != (new_val & match); } @@ -247,58 +479,62 @@ static void set_wdog(struct cvm_mmc_slot *slot, unsigned int ns) writeq(timeout, slot->host->base + MIO_EMM_WDOG(slot->host)); } +static void emmc_io_drive_setup(struct cvm_mmc_slot *slot) +{ + u64 ioctl_cfg; + struct cvm_mmc_host *host = slot->host; + + /* Setup drive and slew only for 9x */ + if (is_mmc_otx2(host)) { + if ((slot->drive < 0) || (slot->slew < 0)) + return; + /* Setup the emmc interface current drive + * strength & clk slew rate. + */ + ioctl_cfg = FIELD_PREP(MIO_EMM_IO_CTL_DRIVE, slot->drive) | + FIELD_PREP(MIO_EMM_IO_CTL_SLEW, slot->slew); + writeq(ioctl_cfg, host->base + MIO_EMM_IO_CTL(host)); + } +} + static void cvm_mmc_reset_bus(struct cvm_mmc_slot *slot) { struct cvm_mmc_host *host = slot->host; u64 emm_switch, wdog; - emm_switch = readq(slot->host->base + MIO_EMM_SWITCH(host)); - emm_switch &= ~(MIO_EMM_SWITCH_EXE | MIO_EMM_SWITCH_ERR0 | - MIO_EMM_SWITCH_ERR1 | MIO_EMM_SWITCH_ERR2); + emm_switch = readq(host->base + MIO_EMM_SWITCH(host)); + emm_switch &= ~(MIO_EMM_SWITCH_EXE | MIO_EMM_SWITCH_ERRS); set_bus_id(&emm_switch, slot->bus_id); - wdog = readq(slot->host->base + MIO_EMM_WDOG(host)); - do_switch(slot->host, emm_switch); - - slot->cached_switch = emm_switch; + wdog = readq(host->base + MIO_EMM_WDOG(host)); + do_switch(host, emm_switch); + host->powered = true; msleep(20); - writeq(wdog, slot->host->base + MIO_EMM_WDOG(host)); + writeq(wdog, host->base + MIO_EMM_WDOG(host)); } /* Switch to another slot if needed */ static void cvm_mmc_switch_to(struct cvm_mmc_slot *slot) { struct cvm_mmc_host *host = slot->host; - struct cvm_mmc_slot *old_slot; - u64 emm_sample, emm_switch; if (slot->bus_id == host->last_slot) return; - if (host->last_slot >= 0 && host->slot[host->last_slot]) { - old_slot = host->slot[host->last_slot]; - old_slot->cached_switch = readq(host->base + 
MIO_EMM_SWITCH(host)); - old_slot->cached_rca = readq(host->base + MIO_EMM_RCA(host)); - } - - writeq(slot->cached_rca, host->base + MIO_EMM_RCA(host)); - emm_switch = slot->cached_switch; - set_bus_id(&emm_switch, slot->bus_id); - do_switch(host, emm_switch); - - emm_sample = FIELD_PREP(MIO_EMM_SAMPLE_CMD_CNT, slot->cmd_cnt) | - FIELD_PREP(MIO_EMM_SAMPLE_DAT_CNT, slot->dat_cnt); - writeq(emm_sample, host->base + MIO_EMM_SAMPLE(host)); + do_switch(host, slot->cached_switch); + host->powered = true; - host->last_slot = slot->bus_id; + emmc_io_drive_setup(slot); + cvm_mmc_configure_delay(slot); } -static void do_read(struct cvm_mmc_host *host, struct mmc_request *req, +static void do_read(struct cvm_mmc_slot *slot, struct mmc_request *req, u64 dbuf) { - struct sg_mapping_iter *smi = &host->smi; + struct cvm_mmc_host *host = slot->host; + struct sg_mapping_iter *smi = &slot->smi; int data_len = req->data->blocks * req->data->blksz; int bytes_xfered, shift = -1; u64 dat = 0; @@ -365,7 +601,7 @@ static void set_cmd_response(struct cvm_mmc_host *host, struct mmc_request *req, } } -static int get_dma_dir(struct mmc_data *data) +static inline int get_dma_dir(struct mmc_data *data) { return (data->flags & MMC_DATA_WRITE) ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE; } @@ -374,6 +610,9 @@ static int finish_dma_single(struct cvm_mmc_host *host, struct mmc_data *data) { data->bytes_xfered = data->blocks * data->blksz; data->error = 0; + + writeq(MIO_EMM_DMA_FIFO_CFG_CLR, + host->dma_base + MIO_EMM_DMA_FIFO_CFG(host)); dma_unmap_sg(host->dev, data->sg, data->sg_len, get_dma_dir(data)); return 1; } @@ -382,6 +621,7 @@ static int finish_dma_sg(struct cvm_mmc_host *host, struct mmc_data *data) { u64 fifo_cfg; int count; + void __iomem *dma_intp = host->dma_base + MIO_EMM_DMA_INT(host); /* Check if there are any pending requests left */ fifo_cfg = readq(host->dma_base + MIO_EMM_DMA_FIFO_CFG(host)); @@ -392,8 +632,16 @@ static int finish_dma_sg(struct cvm_mmc_host *host, struct mmc_data *data) data->bytes_xfered = data->blocks * data->blksz; data->error = 0; - /* Clear and disable FIFO */ - writeq(BIT_ULL(16), host->dma_base + MIO_EMM_DMA_FIFO_CFG(host)); + writeq(MIO_EMM_DMA_FIFO_CFG_CLR, + host->dma_base + MIO_EMM_DMA_FIFO_CFG(host)); + + /* on read, wait for internal buffer to flush out to mem */ + if (get_dma_dir(data) == DMA_FROM_DEVICE) { + while (!(readq(dma_intp) & MIO_EMM_DMA_INT_DMA)) + udelay(10); + writeq(MIO_EMM_DMA_INT_DMA, dma_intp); + } + dma_unmap_sg(host->dev, data->sg, data->sg_len, get_dma_dir(data)); return 1; } @@ -415,7 +663,8 @@ static int check_status(u64 rsp_sts) if (rsp_sts & MIO_EMM_RSP_STS_RSP_TIMEOUT || rsp_sts & MIO_EMM_RSP_STS_BLK_TIMEOUT) return -ETIMEDOUT; - if (rsp_sts & MIO_EMM_RSP_STS_DBUF_ERR) + if (rsp_sts & MIO_EMM_RSP_STS_DBUF_ERR || + rsp_sts & MIO_EMM_RSP_STS_BLK_CRC_ERR) return -EIO; return 0; } @@ -435,16 +684,24 @@ static void cleanup_dma(struct cvm_mmc_host *host, u64 rsp_sts) irqreturn_t cvm_mmc_interrupt(int irq, void *dev_id) { struct cvm_mmc_host *host = dev_id; - struct mmc_request *req; + struct mmc_request *req = NULL; + struct cvm_mmc_slot *slot = NULL; unsigned long flags = 0; u64 emm_int, rsp_sts; bool host_done; + int bus_id; if 
(host->need_irq_handler_lock) spin_lock_irqsave(&host->irq_handler_lock, flags); else __acquire(&host->irq_handler_lock); + rsp_sts = readq(host->base + MIO_EMM_RSP_STS(host)); + bus_id = get_bus_id(rsp_sts); + slot = host->slot[bus_id]; + if (slot) + req = slot->current_req; + /* Clear interrupt bits (write 1 clears ). */ emm_int = readq(host->base + MIO_EMM_INT(host)); writeq(emm_int, host->base + MIO_EMM_INT(host)); @@ -452,25 +709,32 @@ irqreturn_t cvm_mmc_interrupt(int irq, void *dev_id) if (emm_int & MIO_EMM_INT_SWITCH_ERR) check_switch_errors(host); - req = host->current_req; if (!req) goto out; - rsp_sts = readq(host->base + MIO_EMM_RSP_STS(host)); + /* + * dma_pend means DMA has stalled with CRC errs. + * start teardown, get irq on completion, mmc stack retries. + */ + if ((rsp_sts & MIO_EMM_RSP_STS_DMA_PEND) && slot->dma_active) { + cleanup_dma(host, rsp_sts); + goto out; + } + /* * dma_val set means DMA is still in progress. Don't touch * the request and wait for the interrupt indicating that * the DMA is finished. 
*/ - if ((rsp_sts & MIO_EMM_RSP_STS_DMA_VAL) && host->dma_active) + if ((rsp_sts & MIO_EMM_RSP_STS_DMA_VAL) && slot->dma_active) goto out; - if (!host->dma_active && req->data && + if (!slot->dma_active && req->data && (emm_int & MIO_EMM_INT_BUF_DONE)) { unsigned int type = (rsp_sts >> 7) & 3; if (type == 1) - do_read(host, req, rsp_sts & MIO_EMM_RSP_STS_DBUF); + do_read(slot, req, rsp_sts & MIO_EMM_RSP_STS_DBUF); else if (type == 2) do_write(req); } @@ -480,12 +744,16 @@ irqreturn_t cvm_mmc_interrupt(int irq, void *dev_id) emm_int & MIO_EMM_INT_CMD_ERR || emm_int & MIO_EMM_INT_DMA_ERR; + /* Add NCB_FLT interrupt for octtx2 */ + if (is_mmc_otx2(host)) + host_done = host_done || emm_int & MIO_EMM_INT_NCB_FLT; + if (!(host_done && req->done)) goto no_req_done; req->cmd->error = check_status(rsp_sts); - if (host->dma_active && req->data) + if (slot->dma_active && req->data) if (!finish_dma(host, req->data)) goto no_req_done; @@ -494,7 +762,18 @@ irqreturn_t cvm_mmc_interrupt(int irq, void *dev_id) (rsp_sts & MIO_EMM_RSP_STS_DMA_PEND)) cleanup_dma(host, rsp_sts); - host->current_req = NULL; + /* follow CMD6 timing/width with IMMEDIATE switch */ + if (slot && slot->cmd6_pending) { + if (host_done && !req->cmd->error) { + do_switch(host, slot->want_switch); + emmc_io_drive_setup(slot); + cvm_mmc_configure_delay(slot); + } else if (slot) { + slot->cmd6_pending = false; + } + } + + slot->current_req = NULL; req->done(req); no_req_done: @@ -609,9 +888,9 @@ static u64 prepare_dma_sg(struct cvm_mmc_host *host, struct mmc_data *data) error: WARN_ON_ONCE(1); + writeq(MIO_EMM_DMA_FIFO_CFG_CLR, + host->dma_base + MIO_EMM_DMA_FIFO_CFG(host)); dma_unmap_sg(host->dev, data->sg, data->sg_len, get_dma_dir(data)); - /* Disable FIFO */ - writeq(BIT_ULL(16), host->dma_base + MIO_EMM_DMA_FIFO_CFG(host)); return 0; } @@ -653,7 +932,11 @@ static void cvm_mmc_dma_request(struct mmc_host *mmc, struct cvm_mmc_slot *slot = mmc_priv(mmc); struct cvm_mmc_host *host = slot->host; struct mmc_data 
*data; - u64 emm_dma, addr; + u64 emm_dma, addr, int_enable_mask = 0; + int seg; + + /* cleared by successful termination */ + mrq->cmd->error = -EINVAL; if (!mrq->data || !mrq->data->sg || !mrq->data->sg_len || !mrq->stop || mrq->stop->opcode != MMC_STOP_TRANSMISSION) { @@ -662,17 +945,27 @@ static void cvm_mmc_dma_request(struct mmc_host *mmc, goto error; } + /* unaligned multi-block DMA has problems, so forbid all unaligned */ + for (seg = 0; seg < mrq->data->sg_len; seg++) { + struct scatterlist *sg = &mrq->data->sg[seg]; + u64 align = (sg->offset | sg->length); + + if (!(align & 7)) + continue; + dev_info(&mmc->card->dev, + "Error:64bit alignment required\n"); + goto error; + } + cvm_mmc_switch_to(slot); data = mrq->data; + pr_debug("DMA request blocks: %d block_size: %d total_size: %d\n", data->blocks, data->blksz, data->blocks * data->blksz); if (data->timeout_ns) set_wdog(slot, data->timeout_ns); - WARN_ON(host->current_req); - host->current_req = mrq; - emm_dma = prepare_ext_dma(mmc, mrq); addr = prepare_dma(host, data); if (!addr) { @@ -680,9 +973,19 @@ static void cvm_mmc_dma_request(struct mmc_host *mmc, goto error; } - host->dma_active = true; - host->int_enable(host, MIO_EMM_INT_CMD_ERR | MIO_EMM_INT_DMA_DONE | - MIO_EMM_INT_DMA_ERR); + mrq->host = mmc; + WARN_ON(slot->current_req); + slot->current_req = mrq; + slot->dma_active = true; + + int_enable_mask = MIO_EMM_INT_CMD_ERR | MIO_EMM_INT_DMA_DONE | + MIO_EMM_INT_DMA_ERR; + + /* Add NCB_FLT interrupt for octtx2 */ + if (is_mmc_otx2(host)) + int_enable_mask |= MIO_EMM_INT_NCB_FLT; + + host->int_enable(host, int_enable_mask); if (host->dmar_fixup) host->dmar_fixup(host, mrq->cmd, data, addr); @@ -700,22 +1003,22 @@ static void cvm_mmc_dma_request(struct mmc_host *mmc, return; error: - mrq->cmd->error = -EINVAL; if (mrq->done) mrq->done(mrq); host->release_bus(host); } -static void do_read_request(struct cvm_mmc_host *host, struct mmc_request *mrq) +static void do_read_request(struct cvm_mmc_slot 
*slot, struct mmc_request *mrq) { - sg_miter_start(&host->smi, mrq->data->sg, mrq->data->sg_len, + sg_miter_start(&slot->smi, mrq->data->sg, mrq->data->sg_len, SG_MITER_ATOMIC | SG_MITER_TO_SG); } -static void do_write_request(struct cvm_mmc_host *host, struct mmc_request *mrq) +static void do_write_request(struct cvm_mmc_slot *slot, struct mmc_request *mrq) { + struct cvm_mmc_host *host = slot->host; unsigned int data_len = mrq->data->blocks * mrq->data->blksz; - struct sg_mapping_iter *smi = &host->smi; + struct sg_mapping_iter *smi = &slot->smi; unsigned int bytes_xfered; int shift = 56; u64 dat = 0; @@ -749,6 +1052,51 @@ static void do_write_request(struct cvm_mmc_host *host, struct mmc_request *mrq) sg_miter_stop(smi); } +static void cvm_mmc_track_switch(struct cvm_mmc_slot *slot, u32 cmd_arg) +{ + u8 how = (cmd_arg >> 24) & 3; + u8 where = (u8)(cmd_arg >> 16); + u8 val = (u8)(cmd_arg >> 8); + + slot->want_switch = slot->cached_switch; + + /* + * track ext_csd assignments (how==3) for critical entries + * to make sure we follow up with MIO_EMM_SWITCH adjustment + * before ANY mmc/core interaction at old settings. 
+ * Current mmc/core logic (linux 4.14) does not set/clear + * bits (how = 1 or 2), which would require more complex + * logic to track the intent of a change + */ + + if (how != 3) + return; + + switch (where) { + case EXT_CSD_BUS_WIDTH: + slot->want_switch &= ~MIO_EMM_SWITCH_BUS_WIDTH; + slot->want_switch |= + FIELD_PREP(MIO_EMM_SWITCH_BUS_WIDTH, val); + break; + case EXT_CSD_POWER_CLASS: + slot->want_switch &= ~MIO_EMM_SWITCH_POWER_CLASS; + slot->want_switch |= + FIELD_PREP(MIO_EMM_SWITCH_POWER_CLASS, val); + break; + case EXT_CSD_HS_TIMING: + slot->want_switch &= ~MIO_EMM_SWITCH_TIMING; + if (val) + slot->want_switch |= + FIELD_PREP(MIO_EMM_SWITCH_TIMING, + (1 << (val - 1))); + break; + default: + return; + } + + slot->cmd6_pending = true; +} + static void cvm_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq) { struct cvm_mmc_slot *slot = mmc_priv(mmc); @@ -777,23 +1125,27 @@ static void cvm_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq) mods = cvm_mmc_get_cr_mods(cmd); - WARN_ON(host->current_req); - host->current_req = mrq; + WARN_ON(slot->current_req); + mrq->host = mmc; + slot->current_req = mrq; if (cmd->data) { if (cmd->data->flags & MMC_DATA_READ) - do_read_request(host, mrq); + do_read_request(slot, mrq); else - do_write_request(host, mrq); + do_write_request(slot, mrq); if (cmd->data->timeout_ns) set_wdog(slot, cmd->data->timeout_ns); } else set_wdog(slot, 0); - host->dma_active = false; + slot->dma_active = false; host->int_enable(host, MIO_EMM_INT_CMD_DONE | MIO_EMM_INT_CMD_ERR); + if (cmd->opcode == MMC_SWITCH) + cvm_mmc_track_switch(slot, cmd->arg); + emm_cmd = FIELD_PREP(MIO_EMM_CMD_VAL, 1) | FIELD_PREP(MIO_EMM_CMD_CTYPE_XOR, mods.ctype_xor) | FIELD_PREP(MIO_EMM_CMD_RTYPE_XOR, mods.rtype_xor) | @@ -819,37 +1171,257 @@ retry: if (!retries) dev_err(host->dev, "Bad status: %llx before command write\n", rsp_sts); writeq(emm_cmd, host->base + MIO_EMM_CMD(host)); + if (cmd->opcode == MMC_SWITCH) + udelay(1300); +} + +static void 
cvm_mmc_wait_done(struct mmc_request *cvm_mrq) +{ + complete(&cvm_mrq->completion); +} + +static int cvm_mmc_r1_cmd(struct mmc_host *mmc, u32 *statp, u32 opcode) +{ + static struct mmc_command cmd = {}; + static struct mmc_request cvm_mrq = {}; + + if (!opcode) + opcode = MMC_SEND_STATUS; + cmd.opcode = opcode; + if (mmc->card) + cmd.arg = mmc->card->rca << 16; + else + cmd.arg = 1 << 16; + cmd.flags = MMC_RSP_SPI_R2 | MMC_RSP_R1 | MMC_CMD_AC; + cmd.data = NULL; + cvm_mrq.cmd = &cmd; + + init_completion(&cvm_mrq.completion); + cvm_mrq.done = cvm_mmc_wait_done; + + cvm_mmc_request(mmc, &cvm_mrq); + if (!wait_for_completion_timeout(&cvm_mrq.completion, + msecs_to_jiffies(10))) { + mmc_abort_tuning(mmc, opcode); + return -ETIMEDOUT; + } + + if (statp) + *statp = cmd.resp[0]; + + return cvm_mrq.cmd->error; +} + +static int cvm_mmc_data_tuning(struct mmc_host *mmc, u32 *statp, u32 opcode) +{ + int err = 0; + u8 *ext_csd; + static struct mmc_command cmd = {}; + static struct mmc_data data = {}; + static struct mmc_request cvm_mrq = {}; + static struct scatterlist sg; + struct cvm_mmc_slot *slot = mmc_priv(mmc); + struct mmc_card *card = mmc->card; + + if (!(slot->cached_switch & MIO_EMM_SWITCH_HS400_TIMING)) { + int edetail = -EINVAL; + int core_opinion; + + core_opinion = + mmc_send_tuning(mmc, opcode, &edetail); + + /* only accept mmc/core opinion when it's happy */ + if (!core_opinion) + return core_opinion; + } + + /* EXT_CSD supported only after ver 3 */ + if (card && card->csd.mmca_vsn <= CSD_SPEC_VER_3) + return -EOPNOTSUPP; + /* + * As the ext_csd is so large and mostly unused, we don't store the + * raw block in mmc_card. 
+ */ + ext_csd = kzalloc(BLKSZ_EXT_CSD, GFP_KERNEL); + if (!ext_csd) + return -ENOMEM; + + cvm_mrq.cmd = &cmd; + cvm_mrq.data = &data; + cmd.data = &data; + + cmd.opcode = MMC_SEND_EXT_CSD; + cmd.arg = 0; + cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC; + + data.blksz = BLKSZ_EXT_CSD; + data.blocks = 1; + data.flags = MMC_DATA_READ; + data.sg = &sg; + data.sg_len = 1; + + sg_init_one(&sg, ext_csd, BLKSZ_EXT_CSD); + + /* set timeout */ + if (card) { + /* SD cards use a 100 multiplier rather than 10 */ + u32 mult = mmc_card_sd(card) ? 100 : 10; + + data.timeout_ns = card->csd.taac_ns * mult; + data.timeout_clks = card->csd.taac_clks * mult; + } else { + data.timeout_ns = 50 * NSEC_PER_MSEC; + } + + init_completion(&cvm_mrq.completion); + cvm_mrq.done = cvm_mmc_wait_done; + + cvm_mmc_request(mmc, &cvm_mrq); + if (!wait_for_completion_timeout(&cvm_mrq.completion, + msecs_to_jiffies(100))) { + mmc_abort_tuning(mmc, cmd.opcode); + err = -ETIMEDOUT; + } + + data.sg_len = 0; /* FIXME: catch over-time completions? 
*/ + kfree(ext_csd); + + if (err) + return err; + + if (statp) + *statp = cvm_mrq.cmd->resp[0]; + + return cvm_mrq.cmd->error; +} + +/* adjusters for the 4 otx2 delay line taps */ +struct adj { + const char *name; + u64 mask; + int (*test)(struct mmc_host *mmc, u32 *statp, u32 opcode); + u32 opcode; + bool ddr_only; +}; + +static int adjust_tuning(struct mmc_host *mmc, struct adj *adj, u32 opcode) +{ + int err, start_run = -1, best_run = 0, best_start = -1; + int last_good = -1; + bool prev_ok = false; + u64 timing, tap; + struct cvm_mmc_slot *slot = mmc_priv(mmc); + struct cvm_mmc_host *host = slot->host; + char how[MAX_NO_OF_TAPS+1] = ""; + + /* loop over range+1 to simplify processing */ + for (tap = 0; tap <= MAX_NO_OF_TAPS; tap++, prev_ok = !err) { + if (tap < MAX_NO_OF_TAPS) { + timing = readq(host->base + MIO_EMM_TIMING(host)); + timing &= ~adj->mask; + timing |= (tap << __bf_shf(adj->mask)); + writeq(timing, host->base + MIO_EMM_TIMING(host)); + + err = adj->test(mmc, NULL, opcode); + + how[tap] = "-+"[!err]; + if (!err) + last_good = tap; + } else { + /* + * putting the end+1 case in loop simplifies + * logic, allowing 'prev_ok' to process a + * sweet spot in tuning which extends to wall. + */ + err = -EINVAL; + } + + if (!err) { + /* + * If no CRC/etc errors in response, but previous + * failed, note the start of a new run + */ + if (!prev_ok) + start_run = tap; + } else if (prev_ok) { + int run = tap - 1 - start_run; + + /* did we just exit a wider sweet spot? 
*/ + if (start_run >= 0 && run > best_run) { + best_start = start_run; + best_run = run; + } + } + } + + if (best_start < 0) { + dev_warn(host->dev, "%s %lldMHz tuning %s failed\n", + mmc_hostname(mmc), slot->clock / 1000000, adj->name); + return -EINVAL; + } + + tap = best_start + best_run / 2; + how[tap] = '@'; + if (tapdance) { + tap = last_good - tapdance; + how[tap] = 'X'; + } + dev_dbg(host->dev, "%s/%s %d/%lld/%d %s\n", + mmc_hostname(mmc), adj->name, + best_start, tap, best_start + best_run, + how); + slot->taps &= ~adj->mask; + slot->taps |= (tap << __bf_shf(adj->mask)); + cvm_mmc_set_timing(slot); + return 0; +} + +static u32 max_supported_frequency(struct cvm_mmc_host *host) +{ + /* Default maximum frequency is 52000000 for chips prior to 9X */ + u32 max_frequency = MHZ_52; + + if (is_mmc_otx2(host)) { + /* Default max frequency is 200MHz for 9X chips */ + max_frequency = MHZ_200; + + /* Erratum is only applicable to pass A0 */ + if (is_mmc_otx2_A0(host)) + max_frequency = MHZ_100; + } + return max_frequency; } static void cvm_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) { + struct cvm_mmc_slot *slot = mmc_priv(mmc); struct cvm_mmc_host *host = slot->host; int clk_period = 0, power_class = 10, bus_width = 0; - u64 clock, emm_switch; + u64 clock, emm_switch, mode; + u32 max_f; + + if (ios->power_mode == MMC_POWER_OFF) { + if (host->powered) { + cvm_mmc_reset_bus(slot); + if (host->global_pwr_gpiod) + host->set_shared_power(host, 0); + else if (!IS_ERR_OR_NULL(mmc->supply.vmmc)) + mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0); + host->powered = false; + } + set_wdog(slot, 0); + return; + } host->acquire_bus(host); cvm_mmc_switch_to(slot); - /* Set the power state */ - switch (ios->power_mode) { - case MMC_POWER_ON: - break; - - case MMC_POWER_OFF: - cvm_mmc_reset_bus(slot); - if (host->global_pwr_gpiod) - host->set_shared_power(host, 0); - else if (!IS_ERR(mmc->supply.vmmc)) - mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0); - break; - - case 
MMC_POWER_UP: + if (ios->power_mode == MMC_POWER_UP) { if (host->global_pwr_gpiod) host->set_shared_power(host, 1); - else if (!IS_ERR(mmc->supply.vmmc)) + else if (!IS_ERR_OR_NULL(mmc->supply.vmmc)) mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, ios->vdd); - break; } /* Convert bus width to HW definition */ @@ -866,41 +1438,201 @@ static void cvm_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) } /* DDR is available for 4/8 bit bus width */ - if (ios->bus_width && ios->timing == MMC_TIMING_MMC_DDR52) - bus_width |= 4; + switch (ios->timing) { + case MMC_TIMING_UHS_DDR50: + case MMC_TIMING_MMC_DDR52: + if (ios->bus_width) + bus_width |= 4; + break; + case MMC_TIMING_MMC_HS400: + if (ios->bus_width & 2) + bus_width |= 4; + break; + } /* Change the clock frequency. */ clock = ios->clock; - if (clock > 52000000) - clock = 52000000; + max_f = max_supported_frequency(host); + + if (clock < mmc->f_min) + clock = mmc->f_min; + if (clock > max_f) + clock = max_f; + slot->clock = clock; - if (clock) - clk_period = (host->sys_freq + clock - 1) / (2 * clock); + if (clock) { + clk_period = host->sys_freq / (2 * clock); + /* check to not exceed requested speed */ + while (1) { + int hz = host->sys_freq / (2 * clk_period); - emm_switch = FIELD_PREP(MIO_EMM_SWITCH_HS_TIMING, - (ios->timing == MMC_TIMING_MMC_HS)) | + if (hz <= clock) + break; + clk_period++; + } + } + + emm_switch = FIELD_PREP(MIO_EMM_SWITCH_BUS_WIDTH, bus_width) | FIELD_PREP(MIO_EMM_SWITCH_POWER_CLASS, power_class) | FIELD_PREP(MIO_EMM_SWITCH_CLK_HI, clk_period) | FIELD_PREP(MIO_EMM_SWITCH_CLK_LO, clk_period); + switch (ios->timing) { + case MMC_TIMING_LEGACY: + break; + case MMC_TIMING_MMC_HS: + case MMC_TIMING_SD_HS: + case MMC_TIMING_UHS_SDR12: + case MMC_TIMING_UHS_SDR25: + case MMC_TIMING_UHS_SDR50: + case MMC_TIMING_UHS_SDR104: + case MMC_TIMING_UHS_DDR50: + case MMC_TIMING_MMC_DDR52: + emm_switch |= FIELD_PREP(MIO_EMM_SWITCH_HS_TIMING, 1); + break; + case MMC_TIMING_MMC_HS200: + emm_switch |= 
FIELD_PREP(MIO_EMM_SWITCH_HS200_TIMING, 1); + break; + case MMC_TIMING_MMC_HS400: + emm_switch |= FIELD_PREP(MIO_EMM_SWITCH_HS400_TIMING, 1); + break; + } set_bus_id(&emm_switch, slot->bus_id); + pr_debug("mmc-slot%d trying switch %llx w%lld hs%lld hs200:%lld hs400:%lld\n", + slot->bus_id, emm_switch, + FIELD_GET(MIO_EMM_SWITCH_BUS_WIDTH, emm_switch), + FIELD_GET(MIO_EMM_SWITCH_HS_TIMING, emm_switch), + FIELD_GET(MIO_EMM_SWITCH_HS200_TIMING, emm_switch), + FIELD_GET(MIO_EMM_SWITCH_HS400_TIMING, emm_switch)); + if (!switch_val_changed(slot, emm_switch)) goto out; set_wdog(slot, 0); do_switch(host, emm_switch); + + mode = readq(host->base + MIO_EMM_MODE(host, slot->bus_id)); + pr_debug("mmc-slot%d mode %llx w%lld hs%lld hs200:%lld hs400:%lld\n", + slot->bus_id, mode, + (mode >> 40) & 7, (mode >> 48) & 1, + (mode >> 49) & 1, (mode >> 50) & 1); + slot->cached_switch = emm_switch; + host->powered = true; + cvm_mmc_configure_delay(slot); out: host->release_bus(host); } +static struct adj adj[] = { + { "CMD_IN", MIO_EMM_TIMING_CMD_IN, + cvm_mmc_r1_cmd, MMC_SEND_STATUS, }, + { "DATA_IN", MIO_EMM_TIMING_DATA_IN, + cvm_mmc_data_tuning, }, + { NULL, }, +}; + +static int cvm_scan_tuning(struct mmc_host *mmc, u32 opcode) +{ + struct cvm_mmc_slot *slot = mmc_priv(mmc); + struct adj *a; + int ret; + + for (a = adj; a->name; a++) { + if (a->ddr_only && !cvm_is_mmc_timing_ddr(slot)) + continue; + + ret = adjust_tuning(mmc, a, + a->opcode ?: opcode); + + if (ret) + return ret; + } + + cvm_mmc_set_timing(slot); + return 0; +} + +static int cvm_execute_tuning(struct mmc_host *mmc, u32 opcode) +{ + struct cvm_mmc_slot *slot = mmc_priv(mmc); + struct cvm_mmc_host *host = slot->host; + int clk_period, hz; + + int ret; + + do { + u64 emm_switch = + readq(host->base + MIO_EMM_MODE(host, slot->bus_id)); + + clk_period = FIELD_GET(MIO_EMM_SWITCH_CLK_LO, emm_switch); + dev_info(slot->host->dev, "%s re-tuning\n", + mmc_hostname(mmc)); + ret = cvm_scan_tuning(mmc, opcode); + if (ret) { + int 
inc = clk_period >> 3; + + if (!inc) + inc++; + clk_period += inc; + hz = host->sys_freq / (2 * clk_period); + pr_debug("clk_period %d += %d, now %d Hz\n", + clk_period - inc, inc, hz); + + if (hz < 400000) + break; + + slot->clock = hz; + mmc->ios.clock = hz; + + emm_switch &= ~MIO_EMM_SWITCH_CLK_LO; + emm_switch |= FIELD_PREP(MIO_EMM_SWITCH_CLK_LO, + clk_period); + emm_switch &= ~MIO_EMM_SWITCH_CLK_HI; + emm_switch |= FIELD_PREP(MIO_EMM_SWITCH_CLK_HI, + clk_period); + do_switch(host, emm_switch); + } + } while (ret); + + return ret; +} + +static int cvm_prepare_hs400_tuning(struct mmc_host *mmc, struct mmc_ios *ios) +{ + struct cvm_mmc_slot *slot = mmc_priv(mmc); + + return cvm_mmc_configure_delay(slot); +} + +static void cvm_mmc_reset(struct mmc_host *mmc) +{ + struct cvm_mmc_slot *slot = mmc_priv(mmc); + struct cvm_mmc_host *host = slot->host; + u64 r; + + cvm_mmc_reset_bus(slot); + + r = FIELD_PREP(MIO_EMM_CMD_VAL, 1) | + FIELD_PREP(MIO_EMM_CMD_BUS_ID, slot->bus_id); + + writeq(r, host->base + MIO_EMM_CMD(host)); + + do { + r = readq(host->base + MIO_EMM_RSP_STS(host)); + } while (!(r & MIO_EMM_RSP_STS_CMD_DONE)); +} + static const struct mmc_host_ops cvm_mmc_ops = { .request = cvm_mmc_request, .set_ios = cvm_mmc_set_ios, .get_ro = mmc_gpio_get_ro, .get_cd = mmc_gpio_get_cd, + .hw_reset = cvm_mmc_reset, + .execute_tuning = cvm_execute_tuning, + .prepare_hs400_tuning = cvm_prepare_hs400_tuning, }; static void cvm_mmc_set_clock(struct cvm_mmc_slot *slot, unsigned int clock) @@ -917,7 +1649,7 @@ static int cvm_mmc_init_lowlevel(struct cvm_mmc_slot *slot) struct cvm_mmc_host *host = slot->host; u64 emm_switch; - /* Enable this bus slot. */ + /* Enable this bus slot. Overridden when vqmmc-switching engaged */ host->emm_cfg |= (1ull << slot->bus_id); writeq(host->emm_cfg, slot->host->base + MIO_EMM_CFG(host)); udelay(10); @@ -933,8 +1665,8 @@ static int cvm_mmc_init_lowlevel(struct cvm_mmc_slot *slot) /* Make the changes take effect on this bus slot. 
*/ set_bus_id(&emm_switch, slot->bus_id); do_switch(host, emm_switch); - slot->cached_switch = emm_switch; + host->powered = true; /* * Set watchdog timeout value and default reset value @@ -953,7 +1685,7 @@ static int cvm_mmc_of_parse(struct device *dev, struct cvm_mmc_slot *slot) u32 id, cmd_skew = 0, dat_skew = 0, bus_width = 0; struct device_node *node = dev->of_node; struct mmc_host *mmc = slot->mmc; - u64 clock_period; + u32 max_frequency, current_drive, clk_slew; int ret; ret = of_property_read_u32(node, "reg", &id); @@ -962,8 +1694,14 @@ static int cvm_mmc_of_parse(struct device *dev, struct cvm_mmc_slot *slot) return ret; } - if (id >= CAVIUM_MAX_MMC || slot->host->slot[id]) { - dev_err(dev, "Invalid reg property on %pOF\n", node); + if (id >= CAVIUM_MAX_MMC) { + dev_err(dev, "Invalid reg=<%d> property on %pOF\n", id, node); + return -EINVAL; + } + + if (slot->host->slot[id]) { + dev_err(dev, "Duplicate reg=<%d> property on %pOF\n", + id, node); return -EINVAL; } @@ -974,7 +1712,7 @@ static int cvm_mmc_of_parse(struct device *dev, struct cvm_mmc_slot *slot) * Legacy Octeon firmware has no regulator entry, fall-back to * a hard-coded voltage to get a sane OCR. 
*/ - if (IS_ERR(mmc->supply.vmmc)) + if (IS_ERR_OR_NULL(mmc->supply.vmmc)) mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; /* Common MMC bindings */ @@ -982,7 +1720,7 @@ static int cvm_mmc_of_parse(struct device *dev, struct cvm_mmc_slot *slot) if (ret) return ret; - /* Set bus width */ + /* Set bus width from obsolete properties, if unset */ if (!(mmc->caps & (MMC_CAP_8_BIT_DATA | MMC_CAP_4_BIT_DATA))) { of_property_read_u32(node, "cavium,bus-max-width", &bus_width); if (bus_width == 8) @@ -991,19 +1729,40 @@ static int cvm_mmc_of_parse(struct device *dev, struct cvm_mmc_slot *slot) mmc->caps |= MMC_CAP_4_BIT_DATA; } + max_frequency = max_supported_frequency(slot->host); + /* Set maximum and minimum frequency */ if (!mmc->f_max) of_property_read_u32(node, "spi-max-frequency", &mmc->f_max); - if (!mmc->f_max || mmc->f_max > 52000000) - mmc->f_max = 52000000; - mmc->f_min = 400000; + if (!mmc->f_max || mmc->f_max > max_frequency) + mmc->f_max = max_frequency; + mmc->f_min = KHZ_400; /* Sampling register settings, period in picoseconds */ - clock_period = 1000000000000ull / slot->host->sys_freq; of_property_read_u32(node, "cavium,cmd-clk-skew", &cmd_skew); of_property_read_u32(node, "cavium,dat-clk-skew", &dat_skew); - slot->cmd_cnt = (cmd_skew + clock_period / 2) / clock_period; - slot->dat_cnt = (dat_skew + clock_period / 2) / clock_period; + if (is_mmc_8xxx(slot->host) || is_mmc_otx2(slot->host)) { + slot->cmd_cnt = cmd_skew; + slot->data_cnt = dat_skew; + } else { + u64 clock_period = 1000000000000ull / slot->host->sys_freq; + + slot->cmd_cnt = (cmd_skew + clock_period / 2) / clock_period; + slot->data_cnt = (dat_skew + clock_period / 2) / clock_period; + } + + /* Get current drive and clk skew */ + ret = of_property_read_u32(node, "cavium,drv-strength", ¤t_drive); + if (ret) + slot->drive = -1; + else + slot->drive = current_drive; + + ret = of_property_read_u32(node, "cavium,clk-slew", &clk_slew); + if (ret) + slot->slew = -1; + else + slot->slew = clk_slew; 
return id; } @@ -1012,6 +1771,7 @@ int cvm_mmc_of_slot_probe(struct device *dev, struct cvm_mmc_host *host) { struct cvm_mmc_slot *slot; struct mmc_host *mmc; + struct iommu_domain *dom; int ret, id; mmc = mmc_alloc_host(sizeof(struct cvm_mmc_slot), dev); @@ -1030,16 +1790,19 @@ int cvm_mmc_of_slot_probe(struct device *dev, struct cvm_mmc_host *host) /* Set up host parameters */ mmc->ops = &cvm_mmc_ops; + mmc->caps |= MMC_CAP_ERASE | MMC_CAP_BUS_WIDTH_TEST; + mmc->caps |= MMC_CAP_CMD23 | MMC_CAP_POWER_OFF_CARD; + /* - * We only have a 3.3v supply, we cannot support any - * of the UHS modes. We do support the high speed DDR - * modes up to 52MHz. + * We only have a 3.3v supply for slots, we cannot + * support any of the UHS modes. We do support the + * high speed DDR modes up to 52MHz. * * Disable bounce buffers for max_segs = 1 */ - mmc->caps |= MMC_CAP_MMC_HIGHSPEED | MMC_CAP_SD_HIGHSPEED | - MMC_CAP_ERASE | MMC_CAP_CMD23 | MMC_CAP_POWER_OFF_CARD | - MMC_CAP_3_3V_DDR; + + if (!is_mmc_otx2(host)) + mmc->caps |= MMC_CAP_3_3V_DDR; if (host->use_sg) mmc->max_segs = 16; @@ -1055,14 +1818,30 @@ int cvm_mmc_of_slot_probe(struct device *dev, struct cvm_mmc_host *host) /* DMA block count field is 15 bits */ mmc->max_blk_count = 32767; + dom = iommu_get_domain_for_dev(dev->parent); + if (dom && dom->type == IOMMU_DOMAIN_IDENTITY) { + unsigned int max_size = (1 << IO_TLB_SHIFT) * IO_TLB_SEGSIZE; + + if (mmc->max_seg_size > max_size) + mmc->max_seg_size = max_size; + + max_size *= mmc->max_segs; + + if (mmc->max_req_size > max_size) + mmc->max_req_size = max_size; + } + + mmc_can_retune(mmc); + slot->clock = mmc->f_min; slot->bus_id = id; slot->cached_rca = 1; host->acquire_bus(host); host->slot[id] = slot; - cvm_mmc_switch_to(slot); + host->use_vqmmc |= !IS_ERR_OR_NULL(slot->mmc->supply.vqmmc); cvm_mmc_init_lowlevel(slot); + cvm_mmc_switch_to(slot); host->release_bus(host); ret = mmc_add_host(mmc); diff --git a/drivers/mmc/host/cavium.h b/drivers/mmc/host/cavium.h index 
f3eea5eaa678..f38353171678 100644 --- a/drivers/mmc/host/cavium.h +++ b/drivers/mmc/host/cavium.h @@ -19,8 +19,42 @@ #include <linux/of.h> #include <linux/scatterlist.h> #include <linux/semaphore.h> +#include <linux/pci.h> #define CAVIUM_MAX_MMC 4 +#define BLKSZ_EXT_CSD 512 +#define MRVL_OCTEONTX2_96XX_PARTNUM 0xB2 + +/* Subsystem Device ID */ +#define PCI_SUBSYS_DEVID_8XXX 0xA +#define PCI_SUBSYS_DEVID_9XXX 0xB +#define PCI_SUBSYS_DEVID_95XX 0xB3 + +#define KHZ_400 (400000) +#define MHZ_26 (26000000) +#define MHZ_52 (52000000) +#define MHZ_100 (100000000) +#define MHZ_200 (200000000) + +/* octtx2: emmc interface io current drive strength */ +#define MILLI_AMP_2 (0x0) +#define MILLI_AMP_4 (0x1) +#define MILLI_AMP_8 (0x2) +#define MILLI_AMP_16 (0x3) + +/* octtx2: emmc interface io clk skew */ +#define LOW_SLEW_RATE (0x0) +#define HIGH_SLEW_RATE (0x1) + +/* octtx2: emmc interface calibration */ +#define START_CALIBRATION (0x1) +#define TOTAL_NO_OF_TAPS (512) +#define PS_10000 (10 * 1000) +#define PS_5000 (5000) +#define PS_2500 (2500) +#define PS_400 (400) +#define MAX_NO_OF_TAPS 64 + /* DMA register addresses */ #define MIO_EMM_DMA_FIFO_CFG(x) (0x00 + x->reg_off_dma) @@ -33,8 +67,17 @@ #define MIO_EMM_DMA_INT_ENA_W1S(x) (0x40 + x->reg_off_dma) #define MIO_EMM_DMA_INT_ENA_W1C(x) (0x48 + x->reg_off_dma) +/* octtx2 specific registers */ +#define MIO_EMM_CALB(x) (0xC0 + x->reg_off) +#define MIO_EMM_TAP(x) (0xC8 + x->reg_off) +#define MIO_EMM_TIMING(x) (0xD0 + x->reg_off) +#define MIO_EMM_DEBUG(x) (0xF8 + x->reg_off) + /* register addresses */ #define MIO_EMM_CFG(x) (0x00 + x->reg_off) +#define MIO_EMM_MODE(x, s) (0x08 + 8*(s) + (x)->reg_off) +/* octtx2 specific register */ +#define MIO_EMM_IO_CTL(x) (0x40 + x->reg_off) #define MIO_EMM_SWITCH(x) (0x48 + x->reg_off) #define MIO_EMM_DMA(x) (0x50 + x->reg_off) #define MIO_EMM_CMD(x) (0x58 + x->reg_off) @@ -56,6 +99,7 @@ struct cvm_mmc_host { struct device *dev; void __iomem *base; void __iomem *dma_base; + struct pci_dev 
*pdev; int reg_off; int reg_off_dma; u64 emm_cfg; @@ -64,12 +108,10 @@ struct cvm_mmc_host { struct clk *clk; int sys_freq; - struct mmc_request *current_req; - struct sg_mapping_iter smi; - bool dma_active; bool use_sg; - bool has_ciu3; + bool powered; + bool use_vqmmc; /* must disable slots over switch */ bool big_dma_addr; bool need_irq_handler_lock; spinlock_t irq_handler_lock; @@ -80,6 +122,9 @@ struct cvm_mmc_host { struct cvm_mmc_slot *slot[CAVIUM_MAX_MMC]; struct platform_device *slot_pdev[CAVIUM_MAX_MMC]; + /* octtx2 specific */ + unsigned int per_tap_delay; /* per tap delay in pico second */ + unsigned long delay_logged; /* per-ios.timing bitmask */ void (*set_shared_power)(struct cvm_mmc_host *, int); void (*acquire_bus)(struct cvm_mmc_host *); @@ -94,16 +139,27 @@ struct cvm_mmc_host { struct cvm_mmc_slot { struct mmc_host *mmc; /* slot-level mmc_core object */ struct cvm_mmc_host *host; /* common hw for all slots */ + struct mmc_request *current_req; u64 clock; + u32 ecount, gcount; u64 cached_switch; u64 cached_rca; - unsigned int cmd_cnt; /* sample delay */ - unsigned int dat_cnt; /* sample delay */ + struct sg_mapping_iter smi; + bool dma_active; + + u64 taps; /* otx2: MIO_EMM_TIMING */ + unsigned int cmd_cnt; /* otx: sample cmd in delay */ + unsigned int data_cnt; /* otx: sample data in delay */ + + int drive; /* Current drive */ + int slew; /* clock skew */ int bus_id; + bool cmd6_pending; + u64 want_switch; }; struct cvm_mmc_cr_type { @@ -161,6 +217,21 @@ struct cvm_mmc_cr_mods { #define MIO_EMM_DMA_CFG_SIZE GENMASK_ULL(55, 36) #define MIO_EMM_DMA_CFG_ADR GENMASK_ULL(35, 0) +#define MIO_EMM_CFG_BUS_ENA GENMASK_ULL(3, 0) + +#define MIO_EMM_IO_CTL_DRIVE GENMASK_ULL(3, 2) +#define MIO_EMM_IO_CTL_SLEW BIT_ULL(0) + +#define MIO_EMM_CALB_START BIT_ULL(0) +#define MIO_EMM_TAP_DELAY GENMASK_ULL(7, 0) + +#define MIO_EMM_TIMING_CMD_IN GENMASK_ULL(53, 48) +#define MIO_EMM_TIMING_CMD_OUT GENMASK_ULL(37, 32) +#define MIO_EMM_TIMING_DATA_IN GENMASK_ULL(21, 16) 
+#define MIO_EMM_TIMING_DATA_OUT GENMASK_ULL(5, 0) + +#define MIO_EMM_INT_NCB_RAS BIT_ULL(8) +#define MIO_EMM_INT_NCB_FLT BIT_ULL(7) #define MIO_EMM_INT_SWITCH_ERR BIT_ULL(6) #define MIO_EMM_INT_SWITCH_DONE BIT_ULL(5) #define MIO_EMM_INT_DMA_ERR BIT_ULL(4) @@ -169,6 +240,9 @@ struct cvm_mmc_cr_mods { #define MIO_EMM_INT_CMD_DONE BIT_ULL(1) #define MIO_EMM_INT_BUF_DONE BIT_ULL(0) +#define MIO_EMM_DMA_INT_FIFO BIT_ULL(1) +#define MIO_EMM_DMA_INT_DMA BIT_ULL(0) + #define MIO_EMM_RSP_STS_BUS_ID GENMASK_ULL(61, 60) #define MIO_EMM_RSP_STS_CMD_VAL BIT_ULL(59) #define MIO_EMM_RSP_STS_SWITCH_VAL BIT_ULL(58) @@ -200,9 +274,14 @@ struct cvm_mmc_cr_mods { #define MIO_EMM_SWITCH_ERR0 BIT_ULL(58) #define MIO_EMM_SWITCH_ERR1 BIT_ULL(57) #define MIO_EMM_SWITCH_ERR2 BIT_ULL(56) +#define MIO_EMM_SWITCH_ERRS GENMASK_ULL(58, 56) +#define MIO_EMM_SWITCH_HS400_TIMING BIT_ULL(50) +#define MIO_EMM_SWITCH_HS200_TIMING BIT_ULL(49) #define MIO_EMM_SWITCH_HS_TIMING BIT_ULL(48) +#define MIO_EMM_SWITCH_TIMING GENMASK_ULL(50, 48) #define MIO_EMM_SWITCH_BUS_WIDTH GENMASK_ULL(42, 40) #define MIO_EMM_SWITCH_POWER_CLASS GENMASK_ULL(35, 32) +#define MIO_EMM_SWITCH_CLK GENMASK_ULL(31, 0) #define MIO_EMM_SWITCH_CLK_HI GENMASK_ULL(31, 16) #define MIO_EMM_SWITCH_CLK_LO GENMASK_ULL(15, 0) @@ -210,6 +289,52 @@ struct cvm_mmc_cr_mods { irqreturn_t cvm_mmc_interrupt(int irq, void *dev_id); int cvm_mmc_of_slot_probe(struct device *dev, struct cvm_mmc_host *host); int cvm_mmc_of_slot_remove(struct cvm_mmc_slot *slot); + extern const char *cvm_mmc_irq_names[]; +static inline bool is_mmc_8xxx(struct cvm_mmc_host *host) +{ +#ifdef CONFIG_ARM64 + struct pci_dev *pdev = host->pdev; + u32 chip_id = (pdev->subsystem_device >> 12) & 0xF; + + return (chip_id == PCI_SUBSYS_DEVID_8XXX); +#else + return false; +#endif +} + +static inline bool is_mmc_otx2(struct cvm_mmc_host *host) +{ +#ifdef CONFIG_ARM64 + struct pci_dev *pdev = host->pdev; + u32 chip_id = (pdev->subsystem_device >> 12) & 0xF; + + return (chip_id == 
PCI_SUBSYS_DEVID_9XXX); +#else + return false; +#endif +} + +static inline bool is_mmc_otx2_A0(struct cvm_mmc_host *host) +{ +#ifdef CONFIG_ARM64 + struct pci_dev *pdev = host->pdev; + u32 chip_id = (pdev->subsystem_device >> 8) & 0xFF; + + return (pdev->revision == 0x00) && + (chip_id == MRVL_OCTEONTX2_96XX_PARTNUM); +#else + return false; +#endif +} + +static inline bool is_mmc_95xx(struct cvm_mmc_host *host) +{ + struct pci_dev *pdev = host->pdev; + u32 chip_id = (pdev->subsystem_device >> 8) & 0xFF; + + return (chip_id == PCI_SUBSYS_DEVID_95XX); +} + #endif |