aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/iommu/intel-iommu.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/iommu/intel-iommu.c')
-rw-r--r--drivers/iommu/intel-iommu.c167
1 files changed, 154 insertions, 13 deletions
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index a2a03df97704..58898e389577 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -307,6 +307,14 @@ static int hw_pass_through = 1;
*/
#define DOMAIN_FLAG_LOSE_CHILDREN BIT(1)
+/*
+ * When VT-d works in the scalable mode, it allows DMA translation to
+ * happen through either first level or second level page table. This
+ * bit marks that the DMA translation for the domain goes through the
+ * first level page table, otherwise, it goes through the second level.
+ */
+#define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(2)
+
#define for_each_domain_iommu(idx, domain) \
for (idx = 0; idx < g_num_of_iommus; idx++) \
if (domain->iommu_refcnt[idx])
@@ -552,6 +560,11 @@ static inline int domain_type_is_si(struct dmar_domain *domain)
return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
}
+static inline bool domain_use_first_level(struct dmar_domain *domain)
+{
+ return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL;
+}
+
static inline int domain_pfn_supported(struct dmar_domain *domain,
unsigned long pfn)
{
@@ -667,11 +680,12 @@ static int domain_update_iommu_snooping(struct intel_iommu *skip)
return ret;
}
-static int domain_update_iommu_superpage(struct intel_iommu *skip)
+static int domain_update_iommu_superpage(struct dmar_domain *domain,
+ struct intel_iommu *skip)
{
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
- int mask = 0xf;
+ int mask = 0x3;
if (!intel_iommu_superpage) {
return 0;
@@ -681,7 +695,13 @@ static int domain_update_iommu_superpage(struct intel_iommu *skip)
rcu_read_lock();
for_each_active_iommu(iommu, drhd) {
if (iommu != skip) {
- mask &= cap_super_page_val(iommu->cap);
+ if (domain && domain_use_first_level(domain)) {
+ if (!cap_fl1gp_support(iommu->cap))
+ mask = 0x1;
+ } else {
+ mask &= cap_super_page_val(iommu->cap);
+ }
+
if (!mask)
break;
}
@@ -691,12 +711,47 @@ static int domain_update_iommu_superpage(struct intel_iommu *skip)
return fls(mask);
}
+static int domain_update_device_node(struct dmar_domain *domain)
+{
+ struct device_domain_info *info;
+ int nid = NUMA_NO_NODE;
+
+ assert_spin_locked(&device_domain_lock);
+
+ if (list_empty(&domain->devices))
+ return NUMA_NO_NODE;
+
+ list_for_each_entry(info, &domain->devices, link) {
+ if (!info->dev)
+ continue;
+
+ /*
+ * There could possibly be multiple device numa nodes as devices
+ * within the same domain may sit behind different IOMMUs. There
+ * isn't perfect answer in such situation, so we select first
+ * come first served policy.
+ */
+ nid = dev_to_node(info->dev);
+ if (nid != NUMA_NO_NODE)
+ break;
+ }
+
+ return nid;
+}
+
/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
domain_update_iommu_coherency(domain);
domain->iommu_snooping = domain_update_iommu_snooping(NULL);
- domain->iommu_superpage = domain_update_iommu_superpage(NULL);
+ domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL);
+
+ /*
+ * If RHSA is missing, we should default to the device numa domain
+ * as fall back.
+ */
+ if (domain->nid == NUMA_NO_NODE)
+ domain->nid = domain_update_device_node(domain);
}
struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
@@ -919,6 +974,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
+ if (domain_use_first_level(domain)) {
+ pteval |= DMA_FL_PTE_XD;
+ if (domain->domain.type == IOMMU_DOMAIN_DMA)
+ pteval |= DMA_FL_PTE_ACCESS;
+ }
if (cmpxchg64(&pte->val, 0ULL, pteval))
/* Someone else set it while we were thinking; use theirs. */
free_pgtable_page(tmp_page);
@@ -1715,6 +1775,35 @@ static void free_dmar_iommu(struct intel_iommu *iommu)
#endif
}
+/*
+ * Check and return whether first level is used by default for
+ * DMA translation. Currently, we make it off by setting
+ * first_level_support = 0, and will change it to -1 after all
+ * map/unmap paths support first level page table.
+ */
+static bool first_level_by_default(void)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+ static int first_level_support = 0;
+
+ if (likely(first_level_support != -1))
+ return first_level_support;
+
+ first_level_support = 1;
+
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd) {
+ if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) {
+ first_level_support = 0;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return first_level_support;
+}
+
static struct dmar_domain *alloc_domain(int flags)
{
struct dmar_domain *domain;
@@ -1726,6 +1815,8 @@ static struct dmar_domain *alloc_domain(int flags)
memset(domain, 0, sizeof(*domain));
domain->nid = NUMA_NO_NODE;
domain->flags = flags;
+ if (first_level_by_default())
+ domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
domain->has_iotlb_device = false;
INIT_LIST_HEAD(&domain->devices);
@@ -2237,17 +2328,27 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
unsigned long sg_res = 0;
unsigned int largepage_lvl = 0;
unsigned long lvl_pages = 0;
+ u64 attr;
BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
return -EINVAL;
- prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
+ attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
+ if (domain_use_first_level(domain)) {
+ attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD;
+
+ if (domain->domain.type == IOMMU_DOMAIN_DMA) {
+ attr |= DMA_FL_PTE_ACCESS;
+ if (prot & DMA_PTE_WRITE)
+ attr |= DMA_FL_PTE_DIRTY;
+ }
+ }
if (!sg) {
sg_res = nr_pages;
- pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
+ pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
}
while (nr_pages > 0) {
@@ -2259,7 +2360,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
sg_res = aligned_nrpages(sg->offset, sg->length);
sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
sg->dma_length = sg->length;
- pteval = (sg_phys(sg) - pgoff) | prot;
+ pteval = (sg_phys(sg) - pgoff) | attr;
phys_pfn = pteval >> VTD_PAGE_SHIFT;
}
@@ -2466,6 +2567,36 @@ dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
return NULL;
}
+static int domain_setup_first_level(struct intel_iommu *iommu,
+ struct dmar_domain *domain,
+ struct device *dev,
+ int pasid)
+{
+ int flags = PASID_FLAG_SUPERVISOR_MODE;
+ struct dma_pte *pgd = domain->pgd;
+ int agaw, level;
+
+ /*
+ * Skip top levels of page tables for iommu which has
+ * less agaw than default. Unnecessary for PT mode.
+ */
+ for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
+ pgd = phys_to_virt(dma_pte_addr(pgd));
+ if (!dma_pte_present(pgd))
+ return -ENOMEM;
+ }
+
+ level = agaw_to_level(agaw);
+ if (level != 4 && level != 5)
+ return -EINVAL;
+
+ flags |= (level == 5) ? PASID_FLAG_FL5LP : 0;
+
+ return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid,
+ domain->iommu_did[iommu->seq_id],
+ flags);
+}
+
static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
int bus, int devfn,
struct device *dev,
@@ -2565,6 +2696,9 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
if (hw_pass_through && domain_type_is_si(domain))
ret = intel_pasid_setup_pass_through(iommu, domain,
dev, PASID_RID2PASID);
+ else if (domain_use_first_level(domain))
+ ret = domain_setup_first_level(iommu, domain, dev,
+ PASID_RID2PASID);
else
ret = intel_pasid_setup_second_level(iommu, domain,
dev, PASID_RID2PASID);
@@ -2751,6 +2885,7 @@ static int __init si_domain_init(int hw)
if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
domain_exit(si_domain);
+ si_domain = NULL;
return -EFAULT;
}
@@ -3371,6 +3506,10 @@ free_iommu:
disable_dmar_iommu(iommu);
free_dmar_iommu(iommu);
}
+ if (si_domain) {
+ domain_exit(si_domain);
+ si_domain = NULL;
+ }
kfree(g_iommus);
@@ -4465,7 +4604,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
iommu->name);
return -ENXIO;
}
- sp = domain_update_iommu_superpage(iommu) - 1;
+ sp = domain_update_iommu_superpage(NULL, iommu) - 1;
if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
pr_warn("%s: Doesn't support large page.\n",
iommu->name);
@@ -5202,8 +5341,6 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
intel_iommu_strict = 1;
}
- domain_update_iommu_cap(dmar_domain);
-
domain = &dmar_domain->domain;
domain->geometry.aperture_start = 0;
domain->geometry.aperture_end =
@@ -5304,8 +5441,12 @@ static int aux_domain_add_dev(struct dmar_domain *domain,
goto attach_failed;
/* Setup the PASID entry for mediated devices: */
- ret = intel_pasid_setup_second_level(iommu, domain, dev,
- domain->default_pasid);
+ if (domain_use_first_level(domain))
+ ret = domain_setup_first_level(iommu, domain, dev,
+ domain->default_pasid);
+ else
+ ret = intel_pasid_setup_second_level(iommu, domain, dev,
+ domain->default_pasid);
if (ret)
goto table_failed;
spin_unlock(&iommu->lock);
@@ -5453,7 +5594,7 @@ static void intel_iommu_aux_detach_device(struct iommu_domain *domain,
static int intel_iommu_map(struct iommu_domain *domain,
unsigned long iova, phys_addr_t hpa,
- size_t size, int iommu_prot)
+ size_t size, int iommu_prot, gfp_t gfp)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
u64 max_addr;