diff options
Diffstat (limited to 'drivers/iommu/intel-iommu.c')
-rw-r--r-- | drivers/iommu/intel-iommu.c | 167 |
1 files changed, 154 insertions, 13 deletions
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index a2a03df97704..58898e389577 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -307,6 +307,14 @@ static int hw_pass_through = 1; */ #define DOMAIN_FLAG_LOSE_CHILDREN BIT(1) +/* + * When VT-d works in the scalable mode, it allows DMA translation to + * happen through either first level or second level page table. This + * bit marks that the DMA translation for the domain goes through the + * first level page table, otherwise, it goes through the second level. + */ +#define DOMAIN_FLAG_USE_FIRST_LEVEL BIT(2) + #define for_each_domain_iommu(idx, domain) \ for (idx = 0; idx < g_num_of_iommus; idx++) \ if (domain->iommu_refcnt[idx]) @@ -552,6 +560,11 @@ static inline int domain_type_is_si(struct dmar_domain *domain) return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY; } +static inline bool domain_use_first_level(struct dmar_domain *domain) +{ + return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL; +} + static inline int domain_pfn_supported(struct dmar_domain *domain, unsigned long pfn) { @@ -667,11 +680,12 @@ static int domain_update_iommu_snooping(struct intel_iommu *skip) return ret; } -static int domain_update_iommu_superpage(struct intel_iommu *skip) +static int domain_update_iommu_superpage(struct dmar_domain *domain, + struct intel_iommu *skip) { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; - int mask = 0xf; + int mask = 0x3; if (!intel_iommu_superpage) { return 0; @@ -681,7 +695,13 @@ static int domain_update_iommu_superpage(struct intel_iommu *skip) rcu_read_lock(); for_each_active_iommu(iommu, drhd) { if (iommu != skip) { - mask &= cap_super_page_val(iommu->cap); + if (domain && domain_use_first_level(domain)) { + if (!cap_fl1gp_support(iommu->cap)) + mask = 0x1; + } else { + mask &= cap_super_page_val(iommu->cap); + } + if (!mask) break; } @@ -691,12 +711,47 @@ static int domain_update_iommu_superpage(struct intel_iommu *skip) return fls(mask); } +static int domain_update_device_node(struct dmar_domain *domain) +{ + struct device_domain_info *info; + int nid = NUMA_NO_NODE; + + assert_spin_locked(&device_domain_lock); + + if (list_empty(&domain->devices)) + return NUMA_NO_NODE; + + list_for_each_entry(info, &domain->devices, link) { + if (!info->dev) + continue; + + /* + * There could possibly be multiple device numa nodes as devices + * within the same domain may sit behind different IOMMUs. There + * isn't perfect answer in such situation, so we select first + * come first served policy. + */ + nid = dev_to_node(info->dev); + if (nid != NUMA_NO_NODE) + break; + } + + return nid; +} + /* Some capabilities may be different across iommus */ static void domain_update_iommu_cap(struct dmar_domain *domain) { domain_update_iommu_coherency(domain); domain->iommu_snooping = domain_update_iommu_snooping(NULL); - domain->iommu_superpage = domain_update_iommu_superpage(NULL); + domain->iommu_superpage = domain_update_iommu_superpage(domain, NULL); + + /* + * If RHSA is missing, we should default to the device numa domain + * as fall back. + */ + if (domain->nid == NUMA_NO_NODE) + domain->nid = domain_update_device_node(domain); } struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, @@ -919,6 +974,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; + if (domain_use_first_level(domain)) { + pteval |= DMA_FL_PTE_XD; + if (domain->domain.type == IOMMU_DOMAIN_DMA) + pteval |= DMA_FL_PTE_ACCESS; + } if (cmpxchg64(&pte->val, 0ULL, pteval)) /* Someone else set it while we were thinking; use theirs. */ free_pgtable_page(tmp_page); @@ -1715,6 +1775,35 @@ static void free_dmar_iommu(struct intel_iommu *iommu) #endif } +/* + * Check and return whether first level is used by default for + * DMA translation. Currently, we make it off by setting + * first_level_support = 0, and will change it to -1 after all + * map/unmap paths support first level page table. + */ +static bool first_level_by_default(void) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + static int first_level_support = 0; + + if (likely(first_level_support != -1)) + return first_level_support; + + first_level_support = 1; + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + if (!sm_supported(iommu) || !ecap_flts(iommu->ecap)) { + first_level_support = 0; + break; + } + } + rcu_read_unlock(); + + return first_level_support; +} + static struct dmar_domain *alloc_domain(int flags) { struct dmar_domain *domain; @@ -1726,6 +1815,8 @@ static struct dmar_domain *alloc_domain(int flags) memset(domain, 0, sizeof(*domain)); domain->nid = NUMA_NO_NODE; domain->flags = flags; + if (first_level_by_default()) + domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL; domain->has_iotlb_device = false; INIT_LIST_HEAD(&domain->devices); @@ -2237,17 +2328,27 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, unsigned long sg_res = 0; unsigned int largepage_lvl = 0; unsigned long lvl_pages = 0; + u64 attr; BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1)); if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) return -EINVAL; - prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP; + attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP); + if (domain_use_first_level(domain)) { + attr |= DMA_FL_PTE_PRESENT | DMA_FL_PTE_XD; + + if (domain->domain.type == IOMMU_DOMAIN_DMA) { + attr |= DMA_FL_PTE_ACCESS; + if (prot & DMA_PTE_WRITE) + attr |= DMA_FL_PTE_DIRTY; + } + } if (!sg) { sg_res = nr_pages; - pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot; + pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr; } while (nr_pages > 0) { @@ -2259,7 +2360,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, sg_res = aligned_nrpages(sg->offset, sg->length); sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff; sg->dma_length = sg->length; - pteval = (sg_phys(sg) - pgoff) | prot; + pteval = (sg_phys(sg) - pgoff) | attr; phys_pfn = pteval >> VTD_PAGE_SHIFT; } @@ -2466,6 +2567,36 @@ dmar_search_domain_by_dev_info(int segment, int bus, int devfn) return NULL; } +static int domain_setup_first_level(struct intel_iommu *iommu, + struct dmar_domain *domain, + struct device *dev, + int pasid) +{ + int flags = PASID_FLAG_SUPERVISOR_MODE; + struct dma_pte *pgd = domain->pgd; + int agaw, level; + + /* + * Skip top levels of page tables for iommu which has + * less agaw than default. Unnecessary for PT mode. + */ + for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) { + pgd = phys_to_virt(dma_pte_addr(pgd)); + if (!dma_pte_present(pgd)) + return -ENOMEM; + } + + level = agaw_to_level(agaw); + if (level != 4 && level != 5) + return -EINVAL; + + flags |= (level == 5) ? PASID_FLAG_FL5LP : 0; + + return intel_pasid_setup_first_level(iommu, dev, (pgd_t *)pgd, pasid, + domain->iommu_did[iommu->seq_id], + flags); +} + static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, int bus, int devfn, struct device *dev, @@ -2565,6 +2696,9 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, if (hw_pass_through && domain_type_is_si(domain)) ret = intel_pasid_setup_pass_through(iommu, domain, dev, PASID_RID2PASID); + else if (domain_use_first_level(domain)) + ret = domain_setup_first_level(iommu, domain, dev, + PASID_RID2PASID); else ret = intel_pasid_setup_second_level(iommu, domain, dev, PASID_RID2PASID); @@ -2751,6 +2885,7 @@ static int __init si_domain_init(int hw) if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { domain_exit(si_domain); + si_domain = NULL; return -EFAULT; } @@ -3371,6 +3506,10 @@ free_iommu: disable_dmar_iommu(iommu); free_dmar_iommu(iommu); } + if (si_domain) { + domain_exit(si_domain); + si_domain = NULL; + } kfree(g_iommus); @@ -4465,7 +4604,7 @@ static int intel_iommu_add(struct dmar_drhd_unit *dmaru) iommu->name); return -ENXIO; } - sp = domain_update_iommu_superpage(iommu) - 1; + sp = domain_update_iommu_superpage(NULL, iommu) - 1; if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) { pr_warn("%s: Doesn't support large page.\n", iommu->name); @@ -5202,8 +5341,6 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) intel_iommu_strict = 1; } - domain_update_iommu_cap(dmar_domain); - domain = &dmar_domain->domain; domain->geometry.aperture_start = 0; domain->geometry.aperture_end = @@ -5304,8 +5441,12 @@ static int aux_domain_add_dev(struct dmar_domain *domain, goto attach_failed; /* Setup the PASID entry for mediated devices: */ - ret = intel_pasid_setup_second_level(iommu, domain, dev, - domain->default_pasid); + if (domain_use_first_level(domain)) + ret = domain_setup_first_level(iommu, domain, dev, + domain->default_pasid); + else + ret = intel_pasid_setup_second_level(iommu, domain, dev, + domain->default_pasid); if (ret) goto table_failed; spin_unlock(&iommu->lock); @@ -5453,7 +5594,7 @@ static void intel_iommu_aux_detach_device(struct iommu_domain *domain, static int intel_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t hpa, - size_t size, int iommu_prot) + size_t size, int iommu_prot, gfp_t gfp) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); u64 max_addr; |