Diffstat (limited to 'mm/hmm.c')
-rw-r--r--  mm/hmm.c  67
1 file changed, 43 insertions(+), 24 deletions(-)
diff --git a/mm/hmm.c b/mm/hmm.c
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -26,6 +26,8 @@
 #include <linux/mmu_notifier.h>
 #include <linux/memory_hotplug.h>
 
+#include "internal.h"
+
 struct hmm_vma_walk {
         struct hmm_range *range;
         unsigned long last;
@@ -210,14 +212,6 @@ int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
                 unsigned long end, unsigned long hmm_pfns[], pmd_t pmd);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-static inline bool hmm_is_device_private_entry(struct hmm_range *range,
-                                               swp_entry_t entry)
-{
-        return is_device_private_entry(entry) &&
-                device_private_entry_to_page(entry)->pgmap->owner ==
-                range->dev_private_owner;
-}
-
 static inline unsigned long pte_to_hmm_pfn_flags(struct hmm_range *range,
                                                  pte_t pte)
 {
@@ -234,10 +228,10 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
         struct hmm_range *range = hmm_vma_walk->range;
         unsigned int required_fault;
         unsigned long cpu_flags;
-        pte_t pte = *ptep;
+        pte_t pte = ptep_get(ptep);
         uint64_t pfn_req_flags = *hmm_pfn;
 
-        if (pte_none(pte)) {
+        if (pte_none_mostly(pte)) {
                 required_fault =
                         hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0);
                 if (required_fault)
@@ -250,15 +244,16 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
                 swp_entry_t entry = pte_to_swp_entry(pte);
 
                 /*
-                 * Never fault in device private pages, but just report
-                 * the PFN even if not present.
+                 * Don't fault in device private pages owned by the caller,
+                 * just report the PFN.
                  */
-                if (hmm_is_device_private_entry(range, entry)) {
+                if (is_device_private_entry(entry) &&
+                    pfn_swap_entry_to_page(entry)->pgmap->owner ==
+                    range->dev_private_owner) {
                         cpu_flags = HMM_PFN_VALID;
-                        if (is_write_device_private_entry(entry))
+                        if (is_writable_device_private_entry(entry))
                                 cpu_flags |= HMM_PFN_WRITE;
-                        *hmm_pfn = device_private_entry_to_pfn(entry) |
-                                        cpu_flags;
+                        *hmm_pfn = swp_offset_pfn(entry) | cpu_flags;
                         return 0;
                 }
 
@@ -272,6 +267,12 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
                 if (!non_swap_entry(entry))
                         goto fault;
 
+                if (is_device_private_entry(entry))
+                        goto fault;
+
+                if (is_device_exclusive_entry(entry))
+                        goto fault;
+
                 if (is_migration_entry(entry)) {
                         pte_unmap(ptep);
                         hmm_vma_walk->last = addr;
@@ -291,10 +292,14 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
                 goto fault;
 
         /*
+         * Bypass devmap pte such as DAX page when all pfn requested
+         * flags(pfn_req_flags) are fulfilled.
          * Since each architecture defines a struct page for the zero page, just
          * fall through and treat it like a normal page.
          */
-        if (pte_special(pte) && !is_zero_pfn(pte_pfn(pte))) {
+        if (!vm_normal_page(walk->vma, addr, pte) &&
+            !pte_devmap(pte) &&
+            !is_zero_pfn(pte_pfn(pte))) {
                 if (hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0)) {
                         pte_unmap(ptep);
                         return -EFAULT;
@@ -327,7 +332,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
         pmd_t pmd;
 
 again:
-        pmd = READ_ONCE(*pmdp);
+        pmd = pmdp_get_lockless(pmdp);
         if (pmd_none(pmd))
                 return hmm_vma_walk_hole(start, end, -1, walk);
 
@@ -356,8 +361,7 @@ again:
                  * huge or device mapping one and compute corresponding pfn
                  * values.
                  */
-                pmd = pmd_read_atomic(pmdp);
-                barrier();
+                pmd = pmdp_get_lockless(pmdp);
                 if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
                         goto again;
 
@@ -377,6 +381,8 @@ again:
         }
 
         ptep = pte_offset_map(pmdp, addr);
+        if (!ptep)
+                goto again;
         for (; addr < end; addr += PAGE_SIZE, ptep++, hmm_pfns++) {
                 int r;
 
@@ -409,7 +415,6 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
         struct hmm_range *range = hmm_vma_walk->range;
         unsigned long addr = start;
         pud_t pud;
-        int ret = 0;
         spinlock_t *ptl = pud_trans_huge_lock(pudp, walk->vma);
 
         if (!ptl)
@@ -458,7 +463,7 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
 
 out_unlock:
         spin_unlock(ptl);
-        return ret;
+        return 0;
 }
 #else
 #define hmm_vma_walk_pud        NULL
@@ -489,8 +494,21 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
         required_fault =
                 hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, cpu_flags);
         if (required_fault) {
+                int ret;
+
                 spin_unlock(ptl);
-                return hmm_vma_fault(addr, end, required_fault, walk);
+                hugetlb_vma_unlock_read(vma);
+                /*
+                 * Avoid deadlock: drop the vma lock before calling
+                 * hmm_vma_fault(), which will itself potentially take and
+                 * drop the vma lock. This is also correct from a
+                 * protection point of view, because there is no further
+                 * use here of either pte or ptl after dropping the vma
+                 * lock.
+                 */
+                ret = hmm_vma_fault(addr, end, required_fault, walk);
+                hugetlb_vma_lock_read(vma);
+                return ret;
         }
 
         pfn = pte_pfn(entry) + ((start & ~hmask) >> PAGE_SHIFT);
@@ -511,7 +529,7 @@ static int hmm_vma_walk_test(unsigned long start, unsigned long end,
         struct hmm_range *range = hmm_vma_walk->range;
         struct vm_area_struct *vma = walk->vma;
 
-        if (!(vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP)) &&
+        if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)) &&
             vma->vm_flags & VM_READ)
                 return 0;
 
@@ -544,6 +562,7 @@ static const struct mm_walk_ops hmm_walk_ops = {
         .pte_hole       = hmm_vma_walk_hole,
         .hugetlb_entry  = hmm_vma_walk_hugetlb_entry,
         .test_walk      = hmm_vma_walk_test,
+        .walk_lock      = PGWALK_RDLOCK,
 };
 
 /**
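
A few notes on the changes above. First, the hole check is widened from pte_none() to pte_none_mostly(): the walker now also treats swap pte markers (for example userfaultfd write-protect markers), which carry no page, as holes that may need faulting. For context, the helper in <linux/swapops.h> reads roughly as follows at the time of this diff; this is a paraphrase for illustration, not part of the patch:

static inline bool pte_none_mostly(pte_t pte)
{
        /* Empty, or carrying only a marker (e.g. uffd-wp) and thus no page. */
        return pte_none(pte) || is_pte_marker(pte);
}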
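
Second, the open-coded ownership test replaces hmm_is_device_private_entry(): a device private entry is reported as a raw PFN only when its pgmap->owner matches range->dev_private_owner, while anyone else's device private pages now fall through to the new goto fault paths (as do device exclusive entries), forcing them back to a CPU-accessible state. The sketch below shows, from the driver side, how a caller might set dev_private_owner so that its own pages take the report-the-PFN path; it follows the retry pattern documented in Documentation/mm/hmm.rst, and driver_snapshot_range() plus its parameters are illustrative assumptions, not kernel API:

#include <linux/hmm.h>
#include <linux/mm.h>
#include <linux/mmu_notifier.h>

/*
 * Illustrative only: snapshot [start, end) for a driver whose device
 * private pages were created with pgmap->owner == pgmap_owner.
 */
static int driver_snapshot_range(struct mmu_interval_notifier *notifier,
                                 void *pgmap_owner, unsigned long start,
                                 unsigned long end, unsigned long *pfns)
{
        struct mm_struct *mm = notifier->mm;
        struct hmm_range range = {
                .notifier               = notifier,
                .start                  = start,
                .end                    = end,
                .hmm_pfns               = pfns,
                .default_flags          = HMM_PFN_REQ_FAULT,
                /* Matches the ownership test in hmm_vma_handle_pte(). */
                .dev_private_owner      = pgmap_owner,
        };
        int ret;

again:
        range.notifier_seq = mmu_interval_read_begin(notifier);
        mmap_read_lock(mm);
        ret = hmm_range_fault(&range);
        mmap_read_unlock(mm);
        if (ret == -EBUSY)
                goto again;     /* raced with an invalidation; retry */
        if (ret)
                return ret;
        /*
         * Real drivers re-check mmu_interval_read_retry() under their own
         * lock before programming device page tables from pfns[].
         */
        return 0;
}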
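
Third, both READ_ONCE(*pmdp) and the older pmd_read_atomic()/barrier() pair are replaced by pmdp_get_lockless(), and pte reads go through ptep_get(). The point of the helper is that on configurations where page table entries are wider than the native word (32-bit x86 with PAE, for instance) a plain load can tear and observe a half-written pmd; pmdp_get_lockless() performs an architecture-appropriate atomic read. The value can still go stale immediately afterwards, which is why the walker keeps its again: retry label, and why the new NULL check after pte_offset_map() (which can now fail when the page table is freed underneath the walk) also loops back. A minimal sketch of the idiom, with a hypothetical helper name:

#include <linux/pgtable.h>

/* Illustrative only: a tear-free but possibly stale view of a pmd. */
static bool pmd_maps_pte_table(pmd_t *pmdp)
{
        pmd_t pmd = pmdp_get_lockless(pmdp);    /* atomic even for 64-bit
                                                 * pmds on 32-bit kernels */

        /*
         * Any decision taken on this value must be revalidated (e.g. by
         * retrying) once the page table lock is held, as
         * hmm_vma_walk_pmd() does.
         */
        return !pmd_none(pmd) && !pmd_trans_huge(pmd) && !pmd_devmap(pmd);
}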
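
Finally, hmm_walk_ops now declares .walk_lock = PGWALK_RDLOCK, telling the page walk core which mmap/vma locking the callbacks expect; HMM only snapshots the address space, so holding mmap_lock for read suffices. For context, the enum in <linux/pagewalk.h> looks roughly like this (paraphrased, not part of the patch):

enum page_walk_lock {
        /* mmap_lock needs to be held for read; vmas stay unlocked */
        PGWALK_RDLOCK = 0,
        /* each vma is write-locked as the walk enters it */
        PGWALK_WRLOCK = 1,
        /* the caller has already write-locked the vma */
        PGWALK_WRLOCK_VERIFY = 2,
};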