diff options
Diffstat (limited to 'include/linux/pgtable.h')
-rw-r--r-- | include/linux/pgtable.h | 162 |
1 files changed, 73 insertions, 89 deletions
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index a108b60a6962..c5a51481bbb9 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -165,6 +165,13 @@ static inline pte_t *virt_to_kpte(unsigned long vaddr) return pmd_none(*pmd) ? NULL : pte_offset_kernel(pmd, vaddr); } +#ifndef pmd_young +static inline int pmd_young(pmd_t pmd) +{ + return 0; +} +#endif + #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, @@ -260,6 +267,17 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif +#ifndef arch_has_hw_nonleaf_pmd_young +/* + * Return whether the accessed bit in non-leaf PMD entries is supported on the + * local CPU. + */ +static inline bool arch_has_hw_nonleaf_pmd_young(void) +{ + return IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG); +} +#endif + #ifndef arch_has_hw_pte_young /* * Return whether the accessed bit is supported on the local CPU. @@ -291,24 +309,28 @@ static inline void ptep_clear(struct mm_struct *mm, unsigned long addr, ptep_get_and_clear(mm, addr, ptep); } -#ifndef __HAVE_ARCH_PTEP_GET +#ifndef ptep_get static inline pte_t ptep_get(pte_t *ptep) { return READ_ONCE(*ptep); } #endif -#ifdef CONFIG_GUP_GET_PTE_LOW_HIGH +#ifndef pmdp_get +static inline pmd_t pmdp_get(pmd_t *pmdp) +{ + return READ_ONCE(*pmdp); +} +#endif + +#ifdef CONFIG_GUP_GET_PXX_LOW_HIGH /* - * WARNING: only to be used in the get_user_pages_fast() implementation. - * - * With get_user_pages_fast(), we walk down the pagetables without taking any - * locks. For this we would like to load the pointers atomically, but sometimes - * that is not possible (e.g. without expensive cmpxchg8b on x86_32 PAE). What - * we do have is the guarantee that a PTE will only either go from not present - * to present, or present to not present or both -- it will not switch to a - * completely different present page without a TLB flush in between; something - * that we are blocking by holding interrupts off. + * For walking the pagetables without holding any locks. Some architectures + * (eg x86-32 PAE) cannot load the entries atomically without using expensive + * instructions. We are guaranteed that a PTE will only either go from not + * present to present, or present to not present -- it will not switch to a + * completely different present page without a TLB flush inbetween; which we + * are blocking by holding interrupts off. * * Setting ptes from not present to present goes: * @@ -343,15 +365,42 @@ static inline pte_t ptep_get_lockless(pte_t *ptep) return pte; } -#else /* CONFIG_GUP_GET_PTE_LOW_HIGH */ +#define ptep_get_lockless ptep_get_lockless + +#if CONFIG_PGTABLE_LEVELS > 2 +static inline pmd_t pmdp_get_lockless(pmd_t *pmdp) +{ + pmd_t pmd; + + do { + pmd.pmd_low = pmdp->pmd_low; + smp_rmb(); + pmd.pmd_high = pmdp->pmd_high; + smp_rmb(); + } while (unlikely(pmd.pmd_low != pmdp->pmd_low)); + + return pmd; +} +#define pmdp_get_lockless pmdp_get_lockless +#endif /* CONFIG_PGTABLE_LEVELS > 2 */ +#endif /* CONFIG_GUP_GET_PXX_LOW_HIGH */ + /* * We require that the PTE can be read atomically. */ +#ifndef ptep_get_lockless static inline pte_t ptep_get_lockless(pte_t *ptep) { return ptep_get(ptep); } -#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */ +#endif + +#ifndef pmdp_get_lockless +static inline pmd_t pmdp_get_lockless(pmd_t *pmdp) +{ + return pmdp_get(pmdp); +} +#endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR @@ -407,9 +456,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long address, pte_t *ptep, int full) { - pte_t pte; - pte = ptep_get_and_clear(mm, address, ptep); - return pte; + return ptep_get_and_clear(mm, address, ptep); } #endif @@ -485,30 +532,6 @@ static inline pte_t pte_sw_mkyoung(pte_t pte) #define pte_sw_mkyoung pte_sw_mkyoung #endif -#ifndef pte_savedwrite -#define pte_savedwrite pte_write -#endif - -#ifndef pte_mk_savedwrite -#define pte_mk_savedwrite pte_mkwrite -#endif - -#ifndef pte_clear_savedwrite -#define pte_clear_savedwrite pte_wrprotect -#endif - -#ifndef pmd_savedwrite -#define pmd_savedwrite pmd_write -#endif - -#ifndef pmd_mk_savedwrite -#define pmd_mk_savedwrite pmd_mkwrite -#endif - -#ifndef pmd_clear_savedwrite -#define pmd_clear_savedwrite pmd_wrprotect -#endif - #ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline void pmdp_set_wrprotect(struct mm_struct *mm, @@ -794,7 +817,7 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio) #endif #ifndef flush_tlb_fix_spurious_fault -#define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address) +#define flush_tlb_fix_spurious_fault(vma, address, ptep) flush_tlb_page(vma, address) #endif /* @@ -1041,35 +1064,6 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #define arch_start_context_switch(prev) do {} while (0) #endif -/* - * When replacing an anonymous page by a real (!non) swap entry, we clear - * PG_anon_exclusive from the page and instead remember whether the flag was - * set in the swp pte. During fork(), we have to mark the entry as !exclusive - * (possibly shared). On swapin, we use that information to restore - * PG_anon_exclusive, which is very helpful in cases where we might have - * additional (e.g., FOLL_GET) references on a page and wouldn't be able to - * detect exclusivity. - * - * These functions don't apply to non-swap entries (e.g., migration, hwpoison, - * ...). - */ -#ifndef __HAVE_ARCH_PTE_SWP_EXCLUSIVE -static inline pte_t pte_swp_mkexclusive(pte_t pte) -{ - return pte; -} - -static inline int pte_swp_exclusive(pte_t pte) -{ - return false; -} - -static inline pte_t pte_swp_clear_exclusive(pte_t pte) -{ - return pte; -} -#endif - #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) @@ -1191,14 +1185,16 @@ static inline int track_pfn_copy(struct vm_area_struct *vma) * can be for the entire vma (in which case pfn, size are zero). */ static inline void untrack_pfn(struct vm_area_struct *vma, - unsigned long pfn, unsigned long size) + unsigned long pfn, unsigned long size, + bool mm_wr_locked) { } /* - * untrack_pfn_moved is called while mremapping a pfnmap for a new region. + * untrack_pfn_clear is called while mremapping a pfnmap for a new region + * or fails to copy pgtable during duplicate vm area. */ -static inline void untrack_pfn_moved(struct vm_area_struct *vma) +static inline void untrack_pfn_clear(struct vm_area_struct *vma) { } #else @@ -1209,8 +1205,8 @@ extern void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot, pfn_t pfn); extern int track_pfn_copy(struct vm_area_struct *vma); extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, - unsigned long size); -extern void untrack_pfn_moved(struct vm_area_struct *vma); + unsigned long size, bool mm_wr_locked); +extern void untrack_pfn_clear(struct vm_area_struct *vma); #endif #ifdef CONFIG_MMU @@ -1321,18 +1317,6 @@ static inline int pud_trans_unstable(pud_t *pud) #endif } -#ifndef pmd_read_atomic -static inline pmd_t pmd_read_atomic(pmd_t *pmdp) -{ - /* - * Depend on compiler for an atomic pmd read. NOTE: this is - * only going to work, if the pmdval_t isn't larger than - * an unsigned long. - */ - return *pmdp; -} -#endif - #ifndef arch_needs_pgtable_deposit #define arch_needs_pgtable_deposit() (false) #endif @@ -1359,13 +1343,13 @@ static inline pmd_t pmd_read_atomic(pmd_t *pmdp) */ static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd) { - pmd_t pmdval = pmd_read_atomic(pmd); + pmd_t pmdval = pmdp_get_lockless(pmd); /* * The barrier will stabilize the pmdval in a register or on * the stack so that it will stop changing under the code. * * When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE, - * pmd_read_atomic is allowed to return a not atomic pmdval + * pmdp_get_lockless is allowed to return a not atomic pmdval * (for example pointing to an hugepage that has never been * mapped in the pmd). The below checks will only care about * the low part of the pmd with 32bit PAE x86 anyway, with the |