Diffstat (limited to 'mm/rmap.c')
 mm/rmap.c | 499
 1 file changed, 307 insertions(+), 192 deletions(-)
diff --git a/mm/rmap.c b/mm/rmap.c
index 7a27a2b41802..f5d43edad529 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -470,7 +470,7 @@ void __init anon_vma_init(void)
/*
* Getting a lock on a stable anon_vma from a page off the LRU is tricky!
*
- * Since there is no serialization what so ever against page_remove_rmap()
+ * Since there is no serialization whatsoever against folio_remove_rmap_*()
* the best this function can do is return a refcount increased anon_vma
* that might have been relevant to this page.
*
@@ -487,9 +487,15 @@ void __init anon_vma_init(void)
* [ something equivalent to page_mapped_in_vma() ].
*
* Since anon_vma's slab is SLAB_TYPESAFE_BY_RCU and we know from
- * page_remove_rmap() that the anon_vma pointer from page->mapping is valid
+ * folio_remove_rmap_*() that the anon_vma pointer from page->mapping is valid
* if there is a mapcount, we can dereference the anon_vma after observing
* those.
+ *
+ * NOTE: the caller should normally hold the folio lock when calling this. If
+ * not, the caller needs to double check the anon_vma didn't change after
+ * taking the anon_vma lock for either read or write (UFFDIO_MOVE can modify it
+ * concurrently without folio lock protection). See folio_lock_anon_vma_read()
+ * which has already covered that, and comment above remap_pages().
*/
struct anon_vma *folio_get_anon_vma(struct folio *folio)
{
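(Aside, not part of the patch: the double-check rule the NOTE above describes looks roughly like this at a call site that cannot hold the folio lock. This is a sketch only; folio_anon_vma() is the existing helper declared in mm/internal.h.)

	struct anon_vma *anon_vma = folio_get_anon_vma(folio);

	if (anon_vma) {
		anon_vma_lock_read(anon_vma);
		/* UFFDIO_MOVE may have re-pointed folio->mapping meanwhile. */
		if (unlikely(anon_vma != folio_anon_vma(folio))) {
			anon_vma_unlock_read(anon_vma);
			put_anon_vma(anon_vma);
			anon_vma = NULL;	/* caller retries or bails out */
		}
	}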
@@ -542,6 +548,7 @@ struct anon_vma *folio_lock_anon_vma_read(struct folio *folio,
struct anon_vma *root_anon_vma;
unsigned long anon_mapping;
+retry:
rcu_read_lock();
anon_mapping = (unsigned long)READ_ONCE(folio->mapping);
if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
@@ -553,6 +560,17 @@ struct anon_vma *folio_lock_anon_vma_read(struct folio *folio,
root_anon_vma = READ_ONCE(anon_vma->root);
if (down_read_trylock(&root_anon_vma->rwsem)) {
/*
+ * folio_move_anon_rmap() might have changed the anon_vma as we
+ * might not hold the folio lock here.
+ */
+ if (unlikely((unsigned long)READ_ONCE(folio->mapping) !=
+ anon_mapping)) {
+ up_read(&root_anon_vma->rwsem);
+ rcu_read_unlock();
+ goto retry;
+ }
+
+ /*
* If the folio is still mapped, then this anon_vma is still
* its anon_vma, and holding the mutex ensures that it will
* not go away, see anon_vma_free().
@@ -586,6 +604,18 @@ struct anon_vma *folio_lock_anon_vma_read(struct folio *folio,
rcu_read_unlock();
anon_vma_lock_read(anon_vma);
+ /*
+ * folio_move_anon_rmap() might have changed the anon_vma as we might
+ * not hold the folio lock here.
+ */
+ if (unlikely((unsigned long)READ_ONCE(folio->mapping) !=
+ anon_mapping)) {
+ anon_vma_unlock_read(anon_vma);
+ put_anon_vma(anon_vma);
+ anon_vma = NULL;
+ goto retry;
+ }
+
if (atomic_dec_and_test(&anon_vma->refcount)) {
/*
* Oops, we held the last refcount, release the lock
@@ -1127,6 +1157,48 @@ int folio_total_mapcount(struct folio *folio)
return mapcount;
}
+static __always_inline unsigned int __folio_add_rmap(struct folio *folio,
+ struct page *page, int nr_pages, enum rmap_level level,
+ int *nr_pmdmapped)
+{
+ atomic_t *mapped = &folio->_nr_pages_mapped;
+ int first, nr = 0;
+
+ __folio_rmap_sanity_checks(folio, page, nr_pages, level);
+
+ switch (level) {
+ case RMAP_LEVEL_PTE:
+ do {
+ first = atomic_inc_and_test(&page->_mapcount);
+ if (first && folio_test_large(folio)) {
+ first = atomic_inc_return_relaxed(mapped);
+ first = (first < ENTIRELY_MAPPED);
+ }
+
+ if (first)
+ nr++;
+ } while (page++, --nr_pages > 0);
+ break;
+ case RMAP_LEVEL_PMD:
+ first = atomic_inc_and_test(&folio->_entire_mapcount);
+ if (first) {
+ nr = atomic_add_return_relaxed(ENTIRELY_MAPPED, mapped);
+ if (likely(nr < ENTIRELY_MAPPED + ENTIRELY_MAPPED)) {
+ *nr_pmdmapped = folio_nr_pages(folio);
+ nr = *nr_pmdmapped - (nr & FOLIO_PAGES_MAPPED);
+ /* Raced ahead of a remove and another add? */
+ if (unlikely(nr < 0))
+ nr = 0;
+ } else {
+ /* Raced ahead of a remove of ENTIRELY_MAPPED */
+ nr = 0;
+ }
+ }
+ break;
+ }
+ return nr;
+}
+
/**
* folio_move_anon_rmap - move a folio to our anon_vma
* @folio: The folio to move to our anon_vma
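(For context when reading the arithmetic in __folio_add_rmap() above: ENTIRELY_MAPPED is the renamed COMPOUND_MAPPED marker added into _nr_pages_mapped when a folio gains a PMD-level mapping, while FOLIO_PAGES_MAPPED masks out the per-page PTE count in the low bits. The assumed definitions, from the matching mm/internal.h change not shown in this diff:)

	#define ENTIRELY_MAPPED		0x800000
	#define FOLIO_PAGES_MAPPED	(ENTIRELY_MAPPED - 1)

(So atomic_add_return_relaxed(ENTIRELY_MAPPED, mapped) both flags the PMD mapping and returns, in the low bits, how many individual PTE mappings were already present.)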
@@ -1198,12 +1270,12 @@ static void __page_check_anon_rmap(struct folio *folio, struct page *page,
* The page's anon-rmap details (mapping and index) are guaranteed to
* be set up correctly at this point.
*
- * We have exclusion against page_add_anon_rmap because the caller
+ * We have exclusion against folio_add_anon_rmap_*() because the caller
* always holds the page locked.
*
- * We have exclusion against page_add_new_anon_rmap because those pages
+ * We have exclusion against folio_add_new_anon_rmap because those pages
* are initially only visible via the pagetables, and the pte is locked
- * over the call to page_add_new_anon_rmap.
+ * over the call to folio_add_new_anon_rmap.
*/
VM_BUG_ON_FOLIO(folio_anon_vma(folio)->root != vma->anon_vma->root,
folio);
@@ -1211,54 +1283,13 @@ static void __page_check_anon_rmap(struct folio *folio, struct page *page,
page);
}
-/**
- * page_add_anon_rmap - add pte mapping to an anonymous page
- * @page: the page to add the mapping to
- * @vma: the vm area in which the mapping is added
- * @address: the user virtual address mapped
- * @flags: the rmap flags
- *
- * The caller needs to hold the pte lock, and the page must be locked in
- * the anon_vma case: to serialize mapping,index checking after setting,
- * and to ensure that PageAnon is not being upgraded racily to PageKsm
- * (but PageKsm is never downgraded to PageAnon).
- */
-void page_add_anon_rmap(struct page *page, struct vm_area_struct *vma,
- unsigned long address, rmap_t flags)
+static __always_inline void __folio_add_anon_rmap(struct folio *folio,
+ struct page *page, int nr_pages, struct vm_area_struct *vma,
+ unsigned long address, rmap_t flags, enum rmap_level level)
{
- struct folio *folio = page_folio(page);
- atomic_t *mapped = &folio->_nr_pages_mapped;
- int nr = 0, nr_pmdmapped = 0;
- bool compound = flags & RMAP_COMPOUND;
- bool first;
-
- /* Is page being mapped by PTE? Is this its first map to be added? */
- if (likely(!compound)) {
- first = atomic_inc_and_test(&page->_mapcount);
- nr = first;
- if (first && folio_test_large(folio)) {
- nr = atomic_inc_return_relaxed(mapped);
- nr = (nr < COMPOUND_MAPPED);
- }
- } else if (folio_test_pmd_mappable(folio)) {
- /* That test is redundant: it's for safety or to optimize out */
-
- first = atomic_inc_and_test(&folio->_entire_mapcount);
- if (first) {
- nr = atomic_add_return_relaxed(COMPOUND_MAPPED, mapped);
- if (likely(nr < COMPOUND_MAPPED + COMPOUND_MAPPED)) {
- nr_pmdmapped = folio_nr_pages(folio);
- nr = nr_pmdmapped - (nr & FOLIO_PAGES_MAPPED);
- /* Raced ahead of a remove and another add? */
- if (unlikely(nr < 0))
- nr = 0;
- } else {
- /* Raced ahead of a remove of COMPOUND_MAPPED */
- nr = 0;
- }
- }
- }
+ int i, nr, nr_pmdmapped = 0;
+ nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped);
if (nr_pmdmapped)
__lruvec_stat_mod_folio(folio, NR_ANON_THPS, nr_pmdmapped);
if (nr)
@@ -1272,18 +1303,34 @@ void page_add_anon_rmap(struct page *page, struct vm_area_struct *vma,
* folio->index right when not given the address of the head
* page.
*/
- VM_WARN_ON_FOLIO(folio_test_large(folio) && !compound, folio);
+ VM_WARN_ON_FOLIO(folio_test_large(folio) &&
+ level != RMAP_LEVEL_PMD, folio);
__folio_set_anon(folio, vma, address,
!!(flags & RMAP_EXCLUSIVE));
} else if (likely(!folio_test_ksm(folio))) {
__page_check_anon_rmap(folio, page, vma, address);
}
- if (flags & RMAP_EXCLUSIVE)
- SetPageAnonExclusive(page);
- /* While PTE-mapping a THP we have a PMD and a PTE mapping. */
- VM_WARN_ON_FOLIO((atomic_read(&page->_mapcount) > 0 ||
- (folio_test_large(folio) && folio_entire_mapcount(folio) > 1)) &&
- PageAnonExclusive(page), folio);
+
+ if (flags & RMAP_EXCLUSIVE) {
+ switch (level) {
+ case RMAP_LEVEL_PTE:
+ for (i = 0; i < nr_pages; i++)
+ SetPageAnonExclusive(page + i);
+ break;
+ case RMAP_LEVEL_PMD:
+ SetPageAnonExclusive(page);
+ break;
+ }
+ }
+ for (i = 0; i < nr_pages; i++) {
+ struct page *cur_page = page + i;
+
+ /* While PTE-mapping a THP we have a PMD and a PTE mapping. */
+ VM_WARN_ON_FOLIO((atomic_read(&cur_page->_mapcount) > 0 ||
+ (folio_test_large(folio) &&
+ folio_entire_mapcount(folio) > 1)) &&
+ PageAnonExclusive(cur_page), folio);
+ }
/*
* For large folio, only mlock it if it's fully mapped to VMA. It's
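(The level argument threaded through __folio_add_anon_rmap() selects the new rmap granularity. Its assumed definition, from the include/linux/rmap.h side of this series rather than this diff:)

	enum rmap_level {
		RMAP_LEVEL_PTE = 0,
		RMAP_LEVEL_PMD,
	};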
@@ -1296,182 +1343,200 @@ void page_add_anon_rmap(struct page *page, struct vm_area_struct *vma,
}
/**
+ * folio_add_anon_rmap_ptes - add PTE mappings to a page range of an anon folio
+ * @folio: The folio to add the mappings to
+ * @page: The first page to add
+ * @nr_pages: The number of pages which will be mapped
+ * @vma: The vm area in which the mappings are added
+ * @address: The user virtual address of the first page to map
+ * @flags: The rmap flags
+ *
+ * The page range of the folio is defined by [page, page + nr_pages)
+ *
+ * The caller needs to hold the page table lock, and the page must be locked in
+ * the anon_vma case: to serialize mapping,index checking after setting,
+ * and to ensure that an anon folio is not being upgraded racily to a KSM folio
+ * (but KSM folios are never downgraded).
+ */
+void folio_add_anon_rmap_ptes(struct folio *folio, struct page *page,
+ int nr_pages, struct vm_area_struct *vma, unsigned long address,
+ rmap_t flags)
+{
+ __folio_add_anon_rmap(folio, page, nr_pages, vma, address, flags,
+ RMAP_LEVEL_PTE);
+}
+
+/**
+ * folio_add_anon_rmap_pmd - add a PMD mapping to a page range of an anon folio
+ * @folio: The folio to add the mapping to
+ * @page: The first page to add
+ * @vma: The vm area in which the mapping is added
+ * @address: The user virtual address of the first page to map
+ * @flags: The rmap flags
+ *
+ * The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
+ *
+ * The caller needs to hold the page table lock, and the page must be locked in
+ * the anon_vma case: to serialize mapping,index checking after setting.
+ */
+void folio_add_anon_rmap_pmd(struct folio *folio, struct page *page,
+ struct vm_area_struct *vma, unsigned long address, rmap_t flags)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ __folio_add_anon_rmap(folio, page, HPAGE_PMD_NR, vma, address, flags,
+ RMAP_LEVEL_PMD);
+#else
+ WARN_ON_ONCE(true);
+#endif
+}
+
+/**
* folio_add_new_anon_rmap - Add mapping to a new anonymous folio.
* @folio: The folio to add the mapping to.
* @vma: the vm area in which the mapping is added
* @address: the user virtual address mapped
*
- * Like page_add_anon_rmap() but must only be called on *new* folios.
+ * Like folio_add_anon_rmap_*() but must only be called on *new* folios.
* This means the inc-and-test can be bypassed.
* The folio does not have to be locked.
*
- * If the folio is large, it is accounted as a THP. As the folio
+ * If the folio is pmd-mappable, it is accounted as a THP. As the folio
* is new, it's assumed to be mapped exclusively by a single process.
*/
void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
unsigned long address)
{
- int nr;
+ int nr = folio_nr_pages(folio);
- VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
+ VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
+ VM_BUG_ON_VMA(address < vma->vm_start ||
+ address + (nr << PAGE_SHIFT) > vma->vm_end, vma);
__folio_set_swapbacked(folio);
+ __folio_set_anon(folio, vma, address, true);
- if (likely(!folio_test_pmd_mappable(folio))) {
+ if (likely(!folio_test_large(folio))) {
/* increment count (starts at -1) */
atomic_set(&folio->_mapcount, 0);
- nr = 1;
+ SetPageAnonExclusive(&folio->page);
+ } else if (!folio_test_pmd_mappable(folio)) {
+ int i;
+
+ for (i = 0; i < nr; i++) {
+ struct page *page = folio_page(folio, i);
+
+ /* increment count (starts at -1) */
+ atomic_set(&page->_mapcount, 0);
+ SetPageAnonExclusive(page);
+ }
+
+ atomic_set(&folio->_nr_pages_mapped, nr);
} else {
/* increment count (starts at -1) */
atomic_set(&folio->_entire_mapcount, 0);
- atomic_set(&folio->_nr_pages_mapped, COMPOUND_MAPPED);
- nr = folio_nr_pages(folio);
+ atomic_set(&folio->_nr_pages_mapped, ENTIRELY_MAPPED);
+ SetPageAnonExclusive(&folio->page);
__lruvec_stat_mod_folio(folio, NR_ANON_THPS, nr);
}
__lruvec_stat_mod_folio(folio, NR_ANON_MAPPED, nr);
- __folio_set_anon(folio, vma, address, true);
- SetPageAnonExclusive(&folio->page);
}
-/**
- * folio_add_file_rmap_range - add pte mapping to page range of a folio
- * @folio: The folio to add the mapping to
- * @page: The first page to add
- * @nr_pages: The number of pages which will be mapped
- * @vma: the vm area in which the mapping is added
- * @compound: charge the page as compound or small page
- *
- * The page range of folio is defined by [first_page, first_page + nr_pages)
- *
- * The caller needs to hold the pte lock.
- */
-void folio_add_file_rmap_range(struct folio *folio, struct page *page,
- unsigned int nr_pages, struct vm_area_struct *vma,
- bool compound)
+static __always_inline void __folio_add_file_rmap(struct folio *folio,
+ struct page *page, int nr_pages, struct vm_area_struct *vma,
+ enum rmap_level level)
{
- atomic_t *mapped = &folio->_nr_pages_mapped;
- unsigned int nr_pmdmapped = 0, first;
- int nr = 0;
-
- VM_WARN_ON_FOLIO(compound && !folio_test_pmd_mappable(folio), folio);
-
- /* Is page being mapped by PTE? Is this its first map to be added? */
- if (likely(!compound)) {
- do {
- first = atomic_inc_and_test(&page->_mapcount);
- if (first && folio_test_large(folio)) {
- first = atomic_inc_return_relaxed(mapped);
- first = (first < COMPOUND_MAPPED);
- }
-
- if (first)
- nr++;
- } while (page++, --nr_pages > 0);
- } else if (folio_test_pmd_mappable(folio)) {
- /* That test is redundant: it's for safety or to optimize out */
+ int nr, nr_pmdmapped = 0;
- first = atomic_inc_and_test(&folio->_entire_mapcount);
- if (first) {
- nr = atomic_add_return_relaxed(COMPOUND_MAPPED, mapped);
- if (likely(nr < COMPOUND_MAPPED + COMPOUND_MAPPED)) {
- nr_pmdmapped = folio_nr_pages(folio);
- nr = nr_pmdmapped - (nr & FOLIO_PAGES_MAPPED);
- /* Raced ahead of a remove and another add? */
- if (unlikely(nr < 0))
- nr = 0;
- } else {
- /* Raced ahead of a remove of COMPOUND_MAPPED */
- nr = 0;
- }
- }
- }
+ VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);
+ nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped);
if (nr_pmdmapped)
__lruvec_stat_mod_folio(folio, folio_test_swapbacked(folio) ?
NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped);
if (nr)
__lruvec_stat_mod_folio(folio, NR_FILE_MAPPED, nr);
- /* See comments in page_add_anon_rmap() */
+ /* See comments in folio_add_anon_rmap_*() */
if (!folio_test_large(folio))
mlock_vma_folio(folio, vma);
}
/**
- * page_add_file_rmap - add pte mapping to a file page
- * @page: the page to add the mapping to
- * @vma: the vm area in which the mapping is added
- * @compound: charge the page as compound or small page
+ * folio_add_file_rmap_ptes - add PTE mappings to a page range of a folio
+ * @folio: The folio to add the mappings to
+ * @page: The first page to add
+ * @nr_pages: The number of pages that will be mapped using PTEs
+ * @vma: The vm area in which the mappings are added
+ *
+ * The page range of the folio is defined by [page, page + nr_pages)
*
- * The caller needs to hold the pte lock.
+ * The caller needs to hold the page table lock.
*/
-void page_add_file_rmap(struct page *page, struct vm_area_struct *vma,
- bool compound)
+void folio_add_file_rmap_ptes(struct folio *folio, struct page *page,
+ int nr_pages, struct vm_area_struct *vma)
{
- struct folio *folio = page_folio(page);
- unsigned int nr_pages;
-
- VM_WARN_ON_ONCE_PAGE(compound && !PageTransHuge(page), page);
-
- if (likely(!compound))
- nr_pages = 1;
- else
- nr_pages = folio_nr_pages(folio);
-
- folio_add_file_rmap_range(folio, page, nr_pages, vma, compound);
+ __folio_add_file_rmap(folio, page, nr_pages, vma, RMAP_LEVEL_PTE);
}
/**
- * page_remove_rmap - take down pte mapping from a page
- * @page: page to remove mapping from
- * @vma: the vm area from which the mapping is removed
- * @compound: uncharge the page as compound or small page
+ * folio_add_file_rmap_pmd - add a PMD mapping to a page range of a folio
+ * @folio: The folio to add the mapping to
+ * @page: The first page to add
+ * @vma: The vm area in which the mapping is added
*
- * The caller needs to hold the pte lock.
+ * The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
+ *
+ * The caller needs to hold the page table lock.
*/
-void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
- bool compound)
+void folio_add_file_rmap_pmd(struct folio *folio, struct page *page,
+ struct vm_area_struct *vma)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ __folio_add_file_rmap(folio, page, HPAGE_PMD_NR, vma, RMAP_LEVEL_PMD);
+#else
+ WARN_ON_ONCE(true);
+#endif
+}
+
+static __always_inline void __folio_remove_rmap(struct folio *folio,
+ struct page *page, int nr_pages, struct vm_area_struct *vma,
+ enum rmap_level level)
{
- struct folio *folio = page_folio(page);
atomic_t *mapped = &folio->_nr_pages_mapped;
- int nr = 0, nr_pmdmapped = 0;
- bool last;
+ int last, nr = 0, nr_pmdmapped = 0;
enum node_stat_item idx;
- VM_BUG_ON_PAGE(compound && !PageHead(page), page);
-
- /* Hugetlb pages are not counted in NR_*MAPPED */
- if (unlikely(folio_test_hugetlb(folio))) {
- /* hugetlb pages are always mapped with pmds */
- atomic_dec(&folio->_entire_mapcount);
- return;
- }
+ __folio_rmap_sanity_checks(folio, page, nr_pages, level);
- /* Is page being unmapped by PTE? Is this its last map to be removed? */
- if (likely(!compound)) {
- last = atomic_add_negative(-1, &page->_mapcount);
- nr = last;
- if (last && folio_test_large(folio)) {
- nr = atomic_dec_return_relaxed(mapped);
- nr = (nr < COMPOUND_MAPPED);
- }
- } else if (folio_test_pmd_mappable(folio)) {
- /* That test is redundant: it's for safety or to optimize out */
+ switch (level) {
+ case RMAP_LEVEL_PTE:
+ do {
+ last = atomic_add_negative(-1, &page->_mapcount);
+ if (last && folio_test_large(folio)) {
+ last = atomic_dec_return_relaxed(mapped);
+ last = (last < ENTIRELY_MAPPED);
+ }
+ if (last)
+ nr++;
+ } while (page++, --nr_pages > 0);
+ break;
+ case RMAP_LEVEL_PMD:
last = atomic_add_negative(-1, &folio->_entire_mapcount);
if (last) {
- nr = atomic_sub_return_relaxed(COMPOUND_MAPPED, mapped);
- if (likely(nr < COMPOUND_MAPPED)) {
+ nr = atomic_sub_return_relaxed(ENTIRELY_MAPPED, mapped);
+ if (likely(nr < ENTIRELY_MAPPED)) {
nr_pmdmapped = folio_nr_pages(folio);
nr = nr_pmdmapped - (nr & FOLIO_PAGES_MAPPED);
/* Raced ahead of another remove and an add? */
if (unlikely(nr < 0))
nr = 0;
} else {
- /* An add of COMPOUND_MAPPED raced ahead */
+ /* An add of ENTIRELY_MAPPED raced ahead */
nr = 0;
}
}
+ break;
}
if (nr_pmdmapped) {
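(Call sites elsewhere in the tree convert along these lines; illustrative only, since folio_add_anon_rmap_pte() and folio_add_file_rmap_pte() are assumed to be the nr_pages == 1 convenience wrappers that the rmap.h side of the series defines on top of the *_ptes() functions added above:)

	-	page_add_anon_rmap(page, vma, address, flags);
	+	folio_add_anon_rmap_pte(folio, page, vma, address, flags);

	-	page_add_file_rmap(page, vma, false);
	+	folio_add_file_rmap_pte(folio, page, vma);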
@@ -1488,18 +1553,18 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
__lruvec_stat_mod_folio(folio, idx, -nr);
/*
- * Queue anon THP for deferred split if at least one
+ * Queue anon large folio for deferred split if at least one
* page of the folio is unmapped and at least one page
* is still mapped.
*/
- if (folio_test_pmd_mappable(folio) && folio_test_anon(folio))
- if (!compound || nr < nr_pmdmapped)
+ if (folio_test_large(folio) && folio_test_anon(folio))
+ if (level == RMAP_LEVEL_PTE || nr < nr_pmdmapped)
deferred_split_folio(folio);
}
/*
* It would be tidy to reset folio_test_anon mapping when fully
- * unmapped, but that might overwrite a racing page_add_anon_rmap
+ * unmapped, but that might overwrite a racing folio_add_anon_rmap_*()
* which increments mapcount after us but sets mapping before us:
* so leave the reset to free_pages_prepare, and remember that
* it's only reliable while mapped.
@@ -1508,6 +1573,43 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
munlock_vma_folio(folio, vma);
}
+/**
+ * folio_remove_rmap_ptes - remove PTE mappings from a page range of a folio
+ * @folio: The folio to remove the mappings from
+ * @page: The first page to remove
+ * @nr_pages: The number of pages that will be removed from the mapping
+ * @vma: The vm area from which the mappings are removed
+ *
+ * The page range of the folio is defined by [page, page + nr_pages)
+ *
+ * The caller needs to hold the page table lock.
+ */
+void folio_remove_rmap_ptes(struct folio *folio, struct page *page,
+ int nr_pages, struct vm_area_struct *vma)
+{
+ __folio_remove_rmap(folio, page, nr_pages, vma, RMAP_LEVEL_PTE);
+}
+
+/**
+ * folio_remove_rmap_pmd - remove a PMD mapping from a page range of a folio
+ * @folio: The folio to remove the mapping from
+ * @page: The first page to remove
+ * @vma: The vm area from which the mapping is removed
+ *
+ * The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
+ *
+ * The caller needs to hold the page table lock.
+ */
+void folio_remove_rmap_pmd(struct folio *folio, struct page *page,
+ struct vm_area_struct *vma)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ __folio_remove_rmap(folio, page, HPAGE_PMD_NR, vma, RMAP_LEVEL_PMD);
+#else
+ WARN_ON_ONCE(true);
+#endif
+}
+
/*
* @arg: enum ttu_flags will be passed to this argument
*/
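(The removal side converts symmetrically; folio_remove_rmap_pte(), used later in this patch, is assumed to be the single-page wrapper around folio_remove_rmap_ptes():)

	-	page_remove_rmap(page, vma, false);
	+	folio_remove_rmap_pte(folio, page, vma);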
@@ -1526,7 +1628,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
/*
* When racing against e.g. zap_pte_range() on another cpu,
- * in between its ptep_get_and_clear_full() and page_remove_rmap(),
+ * in between its ptep_get_and_clear_full() and folio_remove_rmap_*(),
* try_to_unmap() may return before page_mapped() has become false,
* if page table locking is skipped: use TTU_SYNC to wait for that.
*/
@@ -1764,9 +1866,9 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
break;
}
- /* See page_try_share_anon_rmap(): clear PTE first. */
+ /* See folio_try_share_anon_rmap(): clear PTE first. */
if (anon_exclusive &&
- page_try_share_anon_rmap(subpage)) {
+ folio_try_share_anon_rmap_pte(folio, subpage)) {
swap_free(entry);
set_pte_at(mm, address, pvmw.pte, pteval);
ret = false;
@@ -1804,7 +1906,10 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
dec_mm_counter(mm, mm_counter_file(&folio->page));
}
discard:
- page_remove_rmap(subpage, vma, folio_test_hugetlb(folio));
+ if (unlikely(folio_test_hugetlb(folio)))
+ hugetlb_remove_rmap(folio);
+ else
+ folio_remove_rmap_pte(folio, subpage, vma);
if (vma->vm_flags & VM_LOCKED)
mlock_drain_local();
folio_put(folio);
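(hugetlb_remove_rmap(), used in the discard path here and again in try_to_migrate_one() below, is not defined in this file. A sketch of its assumed shape from the hugetlb side of the series, where only the entire mapcount is tracked:)

	static inline void hugetlb_remove_rmap(struct folio *folio)
	{
		VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);

		atomic_dec(&folio->_entire_mapcount);
	}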
@@ -1872,7 +1977,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
/*
* When racing against e.g. zap_pte_range() on another cpu,
- * in between its ptep_get_and_clear_full() and page_remove_rmap(),
+ * in between its ptep_get_and_clear_full() and folio_remove_rmap_*(),
* try_to_migrate() may return before page_mapped() has become false,
* if page table locking is skipped: use TTU_SYNC to wait for that.
*/
@@ -2037,7 +2142,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
pte_t swp_pte;
if (anon_exclusive)
- BUG_ON(page_try_share_anon_rmap(subpage));
+ WARN_ON_ONCE(folio_try_share_anon_rmap_pte(folio,
+ subpage));
/*
* Store the pfn of the page in a special migration
@@ -2108,14 +2214,19 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
VM_BUG_ON_PAGE(pte_write(pteval) && folio_test_anon(folio) &&
!anon_exclusive, subpage);
- /* See page_try_share_anon_rmap(): clear PTE first. */
- if (anon_exclusive &&
- page_try_share_anon_rmap(subpage)) {
- if (folio_test_hugetlb(folio))
+ /* See folio_try_share_anon_rmap_pte(): clear PTE first. */
+ if (folio_test_hugetlb(folio)) {
+ if (anon_exclusive &&
+ hugetlb_try_share_anon_rmap(folio)) {
set_huge_pte_at(mm, address, pvmw.pte,
pteval, hsz);
- else
- set_pte_at(mm, address, pvmw.pte, pteval);
+ ret = false;
+ page_vma_mapped_walk_done(&pvmw);
+ break;
+ }
+ } else if (anon_exclusive &&
+ folio_try_share_anon_rmap_pte(folio, subpage)) {
+ set_pte_at(mm, address, pvmw.pte, pteval);
ret = false;
page_vma_mapped_walk_done(&pvmw);
break;
@@ -2157,7 +2268,10 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
*/
}
- page_remove_rmap(subpage, vma, folio_test_hugetlb(folio));
+ if (unlikely(folio_test_hugetlb(folio)))
+ hugetlb_remove_rmap(folio);
+ else
+ folio_remove_rmap_pte(folio, subpage, vma);
if (vma->vm_flags & VM_LOCKED)
mlock_drain_local();
folio_put(folio);
@@ -2296,7 +2410,7 @@ static bool page_make_device_exclusive_one(struct folio *folio,
* There is a reference on the page for the swap entry which has
* been removed, so shouldn't take another.
*/
- page_remove_rmap(subpage, vma, false);
+ folio_remove_rmap_pte(folio, subpage, vma);
}
mmu_notifier_invalidate_range_end(&range);
@@ -2580,12 +2694,11 @@ void rmap_walk_locked(struct folio *folio, struct rmap_walk_control *rwc)
* The following two functions are for anonymous (private mapped) hugepages.
* Unlike common anonymous pages, anonymous hugepages have no accounting code
* and no lru code, because we handle hugepages differently from common pages.
- *
- * RMAP_COMPOUND is ignored.
*/
-void hugepage_add_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
- unsigned long address, rmap_t flags)
+void hugetlb_add_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
+ unsigned long address, rmap_t flags)
{
+ VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
atomic_inc(&folio->_entire_mapcount);
@@ -2595,9 +2708,11 @@ void hugepage_add_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
PageAnonExclusive(&folio->page), folio);
}
-void hugepage_add_new_anon_rmap(struct folio *folio,
- struct vm_area_struct *vma, unsigned long address)
+void hugetlb_add_new_anon_rmap(struct folio *folio,
+ struct vm_area_struct *vma, unsigned long address)
{
+ VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
+
BUG_ON(address < vma->vm_start || address >= vma->vm_end);
/* increment count (starts at -1) */
atomic_set(&folio->_entire_mapcount, 0);