aboutsummaryrefslogtreecommitdiffstats
path: root/mm/rmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/rmap.c')
-rw-r--r--mm/rmap.c104
1 files changed, 70 insertions, 34 deletions
diff --git a/mm/rmap.c b/mm/rmap.c
index 1bd94ea62f7f..3c2a43938152 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -82,7 +82,8 @@ static inline struct anon_vma *anon_vma_alloc(void)
anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
if (anon_vma) {
atomic_set(&anon_vma->refcount, 1);
- anon_vma->degree = 1; /* Reference for first vma */
+ anon_vma->num_children = 0;
+ anon_vma->num_active_vmas = 0;
anon_vma->parent = anon_vma;
/*
* Initialise the anon_vma root to point to itself. If called
@@ -190,6 +191,7 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
anon_vma = anon_vma_alloc();
if (unlikely(!anon_vma))
goto out_enomem_free_avc;
+ anon_vma->num_children++; /* self-parent link for new root */
allocated = anon_vma;
}
@@ -199,8 +201,7 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
if (likely(!vma->anon_vma)) {
vma->anon_vma = anon_vma;
anon_vma_chain_link(vma, avc, anon_vma);
- /* vma reference or self-parent link for new root */
- anon_vma->degree++;
+ anon_vma->num_active_vmas++;
allocated = NULL;
avc = NULL;
}
@@ -279,19 +280,19 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
anon_vma_chain_link(dst, avc, anon_vma);
/*
- * Reuse existing anon_vma if its degree lower than two,
- * that means it has no vma and only one anon_vma child.
+ * Reuse existing anon_vma if it has no vma and only one
+ * anon_vma child.
*
- * Do not chose parent anon_vma, otherwise first child
- * will always reuse it. Root anon_vma is never reused:
+ * Root anon_vma is never reused:
* it has self-parent reference and at least one child.
*/
- if (!dst->anon_vma && anon_vma != src->anon_vma &&
- anon_vma->degree < 2)
+ if (!dst->anon_vma &&
+ anon_vma->num_children < 2 &&
+ anon_vma->num_active_vmas == 0)
dst->anon_vma = anon_vma;
}
if (dst->anon_vma)
- dst->anon_vma->degree++;
+ dst->anon_vma->num_active_vmas++;
unlock_anon_vma_root(root);
return 0;
@@ -341,6 +342,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
anon_vma = anon_vma_alloc();
if (!anon_vma)
goto out_error;
+ anon_vma->num_active_vmas++;
avc = anon_vma_chain_alloc(GFP_KERNEL);
if (!avc)
goto out_error_free_anon_vma;
@@ -361,7 +363,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
vma->anon_vma = anon_vma;
anon_vma_lock_write(anon_vma);
anon_vma_chain_link(vma, avc, anon_vma);
- anon_vma->parent->degree++;
+ anon_vma->parent->num_children++;
anon_vma_unlock_write(anon_vma);
return 0;
@@ -393,7 +395,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
* to free them outside the lock.
*/
if (RB_EMPTY_ROOT(&anon_vma->rb_root.rb_root)) {
- anon_vma->parent->degree--;
+ anon_vma->parent->num_children--;
continue;
}
@@ -401,7 +403,7 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
anon_vma_chain_free(avc);
}
if (vma->anon_vma)
- vma->anon_vma->degree--;
+ vma->anon_vma->num_active_vmas--;
unlock_anon_vma_root(root);
/*
@@ -412,7 +414,8 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
struct anon_vma *anon_vma = avc->anon_vma;
- VM_WARN_ON(anon_vma->degree);
+ VM_WARN_ON(anon_vma->num_children);
+ VM_WARN_ON(anon_vma->num_active_vmas);
put_anon_vma(anon_vma);
list_del(&avc->same_vma);
@@ -686,7 +689,6 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
*/
unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
{
- unsigned long address;
if (PageAnon(page)) {
struct anon_vma *page__anon_vma = page_anon_vma(page);
/*
@@ -696,15 +698,13 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
if (!vma->anon_vma || !page__anon_vma ||
vma->anon_vma->root != page__anon_vma->root)
return -EFAULT;
- } else if (page->mapping) {
- if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping)
- return -EFAULT;
- } else
+ } else if (!vma->vm_file) {
return -EFAULT;
- address = __vma_address(page, vma);
- if (unlikely(address < vma->vm_start || address >= vma->vm_end))
+ } else if (vma->vm_file->f_mapping != compound_head(page)->mapping) {
return -EFAULT;
- return address;
+ }
+
+ return vma_address(page, vma);
}
pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
@@ -896,7 +896,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
* We have to assume the worse case ie pmd for invalidation. Note that
* the page can not be free from this function.
*/
- end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
+ end = vma_address_end(page, vma);
mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
while (page_vma_mapped_walk(&pvmw)) {
@@ -1348,6 +1348,15 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
unsigned long start = address, end;
enum ttu_flags flags = (enum ttu_flags)arg;
+ /*
+ * When racing against e.g. zap_pte_range() on another cpu,
+ * in between its ptep_get_and_clear_full() and page_remove_rmap(),
+ * try_to_unmap() may return false when it is about to become true,
+ * if page table locking is skipped: use TTU_SYNC to wait for that.
+ */
+ if (flags & TTU_SYNC)
+ pvmw.flags = PVMW_SYNC;
+
/* munlock has nothing to gain from examining un-locked vmas */
if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
return true;
@@ -1369,7 +1378,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
* Note that the page can not be free in this function as call of
* try_to_unmap() must hold a reference on the page.
*/
- end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
+ end = PageKsm(page) ?
+ address + PAGE_SIZE : vma_address_end(page, vma);
if (PageHuge(page)) {
/*
* If sharing is possible, start and end will be adjusted
@@ -1587,7 +1597,30 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
/* MADV_FREE page check */
if (!PageSwapBacked(page)) {
- if (!PageDirty(page)) {
+ int ref_count, map_count;
+
+ /*
+ * Synchronize with gup_pte_range():
+ * - clear PTE; barrier; read refcount
+ * - inc refcount; barrier; read PTE
+ */
+ smp_mb();
+
+ ref_count = page_ref_count(page);
+ map_count = page_mapcount(page);
+
+ /*
+ * Order reads for page refcount and dirty flag
+ * (see comments in __remove_mapping()).
+ */
+ smp_rmb();
+
+ /*
+ * The only page refs must be one from isolation
+ * plus the rmap(s) (dropped by discard:).
+ */
+ if (ref_count == 1 + map_count &&
+ !PageDirty(page)) {
/* Invalidate as we cleared the pte */
mmu_notifier_invalidate_range(mm,
address, address + PAGE_SIZE);
@@ -1682,9 +1715,9 @@ static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
return is_vma_temporary_stack(vma);
}
-static int page_mapcount_is_zero(struct page *page)
+static int page_not_mapped(struct page *page)
{
- return !total_mapcount(page);
+ return !page_mapped(page);
}
/**
@@ -1702,7 +1735,7 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
struct rmap_walk_control rwc = {
.rmap_one = try_to_unmap_one,
.arg = (void *)flags,
- .done = page_mapcount_is_zero,
+ .done = page_not_mapped,
.anon_lock = page_lock_anon_vma_read,
};
@@ -1723,14 +1756,15 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
else
rmap_walk(page, &rwc);
- return !page_mapcount(page) ? true : false;
+ /*
+ * When racing against e.g. zap_pte_range() on another cpu,
+ * in between its ptep_get_and_clear_full() and page_remove_rmap(),
+ * try_to_unmap() may return false when it is about to become true,
+ * if page table locking is skipped: use TTU_SYNC to wait for that.
+ */
+ return !page_mapcount(page);
}
-static int page_not_mapped(struct page *page)
-{
- return !page_mapped(page);
-};
-
/**
* try_to_munlock - try to munlock a page
* @page: the page to be munlocked
@@ -1825,6 +1859,7 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,
struct vm_area_struct *vma = avc->vma;
unsigned long address = vma_address(page, vma);
+ VM_BUG_ON_VMA(address == -EFAULT, vma);
cond_resched();
if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
@@ -1879,6 +1914,7 @@ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,
pgoff_start, pgoff_end) {
unsigned long address = vma_address(page, vma);
+ VM_BUG_ON_VMA(address == -EFAULT, vma);
cond_resched();
if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))