summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/dmapool.c2
-rw-r--r--mm/filemap.c34
-rw-r--r--mm/hugetlb.c8
-rw-r--r--mm/kasan/kasan.c3
-rw-r--r--mm/memcontrol.c32
-rw-r--r--mm/memory.c2
-rw-r--r--mm/migrate.c14
-rw-r--r--mm/mmap.c11
-rw-r--r--mm/readahead.c8
-rw-r--r--mm/slab.c13
-rw-r--r--mm/vmscan.c2
-rw-r--r--mm/vmstat.c7
12 files changed, 80 insertions, 56 deletions
diff --git a/mm/dmapool.c b/mm/dmapool.c
index 71a8998cd03a..312a716fa14c 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -394,7 +394,7 @@ static struct dma_page *pool_find_page(struct dma_pool *pool, dma_addr_t dma)
list_for_each_entry(page, &pool->page_list, page_list) {
if (dma < page->dma)
continue;
- if (dma < (page->dma + pool->allocation))
+ if ((dma - page->dma) < pool->allocation)
return page;
}
return NULL;
diff --git a/mm/filemap.c b/mm/filemap.c
index 72940fb38666..1cc5467cf36c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2473,6 +2473,21 @@ ssize_t generic_perform_write(struct file *file,
iov_iter_count(i));
again:
+ /*
+ * Bring in the user page that we will copy from _first_.
+ * Otherwise there's a nasty deadlock on copying from the
+ * same page as we're writing to, without it being marked
+ * up-to-date.
+ *
+ * Not only is this an optimisation, but it is also required
+ * to check that the address is actually valid, when atomic
+ * usercopies are used, below.
+ */
+ if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
+ status = -EFAULT;
+ break;
+ }
+
status = a_ops->write_begin(file, mapping, pos, bytes, flags,
&page, &fsdata);
if (unlikely(status < 0))
@@ -2480,17 +2495,8 @@ again:
if (mapping_writably_mapped(mapping))
flush_dcache_page(page);
- /*
- * 'page' is now locked. If we are trying to copy from a
- * mapping of 'page' in userspace, the copy might fault and
- * would need PageUptodate() to complete. But, page can not be
- * made Uptodate without acquiring the page lock, which we hold.
- * Deadlock. Avoid with pagefault_disable(). Fix up below with
- * iov_iter_fault_in_readable().
- */
- pagefault_disable();
+
copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
- pagefault_enable();
flush_dcache_page(page);
status = a_ops->write_end(file, mapping, pos, bytes, copied,
@@ -2513,14 +2519,6 @@ again:
*/
bytes = min_t(unsigned long, PAGE_CACHE_SIZE - offset,
iov_iter_single_seg_count(i));
- /*
- * This is the fallback to recover if the copy from
- * userspace above faults.
- */
- if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
- status = -EFAULT;
- break;
- }
goto again;
}
pos += copied;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 999fb0aef8f1..9cc773483624 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3202,6 +3202,14 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
continue;
/*
+ * Shared VMAs have their own reserves and do not affect
+ * MAP_PRIVATE accounting but it is possible that a shared
+ * VMA is using the same page so check and skip such VMAs.
+ */
+ if (iter_vma->vm_flags & VM_MAYSHARE)
+ continue;
+
+ /*
* Unmap the page from other VMAs without their own reserves.
* They get marked to be SIGKILLed if they fault in these
* areas. This is because a future no-page fault on this VMA
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 7b28e9cdf1c7..8da211411b57 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -135,12 +135,11 @@ static __always_inline bool memory_is_poisoned_16(unsigned long addr)
if (unlikely(*shadow_addr)) {
u16 shadow_first_bytes = *(u16 *)shadow_addr;
- s8 last_byte = (addr + 15) & KASAN_SHADOW_MASK;
if (unlikely(shadow_first_bytes))
return true;
- if (likely(!last_byte))
+ if (likely(IS_ALIGNED(addr, 8)))
return false;
return memory_is_poisoned_1(addr + 15);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6ddaeba34e09..d9b5c817dce8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -644,12 +644,14 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
}
/*
+ * Return page count for single (non recursive) @memcg.
+ *
* Implementation Note: reading percpu statistics for memcg.
*
* Both of vmstat[] and percpu_counter has threshold and do periodic
* synchronization to implement "quick" read. There are trade-off between
* reading cost and precision of value. Then, we may have a chance to implement
- * a periodic synchronizion of counter in memcg's counter.
+ * a periodic synchronization of counter in memcg's counter.
*
* But this _read() function is used for user interface now. The user accounts
* memory usage by memory cgroup and he _always_ requires exact value because
@@ -659,17 +661,24 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
*
* If there are kernel internal actions which can make use of some not-exact
* value, and reading all cpu value can be performance bottleneck in some
- * common workload, threashold and synchonization as vmstat[] should be
+ * common workload, threshold and synchronization as vmstat[] should be
* implemented.
*/
-static long mem_cgroup_read_stat(struct mem_cgroup *memcg,
- enum mem_cgroup_stat_index idx)
+static unsigned long
+mem_cgroup_read_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx)
{
long val = 0;
int cpu;
+ /* Per-cpu values can be negative, use a signed accumulator */
for_each_possible_cpu(cpu)
val += per_cpu(memcg->stat->count[idx], cpu);
+ /*
+ * Summing races with updates, so val may be negative. Avoid exposing
+ * transient negative values.
+ */
+ if (val < 0)
+ val = 0;
return val;
}
@@ -1254,7 +1263,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
continue;
- pr_cont(" %s:%ldKB", mem_cgroup_stat_names[i],
+ pr_cont(" %s:%luKB", mem_cgroup_stat_names[i],
K(mem_cgroup_read_stat(iter, i)));
}
@@ -2819,14 +2828,11 @@ static unsigned long tree_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx)
{
struct mem_cgroup *iter;
- long val = 0;
+ unsigned long val = 0;
- /* Per-cpu values can be negative, use a signed accumulator */
for_each_mem_cgroup_tree(iter, memcg)
val += mem_cgroup_read_stat(iter, idx);
- if (val < 0) /* race ? */
- val = 0;
return val;
}
@@ -3169,7 +3175,7 @@ static int memcg_stat_show(struct seq_file *m, void *v)
for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
continue;
- seq_printf(m, "%s %ld\n", mem_cgroup_stat_names[i],
+ seq_printf(m, "%s %lu\n", mem_cgroup_stat_names[i],
mem_cgroup_read_stat(memcg, i) * PAGE_SIZE);
}
@@ -3194,13 +3200,13 @@ static int memcg_stat_show(struct seq_file *m, void *v)
(u64)memsw * PAGE_SIZE);
for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
- long long val = 0;
+ unsigned long long val = 0;
if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
continue;
for_each_mem_cgroup_tree(mi, memcg)
val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE;
- seq_printf(m, "total_%s %lld\n", mem_cgroup_stat_names[i], val);
+ seq_printf(m, "total_%s %llu\n", mem_cgroup_stat_names[i], val);
}
for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) {
@@ -3381,6 +3387,7 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
ret = page_counter_memparse(args, "-1", &threshold);
if (ret)
return ret;
+ threshold <<= PAGE_SHIFT;
mutex_lock(&memcg->thresholds_lock);
@@ -4179,7 +4186,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
if (memcg_wb_domain_init(memcg, GFP_KERNEL))
goto out_free_stat;
- spin_lock_init(&memcg->pcp_counter_lock);
return memcg;
out_free_stat:
diff --git a/mm/memory.c b/mm/memory.c
index 9cb27470fee9..deb679c31f2a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2426,6 +2426,8 @@ void unmap_mapping_range(struct address_space *mapping,
if (details.last_index < details.first_index)
details.last_index = ULONG_MAX;
+
+ /* DAX uses i_mmap_lock to serialise file truncate vs page fault */
i_mmap_lock_write(mapping);
if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
unmap_mapping_range_tree(&mapping->i_mmap, &details);
diff --git a/mm/migrate.c b/mm/migrate.c
index c3cb566af3e2..842ecd7aaf7f 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -740,6 +740,15 @@ static int move_to_new_page(struct page *newpage, struct page *page,
if (PageSwapBacked(page))
SetPageSwapBacked(newpage);
+ /*
+ * Indirectly called below, migrate_page_copy() copies PG_dirty and thus
+ * needs newpage's memcg set to transfer memcg dirty page accounting.
+ * So perform memcg migration in two steps:
+ * 1. set newpage->mem_cgroup (here)
+ * 2. clear page->mem_cgroup (below)
+ */
+ set_page_memcg(newpage, page_memcg(page));
+
mapping = page_mapping(page);
if (!mapping)
rc = migrate_page(mapping, newpage, page, mode);
@@ -756,9 +765,10 @@ static int move_to_new_page(struct page *newpage, struct page *page,
rc = fallback_migrate_page(mapping, newpage, page, mode);
if (rc != MIGRATEPAGE_SUCCESS) {
+ set_page_memcg(newpage, NULL);
newpage->mapping = NULL;
} else {
- mem_cgroup_migrate(page, newpage, false);
+ set_page_memcg(page, NULL);
if (page_was_mapped)
remove_migration_ptes(page, newpage);
page->mapping = NULL;
@@ -1075,7 +1085,7 @@ out:
if (rc != MIGRATEPAGE_SUCCESS && put_new_page)
put_new_page(new_hpage, private);
else
- put_page(new_hpage);
+ putback_active_hugepage(new_hpage);
if (result) {
if (rc)
diff --git a/mm/mmap.c b/mm/mmap.c
index 971dd2cb77d2..79bcc9f92e48 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -612,8 +612,6 @@ static unsigned long count_vma_pages_range(struct mm_struct *mm,
void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
struct rb_node **rb_link, struct rb_node *rb_parent)
{
- WARN_ONCE(vma->vm_file && !vma->vm_ops, "missing vma->vm_ops");
-
/* Update tracking information for the gap following the new vma. */
if (vma->vm_next)
vma_gap_update(vma->vm_next);
@@ -1492,13 +1490,14 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
int vma_wants_writenotify(struct vm_area_struct *vma)
{
vm_flags_t vm_flags = vma->vm_flags;
+ const struct vm_operations_struct *vm_ops = vma->vm_ops;
/* If it was private or non-writable, the write bit is already clear */
if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
return 0;
/* The backer wishes to know when pages are first written to? */
- if (vma->vm_ops && vma->vm_ops->page_mkwrite)
+ if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite))
return 1;
/* The open routine did something to the protections that pgprot_modify
@@ -1638,12 +1637,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
*/
WARN_ON_ONCE(addr != vma->vm_start);
- /* All file mapping must have ->vm_ops set */
- if (!vma->vm_ops) {
- static const struct vm_operations_struct dummy_ops = {};
- vma->vm_ops = &dummy_ops;
- }
-
addr = vma->vm_start;
vm_flags = vma->vm_flags;
} else if (vm_flags & VM_SHARED) {
diff --git a/mm/readahead.c b/mm/readahead.c
index 60cd846a9a44..24682f6f4cfd 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -89,8 +89,8 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages,
while (!list_empty(pages)) {
page = list_to_page(pages);
list_del(&page->lru);
- if (add_to_page_cache_lru(page, mapping,
- page->index, GFP_KERNEL)) {
+ if (add_to_page_cache_lru(page, mapping, page->index,
+ GFP_KERNEL & mapping_gfp_mask(mapping))) {
read_cache_pages_invalidate_page(mapping, page);
continue;
}
@@ -127,8 +127,8 @@ static int read_pages(struct address_space *mapping, struct file *filp,
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
struct page *page = list_to_page(pages);
list_del(&page->lru);
- if (!add_to_page_cache_lru(page, mapping,
- page->index, GFP_KERNEL)) {
+ if (!add_to_page_cache_lru(page, mapping, page->index,
+ GFP_KERNEL & mapping_gfp_mask(mapping))) {
mapping->a_ops->readpage(filp, page);
}
page_cache_release(page);
diff --git a/mm/slab.c b/mm/slab.c
index c77ebe6cc87c..4fcc5dd8d5a6 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2190,9 +2190,16 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
size += BYTES_PER_WORD;
}
#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
- if (size >= kmalloc_size(INDEX_NODE + 1)
- && cachep->object_size > cache_line_size()
- && ALIGN(size, cachep->align) < PAGE_SIZE) {
+ /*
+ * To activate debug pagealloc, off-slab management is necessary
+ * requirement. In early phase of initialization, small sized slab
+ * doesn't get initialized so it would not be possible. So, we need
+ * to check size >= 256. It guarantees that all necessary small
+ * sized slab is initialized in current slab initialization sequence.
+ */
+ if (!slab_early_init && size >= kmalloc_size(INDEX_NODE) &&
+ size >= 256 && cachep->object_size > cache_line_size() &&
+ ALIGN(size, cachep->align) < PAGE_SIZE) {
cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align);
size = PAGE_SIZE;
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2d978b28a410..7f63a9381f71 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -175,7 +175,7 @@ static bool sane_reclaim(struct scan_control *sc)
if (!memcg)
return true;
#ifdef CONFIG_CGROUP_WRITEBACK
- if (memcg->css.cgroup)
+ if (cgroup_on_dfl(memcg->css.cgroup))
return true;
#endif
return false;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 4f5cd974e11a..fbf14485a049 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1363,15 +1363,16 @@ static cpumask_var_t cpu_stat_off;
static void vmstat_update(struct work_struct *w)
{
- if (refresh_cpu_vm_stats())
+ if (refresh_cpu_vm_stats()) {
/*
* Counters were updated so we expect more updates
* to occur in the future. Keep on running the
* update worker thread.
*/
- schedule_delayed_work(this_cpu_ptr(&vmstat_work),
+ schedule_delayed_work_on(smp_processor_id(),
+ this_cpu_ptr(&vmstat_work),
round_jiffies_relative(sysctl_stat_interval));
- else {
+ } else {
/*
* We did not update any counters so the app may be in
* a mode where it does not cause counter updates.