aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm/numa.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm/numa.c')
-rw-r--r--arch/powerpc/mm/numa.c275
1 files changed, 80 insertions, 195 deletions
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 649666d5d1c2..0257a7d659ef 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -8,6 +8,8 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) "numa: " fmt
+
#include <linux/threads.h>
#include <linux/bootmem.h>
#include <linux/init.h>
@@ -132,28 +134,6 @@ static int __init fake_numa_create_new_node(unsigned long end_pfn,
return 0;
}
-/*
- * get_node_active_region - Return active region containing pfn
- * Active range returned is empty if none found.
- * @pfn: The page to return the region for
- * @node_ar: Returned set to the active region containing @pfn
- */
-static void __init get_node_active_region(unsigned long pfn,
- struct node_active_region *node_ar)
-{
- unsigned long start_pfn, end_pfn;
- int i, nid;
-
- for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
- if (pfn >= start_pfn && pfn < end_pfn) {
- node_ar->nid = nid;
- node_ar->start_pfn = start_pfn;
- node_ar->end_pfn = end_pfn;
- break;
- }
- }
-}
-
static void reset_numa_cpu_lookup_table(void)
{
unsigned int cpu;
@@ -926,134 +906,48 @@ static void __init dump_numa_memory_topology(void)
}
}
-/*
- * Allocate some memory, satisfying the memblock or bootmem allocator where
- * required. nid is the preferred node and end is the physical address of
- * the highest address in the node.
- *
- * Returns the virtual address of the memory.
- */
-static void __init *careful_zallocation(int nid, unsigned long size,
- unsigned long align,
- unsigned long end_pfn)
-{
- void *ret;
- int new_nid;
- unsigned long ret_paddr;
-
- ret_paddr = __memblock_alloc_base(size, align, end_pfn << PAGE_SHIFT);
-
- /* retry over all memory */
- if (!ret_paddr)
- ret_paddr = __memblock_alloc_base(size, align, memblock_end_of_DRAM());
-
- if (!ret_paddr)
- panic("numa.c: cannot allocate %lu bytes for node %d",
- size, nid);
-
- ret = __va(ret_paddr);
-
- /*
- * We initialize the nodes in numeric order: 0, 1, 2...
- * and hand over control from the MEMBLOCK allocator to the
- * bootmem allocator. If this function is called for
- * node 5, then we know that all nodes <5 are using the
- * bootmem allocator instead of the MEMBLOCK allocator.
- *
- * So, check the nid from which this allocation came
- * and double check to see if we need to use bootmem
- * instead of the MEMBLOCK. We don't free the MEMBLOCK memory
- * since it would be useless.
- */
- new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT);
- if (new_nid < nid) {
- ret = __alloc_bootmem_node(NODE_DATA(new_nid),
- size, align, 0);
-
- dbg("alloc_bootmem %p %lx\n", ret, size);
- }
-
- memset(ret, 0, size);
- return ret;
-}
-
static struct notifier_block ppc64_numa_nb = {
.notifier_call = cpu_numa_callback,
.priority = 1 /* Must run before sched domains notifier. */
};
-static void __init mark_reserved_regions_for_nid(int nid)
+/* Initialize NODE_DATA for a node on the local memory */
+static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
{
- struct pglist_data *node = NODE_DATA(nid);
- struct memblock_region *reg;
-
- for_each_memblock(reserved, reg) {
- unsigned long physbase = reg->base;
- unsigned long size = reg->size;
- unsigned long start_pfn = physbase >> PAGE_SHIFT;
- unsigned long end_pfn = PFN_UP(physbase + size);
- struct node_active_region node_ar;
- unsigned long node_end_pfn = pgdat_end_pfn(node);
-
- /*
- * Check to make sure that this memblock.reserved area is
- * within the bounds of the node that we care about.
- * Checking the nid of the start and end points is not
- * sufficient because the reserved area could span the
- * entire node.
- */
- if (end_pfn <= node->node_start_pfn ||
- start_pfn >= node_end_pfn)
- continue;
-
- get_node_active_region(start_pfn, &node_ar);
- while (start_pfn < end_pfn &&
- node_ar.start_pfn < node_ar.end_pfn) {
- unsigned long reserve_size = size;
- /*
- * if reserved region extends past active region
- * then trim size to active region
- */
- if (end_pfn > node_ar.end_pfn)
- reserve_size = (node_ar.end_pfn << PAGE_SHIFT)
- - physbase;
- /*
- * Only worry about *this* node, others may not
- * yet have valid NODE_DATA().
- */
- if (node_ar.nid == nid) {
- dbg("reserve_bootmem %lx %lx nid=%d\n",
- physbase, reserve_size, node_ar.nid);
- reserve_bootmem_node(NODE_DATA(node_ar.nid),
- physbase, reserve_size,
- BOOTMEM_DEFAULT);
- }
- /*
- * if reserved region is contained in the active region
- * then done.
- */
- if (end_pfn <= node_ar.end_pfn)
- break;
-
- /*
- * reserved region extends past the active region
- * get next active region that contains this
- * reserved region
- */
- start_pfn = node_ar.end_pfn;
- physbase = start_pfn << PAGE_SHIFT;
- size = size - reserve_size;
- get_node_active_region(start_pfn, &node_ar);
- }
- }
+ u64 spanned_pages = end_pfn - start_pfn;
+ const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES);
+ u64 nd_pa;
+ void *nd;
+ int tnid;
+
+ if (spanned_pages)
+ pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
+ nid, start_pfn << PAGE_SHIFT,
+ (end_pfn << PAGE_SHIFT) - 1);
+ else
+ pr_info("Initmem setup node %d\n", nid);
+
+ nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+ nd = __va(nd_pa);
+
+ /* report and initialize */
+ pr_info(" NODE_DATA [mem %#010Lx-%#010Lx]\n",
+ nd_pa, nd_pa + nd_size - 1);
+ tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
+ if (tnid != nid)
+ pr_info(" NODE_DATA(%d) on node %d\n", nid, tnid);
+
+ node_data[nid] = nd;
+ memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
+ NODE_DATA(nid)->node_id = nid;
+ NODE_DATA(nid)->node_start_pfn = start_pfn;
+ NODE_DATA(nid)->node_spanned_pages = spanned_pages;
}
-
-void __init do_init_bootmem(void)
+void __init initmem_init(void)
{
int nid, cpu;
- min_low_pfn = 0;
max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
max_pfn = max_low_pfn;
@@ -1062,64 +956,18 @@ void __init do_init_bootmem(void)
else
dump_numa_memory_topology();
+ memblock_dump_all();
+
for_each_online_node(nid) {
unsigned long start_pfn, end_pfn;
- void *bootmem_vaddr;
- unsigned long bootmap_pages;
get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
-
- /*
- * Allocate the node structure node local if possible
- *
- * Be careful moving this around, as it relies on all
- * previous nodes' bootmem to be initialized and have
- * all reserved areas marked.
- */
- NODE_DATA(nid) = careful_zallocation(nid,
- sizeof(struct pglist_data),
- SMP_CACHE_BYTES, end_pfn);
-
- dbg("node %d\n", nid);
- dbg("NODE_DATA() = %p\n", NODE_DATA(nid));
-
- NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
- NODE_DATA(nid)->node_start_pfn = start_pfn;
- NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
-
- if (NODE_DATA(nid)->node_spanned_pages == 0)
- continue;
-
- dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT);
- dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);
-
- bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
- bootmem_vaddr = careful_zallocation(nid,
- bootmap_pages << PAGE_SHIFT,
- PAGE_SIZE, end_pfn);
-
- dbg("bootmap_vaddr = %p\n", bootmem_vaddr);
-
- init_bootmem_node(NODE_DATA(nid),
- __pa(bootmem_vaddr) >> PAGE_SHIFT,
- start_pfn, end_pfn);
-
- free_bootmem_with_active_regions(nid, end_pfn);
- /*
- * Be very careful about moving this around. Future
- * calls to careful_zallocation() depend on this getting
- * done correctly.
- */
- mark_reserved_regions_for_nid(nid);
+ setup_node_data(nid, start_pfn, end_pfn);
sparse_memory_present_with_active_regions(nid);
}
- init_bootmem_done = 1;
+ sparse_init();
- /*
- * Now bootmem is initialised we can create the node to cpumask
- * lookup tables and setup the cpu callback to populate them.
- */
setup_node_to_cpumask_map();
reset_numa_cpu_lookup_table();
@@ -1153,6 +1001,22 @@ static int __init early_numa(char *p)
}
early_param("numa", early_numa);
+static bool topology_updates_enabled = true;
+
+static int __init early_topology_updates(char *p)
+{
+ if (!p)
+ return 0;
+
+ if (!strcmp(p, "off")) {
+ pr_info("Disabling topology updates\n");
+ topology_updates_enabled = false;
+ }
+
+ return 0;
+}
+early_param("topology_updates", early_topology_updates);
+
#ifdef CONFIG_MEMORY_HOTPLUG
/*
* Find the node associated with a hot added memory section for
@@ -1442,8 +1306,11 @@ static long hcall_vphn(unsigned long cpu, __be32 *associativity)
long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
u64 flags = 1;
int hwcpu = get_hard_smp_processor_id(cpu);
+ int i;
rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
+ for (i = 0; i < 6; i++)
+ retbuf[i] = cpu_to_be64(retbuf[i]);
vphn_unpack_associativity(retbuf, associativity);
return rc;
@@ -1488,11 +1355,14 @@ static int update_cpu_topology(void *data)
cpu = smp_processor_id();
for (update = data; update; update = update->next) {
+ int new_nid = update->new_nid;
if (cpu != update->cpu)
continue;
- unmap_cpu_from_node(update->cpu);
- map_cpu_to_node(update->cpu, update->new_nid);
+ unmap_cpu_from_node(cpu);
+ map_cpu_to_node(cpu, new_nid);
+ set_cpu_numa_node(cpu, new_nid);
+ set_cpu_numa_mem(cpu, local_memory_node(new_nid));
vdso_getcpu_init();
}
@@ -1539,6 +1409,9 @@ int arch_update_cpu_topology(void)
struct device *dev;
int weight, new_nid, i = 0;
+ if (!prrn_enabled && !vphn_enabled)
+ return 0;
+
weight = cpumask_weight(&cpu_associativity_changes_mask);
if (!weight)
return 0;
@@ -1592,6 +1465,15 @@ int arch_update_cpu_topology(void)
cpu = cpu_last_thread_sibling(cpu);
}
+ pr_debug("Topology update for the following CPUs:\n");
+ if (cpumask_weight(&updated_cpus)) {
+ for (ud = &updates[0]; ud; ud = ud->next) {
+ pr_debug("cpu %d moving from node %d "
+ "to %d\n", ud->cpu,
+ ud->old_nid, ud->new_nid);
+ }
+ }
+
/*
* In cases where we have nothing to update (because the updates list
* is too short or because the new topology is same as the old one),
@@ -1675,12 +1557,11 @@ static void stage_topology_update(int core_id)
static int dt_update_callback(struct notifier_block *nb,
unsigned long action, void *data)
{
- struct of_prop_reconfig *update;
+ struct of_reconfig_data *update = data;
int rc = NOTIFY_DONE;
switch (action) {
case OF_RECONFIG_UPDATE_PROPERTY:
- update = (struct of_prop_reconfig *)data;
if (!of_prop_cmp(update->dn->type, "cpu") &&
!of_prop_cmp(update->prop->name, "ibm,associativity")) {
u32 core_id;
@@ -1800,8 +1681,12 @@ static const struct file_operations topology_ops = {
static int topology_update_init(void)
{
- start_topology_update();
- proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops);
+ /* Do not poll for changes if disabled at boot */
+ if (topology_updates_enabled)
+ start_topology_update();
+
+ if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops))
+ return -ENOMEM;
return 0;
}