aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/stable/sysfs-devices-node87
-rw-r--r--Documentation/admin-guide/mm/numaperf.rst169
-rw-r--r--Documentation/filesystems/debugfs.txt16
-rw-r--r--arch/arm64/kernel/acpi_numa.c2
-rw-r--r--arch/arm64/kernel/smp.c4
-rw-r--r--arch/ia64/kernel/acpi.c14
-rw-r--r--arch/x86/kernel/acpi/boot.c36
-rw-r--r--block/blk-integrity.c3
-rw-r--r--block/blk-mq-sysfs.c8
-rw-r--r--block/blk-sysfs.c3
-rw-r--r--drivers/acpi/Kconfig1
-rw-r--r--drivers/acpi/Makefile1
-rw-r--r--drivers/acpi/hmat/Kconfig11
-rw-r--r--drivers/acpi/hmat/Makefile1
-rw-r--r--drivers/acpi/hmat/hmat.c666
-rw-r--r--drivers/acpi/numa.c16
-rw-r--r--drivers/acpi/scan.c4
-rw-r--r--drivers/acpi/tables.c76
-rw-r--r--drivers/base/Kconfig9
-rw-r--r--drivers/base/arch_topology.c36
-rw-r--r--drivers/base/core.c5
-rw-r--r--drivers/base/dd.c5
-rw-r--r--drivers/base/firmware_loader/Kconfig1
-rw-r--r--drivers/base/firmware_loader/builtin/.gitignore1
-rw-r--r--drivers/base/firmware_loader/fallback.c6
-rw-r--r--drivers/base/node.c352
-rw-r--r--drivers/base/platform.c12
-rw-r--r--drivers/base/power/clock_ops.c3
-rw-r--r--drivers/base/power/common.c4
-rw-r--r--drivers/base/power/domain.c4
-rw-r--r--drivers/base/power/domain_governor.c4
-rw-r--r--drivers/base/power/generic_ops.c4
-rw-r--r--drivers/base/power/main.c4
-rw-r--r--drivers/base/power/qos.c6
-rw-r--r--drivers/base/power/runtime.c4
-rw-r--r--drivers/base/power/sysfs.c6
-rw-r--r--drivers/base/power/trace.c2
-rw-r--r--drivers/base/power/wakeirq.c15
-rw-r--r--drivers/base/power/wakeup.c4
-rw-r--r--drivers/base/test/Makefile1
-rw-r--r--drivers/irqchip/irq-gic-v2m.c2
-rw-r--r--drivers/irqchip/irq-gic-v3-its-pci-msi.c2
-rw-r--r--drivers/irqchip/irq-gic-v3-its-platform-msi.c2
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c6
-rw-r--r--drivers/irqchip/irq-gic-v3.c10
-rw-r--r--drivers/irqchip/irq-gic.c4
-rw-r--r--drivers/mailbox/pcc.c2
-rw-r--r--fs/debugfs/file.c77
-rw-r--r--fs/kernfs/dir.c5
-rw-r--r--include/acpi/actbl1.h2
-rw-r--r--include/linux/acpi.h6
-rw-r--r--include/linux/device.h16
-rw-r--r--include/linux/kernfs.h2
-rw-r--r--include/linux/kobject.h3
-rw-r--r--include/linux/node.h71
-rw-r--r--init/Kconfig11
-rw-r--r--kernel/.gitignore1
-rw-r--r--kernel/Makefile10
-rwxr-xr-xkernel/gen_ikh_data.sh89
-rw-r--r--kernel/irq/irqdesc.c3
-rw-r--r--kernel/kheaders.c74
-rw-r--r--kernel/livepatch/core.c3
-rw-r--r--kernel/padata.c3
-rw-r--r--kernel/sched/cpufreq_schedutil.c5
-rw-r--r--lib/kobject.c93
-rw-r--r--lib/kobject_uevent.c11
-rw-r--r--net/core/net-sysfs.c6
-rw-r--r--samples/kobject/kset-example.c3
68 files changed, 1854 insertions, 274 deletions
diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node
index 3e90e1f3bf0a..f7ce68fbd4b9 100644
--- a/Documentation/ABI/stable/sysfs-devices-node
+++ b/Documentation/ABI/stable/sysfs-devices-node
@@ -90,4 +90,89 @@ Date: December 2009
Contact: Lee Schermerhorn <lee.schermerhorn@hp.com>
Description:
The node's huge page size control/query attributes.
- See Documentation/admin-guide/mm/hugetlbpage.rst \ No newline at end of file
+ See Documentation/admin-guide/mm/hugetlbpage.rst
+
+What: /sys/devices/system/node/nodeX/accessY/
+Date: December 2018
+Contact: Keith Busch <keith.busch@intel.com>
+Description:
+ The node's relationship to other nodes for access class "Y".
+
+What: /sys/devices/system/node/nodeX/accessY/initiators/
+Date: December 2018
+Contact: Keith Busch <keith.busch@intel.com>
+Description:
+ The directory containing symlinks to memory initiator
+ nodes that have class "Y" access to this target node's
+ memory. CPUs and other memory initiators in nodes not in
+ the list accessing this node's memory may have different
+ performance.
+
+What: /sys/devices/system/node/nodeX/accessY/targets/
+Date: December 2018
+Contact: Keith Busch <keith.busch@intel.com>
+Description:
+ The directory containing symlinks to memory targets to
+ which this initiator node has class "Y" access.
+
+What: /sys/devices/system/node/nodeX/accessY/initiators/read_bandwidth
+Date: December 2018
+Contact: Keith Busch <keith.busch@intel.com>
+Description:
+ This node's read bandwidth in MB/s when accessed from
+ nodes found in this access class's linked initiators.
+
+What: /sys/devices/system/node/nodeX/accessY/initiators/read_latency
+Date: December 2018
+Contact: Keith Busch <keith.busch@intel.com>
+Description:
+ This node's read latency in nanoseconds when accessed
+ from nodes found in this access class's linked initiators.
+
+What: /sys/devices/system/node/nodeX/accessY/initiators/write_bandwidth
+Date: December 2018
+Contact: Keith Busch <keith.busch@intel.com>
+Description:
+ This node's write bandwidth in MB/s when accessed from
+ nodes found in this access class's linked initiators.
+
+What: /sys/devices/system/node/nodeX/accessY/initiators/write_latency
+Date: December 2018
+Contact: Keith Busch <keith.busch@intel.com>
+Description:
+ This node's write latency in nanoseconds when accessed
+ from nodes found in this access class's linked initiators.
+
+What: /sys/devices/system/node/nodeX/memory_side_cache/indexY/
+Date: December 2018
+Contact: Keith Busch <keith.busch@intel.com>
+Description:
+ The directory containing attributes for the memory-side cache
+ level 'Y'.
+
+What: /sys/devices/system/node/nodeX/memory_side_cache/indexY/indexing
+Date: December 2018
+Contact: Keith Busch <keith.busch@intel.com>
+Description:
+ The cache's associativity indexing: 0 for direct mapped,
+ non-zero if indexed.
+
+What: /sys/devices/system/node/nodeX/memory_side_cache/indexY/line_size
+Date: December 2018
+Contact: Keith Busch <keith.busch@intel.com>
+Description:
+ The number of bytes accessed from the next cache level on a
+ cache miss.
+
+What: /sys/devices/system/node/nodeX/memory_side_cache/indexY/size
+Date: December 2018
+Contact: Keith Busch <keith.busch@intel.com>
+Description:
+ The size of this memory side cache in bytes.
+
+What: /sys/devices/system/node/nodeX/memory_side_cache/indexY/write_policy
+Date: December 2018
+Contact: Keith Busch <keith.busch@intel.com>
+Description:
+ The cache write policy: 0 for write-back, 1 for write-through,
+ other or unknown.
diff --git a/Documentation/admin-guide/mm/numaperf.rst b/Documentation/admin-guide/mm/numaperf.rst
new file mode 100644
index 000000000000..b79f70c04397
--- /dev/null
+++ b/Documentation/admin-guide/mm/numaperf.rst
@@ -0,0 +1,169 @@
+.. _numaperf:
+
+=============
+NUMA Locality
+=============
+
+Some platforms may have multiple types of memory attached to a compute
+node. These disparate memory ranges may share some characteristics, such
+as CPU cache coherence, but may have different performance. For example,
+different media types and buses affect bandwidth and latency.
+
+A system supports such heterogeneous memory by grouping each memory type
+under different domains, or "nodes", based on locality and performance
+characteristics. Some memory may share the same node as a CPU, and others
+are provided as memory only nodes. While memory only nodes do not provide
+CPUs, they may still be local to one or more compute nodes relative to
+other nodes. The following diagram shows one such example of two compute
+nodes with local memory and a memory only node for each compute node:
+
+ +------------------+ +------------------+
+ | Compute Node 0 +-----+ Compute Node 1 |
+ | Local Node0 Mem | | Local Node1 Mem |
+ +--------+---------+ +--------+---------+
+ | |
+ +--------+---------+ +--------+---------+
+ | Slower Node2 Mem | | Slower Node3 Mem |
+ +------------------+ +------------------+
+
+A "memory initiator" is a node containing one or more devices such as
+CPUs or separate memory I/O devices that can initiate memory requests.
+A "memory target" is a node containing one or more physical address
+ranges accessible from one or more memory initiators.
+
+When multiple memory initiators exist, they may not all have the same
+performance when accessing a given memory target. Each initiator-target
+pair may be organized into different ranked access classes to represent
+this relationship. The highest performing initiator to a given target
+is considered to be one of that target's local initiators, and given
+the highest access class, 0. Any given target may have one or more
+local initiators, and any given initiator may have multiple local
+memory targets.
+
+To aid applications matching memory targets with their initiators, the
+kernel provides symlinks to each other. The following example lists the
+relationship for the access class "0" memory initiators and targets::
+
+ # symlinks -v /sys/devices/system/node/nodeX/access0/targets/
+ relative: /sys/devices/system/node/nodeX/access0/targets/nodeY -> ../../nodeY
+
+ # symlinks -v /sys/devices/system/node/nodeY/access0/initiators/
+ relative: /sys/devices/system/node/nodeY/access0/initiators/nodeX -> ../../nodeX
+
+A memory initiator may have multiple memory targets in the same access
+class. The target memory's initiators in a given class indicate the
+nodes' access characteristics share the same performance relative to other
+linked initiator nodes. Each target within an initiator's access class,
+though, does not necessarily perform the same as each other.
+
+================
+NUMA Performance
+================
+
+Applications may wish to consider which node they want their memory to
+be allocated from based on the node's performance characteristics. If
+the system provides these attributes, the kernel exports them under the
+node sysfs hierarchy by appending the attributes directory under the
+memory node's access class 0 initiators as follows::
+
+ /sys/devices/system/node/nodeY/access0/initiators/
+
+These attributes apply only when accessed from nodes that are linked
+under this access class's initiators.
+
+The performance characteristics the kernel provides for the local initiators
+are exported as follows::
+
+ # tree -P "read*|write*" /sys/devices/system/node/nodeY/access0/initiators/
+ /sys/devices/system/node/nodeY/access0/initiators/
+ |-- read_bandwidth
+ |-- read_latency
+ |-- write_bandwidth
+ `-- write_latency
+
+The bandwidth attributes are provided in MiB/second.
+
+The latency attributes are provided in nanoseconds.
+
+The values reported here correspond to the rated latency and bandwidth
+for the platform.
+
+==========
+NUMA Cache
+==========
+
+System memory may be constructed in a hierarchy of elements with various
+performance characteristics in order to provide a large address space of
+slower performing memory cached by a smaller higher performing memory. The
+system physical addresses memory initiators are aware of are provided
+by the last memory level in the hierarchy. The system meanwhile uses
+higher performing memory to transparently cache access to progressively
+slower levels.
+
+The term "far memory" is used to denote the last level memory in the
+hierarchy. Each increasing cache level provides higher performing
+initiator access, and the term "near memory" represents the fastest
+cache provided by the system.
+
+This numbering is different than CPU caches where the cache level (ex:
+L1, L2, L3) uses the CPU-side view where each increased level is lower
+performing. In contrast, the memory cache level is centric to the last
+level memory, so the higher numbered cache level corresponds to memory
+nearer to the CPU, and further from far memory.
+
+The memory-side caches are not directly addressable by software. When
+software accesses a system address, the system will return it from the
+near memory cache if it is present. If it is not present, the system
+accesses the next level of memory until there is either a hit in that
+cache level, or it reaches far memory.
+
+An application does not need to know about caching attributes in order
+to use the system. Software may optionally query the memory cache
+attributes in order to maximize the performance out of such a setup.
+If the system provides a way for the kernel to discover this information,
+for example with ACPI HMAT (Heterogeneous Memory Attribute Table),
+the kernel will append these attributes to the NUMA node memory target.
+
+When the kernel first registers a memory cache with a node, the kernel
+will create the following directory::
+
+ /sys/devices/system/node/nodeX/memory_side_cache/
+
+If that directory is not present, the system either does not provide
+a memory-side cache, or that information is not accessible to the kernel.
+
+The attributes for each level of cache are provided under its cache
+level index::
+
+ /sys/devices/system/node/nodeX/memory_side_cache/indexA/
+ /sys/devices/system/node/nodeX/memory_side_cache/indexB/
+ /sys/devices/system/node/nodeX/memory_side_cache/indexC/
+
+Each cache level's directory provides its attributes. For example, the
+following shows a single cache level and the attributes available for
+software to query::
+
+ # tree /sys/devices/system/node/node0/memory_side_cache/
+ /sys/devices/system/node/node0/memory_side_cache/
+ |-- index1
+ | |-- indexing
+ | |-- line_size
+ | |-- size
+ | `-- write_policy
+
+The "indexing" will be 0 if it is a direct-mapped cache, and non-zero
+for any other index-based, multi-way associativity.
+
+The "line_size" is the number of bytes accessed from the next cache
+level on a miss.
+
+The "size" is the number of bytes provided by this cache level.
+
+The "write_policy" will be 0 for write-back, and non-zero for
+write-through caching.
+
+========
+See Also
+========
+.. [1] https://www.uefi.org/sites/default/files/resources/ACPI_6_2.pdf
+ Section 5.2.27
diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.txt
index 4f45f71149cb..4a0a9c3f4af6 100644
--- a/Documentation/filesystems/debugfs.txt
+++ b/Documentation/filesystems/debugfs.txt
@@ -31,10 +31,10 @@ This call, if successful, will make a directory called name underneath the
indicated parent directory. If parent is NULL, the directory will be
created in the debugfs root. On success, the return value is a struct
dentry pointer which can be used to create files in the directory (and to
-clean it up at the end). A NULL return value indicates that something went
-wrong. If ERR_PTR(-ENODEV) is returned, that is an indication that the
-kernel has been built without debugfs support and none of the functions
-described below will work.
+clean it up at the end). An ERR_PTR(-ERROR) return value indicates that
+something went wrong. If ERR_PTR(-ENODEV) is returned, that is an
+indication that the kernel has been built without debugfs support and none
+of the functions described below will work.
The most general way to create a file within a debugfs directory is with:
@@ -48,8 +48,9 @@ should hold the file, data will be stored in the i_private field of the
resulting inode structure, and fops is a set of file operations which
implement the file's behavior. At a minimum, the read() and/or write()
operations should be provided; others can be included as needed. Again,
-the return value will be a dentry pointer to the created file, NULL for
-error, or ERR_PTR(-ENODEV) if debugfs support is missing.
+the return value will be a dentry pointer to the created file,
+ERR_PTR(-ERROR) on error, or ERR_PTR(-ENODEV) if debugfs support is
+missing.
Create a file with an initial size, the following function can be used
instead:
@@ -214,7 +215,8 @@ can be removed with:
void debugfs_remove(struct dentry *dentry);
-The dentry value can be NULL, in which case nothing will be removed.
+The dentry value can be NULL or an error value, in which case nothing will
+be removed.
Once upon a time, debugfs users were required to remember the dentry
pointer for every debugfs file they created so that all files could be
diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c
index eac1d0cc595c..7ff800045434 100644
--- a/arch/arm64/kernel/acpi_numa.c
+++ b/arch/arm64/kernel/acpi_numa.c
@@ -45,7 +45,7 @@ static inline int get_cpu_for_acpi_id(u32 uid)
return -EINVAL;
}
-static int __init acpi_parse_gicc_pxm(struct acpi_subtable_header *header,
+static int __init acpi_parse_gicc_pxm(union acpi_subtable_headers *header,
const unsigned long end)
{
struct acpi_srat_gicc_affinity *pa;
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 824de7038967..bb4b3f07761a 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -586,7 +586,7 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
}
static int __init
-acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header,
+acpi_parse_gic_cpu_interface(union acpi_subtable_headers *header,
const unsigned long end)
{
struct acpi_madt_generic_interrupt *processor;
@@ -595,7 +595,7 @@ acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header,
if (BAD_MADT_GICC_ENTRY(processor, end))
return -EINVAL;
- acpi_table_print_madt_entry(header);
+ acpi_table_print_madt_entry(&header->common);
acpi_map_gic_cpu_interface(processor);
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 41eb281709da..1435e7a1a8cd 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -177,7 +177,7 @@ struct acpi_table_madt *acpi_madt __initdata;
static u8 has_8259;
static int __init
-acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header,
+acpi_parse_lapic_addr_ovr(union acpi_subtable_headers * header,
const unsigned long end)
{
struct acpi_madt_local_apic_override *lapic;
@@ -195,7 +195,7 @@ acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header,
}
static int __init
-acpi_parse_lsapic(struct acpi_subtable_header * header, const unsigned long end)
+acpi_parse_lsapic(union acpi_subtable_headers *header, const unsigned long end)
{
struct acpi_madt_local_sapic *lsapic;
@@ -216,7 +216,7 @@ acpi_parse_lsapic(struct acpi_subtable_header * header, const unsigned long end)
}
static int __init
-acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long end)
+acpi_parse_lapic_nmi(union acpi_subtable_headers * header, const unsigned long end)
{
struct acpi_madt_local_apic_nmi *lacpi_nmi;
@@ -230,7 +230,7 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
}
static int __init
-acpi_parse_iosapic(struct acpi_subtable_header * header, const unsigned long end)
+acpi_parse_iosapic(union acpi_subtable_headers * header, const unsigned long end)
{
struct acpi_madt_io_sapic *iosapic;
@@ -245,7 +245,7 @@ acpi_parse_iosapic(struct acpi_subtable_header * header, const unsigned long end
static unsigned int __initdata acpi_madt_rev;
static int __init
-acpi_parse_plat_int_src(struct acpi_subtable_header * header,
+acpi_parse_plat_int_src(union acpi_subtable_headers * header,
const unsigned long end)
{
struct acpi_madt_interrupt_source *plintsrc;
@@ -329,7 +329,7 @@ unsigned int get_cpei_target_cpu(void)
}
static int __init
-acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
+acpi_parse_int_src_ovr(union acpi_subtable_headers * header,
const unsigned long end)
{
struct acpi_madt_interrupt_override *p;
@@ -350,7 +350,7 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
}
static int __init
-acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end)
+acpi_parse_nmi_src(union acpi_subtable_headers * header, const unsigned long end)
{
struct acpi_madt_nmi_source *nmi_src;
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 8dcbf6890714..9fc92e4539d8 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -197,7 +197,7 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled)
}
static int __init
-acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
+acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end)
{
struct acpi_madt_local_x2apic *processor = NULL;
#ifdef CONFIG_X86_X2APIC
@@ -210,7 +210,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
if (BAD_MADT_ENTRY(processor, end))
return -EINVAL;
- acpi_table_print_madt_entry(header);
+ acpi_table_print_madt_entry(&header->common);
#ifdef CONFIG_X86_X2APIC
apic_id = processor->local_apic_id;
@@ -242,7 +242,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
}
static int __init
-acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end)
+acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end)
{
struct acpi_madt_local_apic *processor = NULL;
@@ -251,7 +251,7 @@ acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end)
if (BAD_MADT_ENTRY(processor, end))
return -EINVAL;
- acpi_table_print_madt_entry(header);
+ acpi_table_print_madt_entry(&header->common);
/* Ignore invalid ID */
if (processor->id == 0xff)
@@ -272,7 +272,7 @@ acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end)
}
static int __init
-acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end)
+acpi_parse_sapic(union acpi_subtable_headers *header, const unsigned long end)
{
struct acpi_madt_local_sapic *processor = NULL;
@@ -281,7 +281,7 @@ acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end)
if (BAD_MADT_ENTRY(processor, end))
return -EINVAL;
- acpi_table_print_madt_entry(header);
+ acpi_table_print_madt_entry(&header->common);
acpi_register_lapic((processor->id << 8) | processor->eid,/* APIC ID */
processor->processor_id, /* ACPI ID */
@@ -291,7 +291,7 @@ acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end)
}
static int __init
-acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header,
+acpi_parse_lapic_addr_ovr(union acpi_subtable_headers * header,
const unsigned long end)
{
struct acpi_madt_local_apic_override *lapic_addr_ovr = NULL;
@@ -301,7 +301,7 @@ acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header,
if (BAD_MADT_ENTRY(lapic_addr_ovr, end))
return -EINVAL;
- acpi_table_print_madt_entry(header);
+ acpi_table_print_madt_entry(&header->common);
acpi_lapic_addr = lapic_addr_ovr->address;
@@ -309,7 +309,7 @@ acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header,
}
static int __init
-acpi_parse_x2apic_nmi(struct acpi_subtable_header *header,
+acpi_parse_x2apic_nmi(union acpi_subtable_headers *header,
const unsigned long end)
{
struct acpi_madt_local_x2apic_nmi *x2apic_nmi = NULL;
@@ -319,7 +319,7 @@ acpi_parse_x2apic_nmi(struct acpi_subtable_header *header,
if (BAD_MADT_ENTRY(x2apic_nmi, end))
return -EINVAL;
- acpi_table_print_madt_entry(header);
+ acpi_table_print_madt_entry(&header->common);
if (x2apic_nmi->lint != 1)
printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
@@ -328,7 +328,7 @@ acpi_parse_x2apic_nmi(struct acpi_subtable_header *header,
}
static int __init
-acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long end)
+acpi_parse_lapic_nmi(union acpi_subtable_headers * header, const unsigned long end)
{
struct acpi_madt_local_apic_nmi *lapic_nmi = NULL;
@@ -337,7 +337,7 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
if (BAD_MADT_ENTRY(lapic_nmi, end))
return -EINVAL;
- acpi_table_print_madt_entry(header);
+ acpi_table_print_madt_entry(&header->common);
if (lapic_nmi->lint != 1)
printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
@@ -449,7 +449,7 @@ static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
}
static int __init
-acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
+acpi_parse_ioapic(union acpi_subtable_headers * header, const unsigned long end)
{
struct acpi_madt_io_apic *ioapic = NULL;
struct ioapic_domain_cfg cfg = {
@@ -462,7 +462,7 @@ acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
if (BAD_MADT_ENTRY(ioapic, end))
return -EINVAL;
- acpi_table_print_madt_entry(header);
+ acpi_table_print_madt_entry(&header->common);
/* Statically assign IRQ numbers for IOAPICs hosting legacy IRQs */
if (ioapic->global_irq_base < nr_legacy_irqs())
@@ -508,7 +508,7 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
}
static int __init
-acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
+acpi_parse_int_src_ovr(union acpi_subtable_headers * header,
const unsigned long end)
{
struct acpi_madt_interrupt_override *intsrc = NULL;
@@ -518,7 +518,7 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
if (BAD_MADT_ENTRY(intsrc, end))
return -EINVAL;
- acpi_table_print_madt_entry(header);
+ acpi_table_print_madt_entry(&header->common);
if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) {
acpi_sci_ioapic_setup(intsrc->source_irq,
@@ -550,7 +550,7 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
}
static int __init
-acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end)
+acpi_parse_nmi_src(union acpi_subtable_headers * header, const unsigned long end)
{
struct acpi_madt_nmi_source *nmi_src = NULL;
@@ -559,7 +559,7 @@ acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end
if (BAD_MADT_ENTRY(nmi_src, end))
return -EINVAL;
- acpi_table_print_madt_entry(header);
+ acpi_table_print_madt_entry(&header->common);
/* TBD: Support nimsrc entries? */
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index d1ab089e0919..85864c71e858 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -365,6 +365,7 @@ static struct attribute *integrity_attrs[] = {
&integrity_device_entry.attr,
NULL,
};
+ATTRIBUTE_GROUPS(integrity);
static const struct sysfs_ops integrity_ops = {
.show = &integrity_attr_show,
@@ -372,7 +373,7 @@ static const struct sysfs_ops integrity_ops = {
};
static struct kobj_type integrity_ktype = {
- .default_attrs = integrity_attrs,
+ .default_groups = integrity_groups,
.sysfs_ops = &integrity_ops,
};
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 3f9c3f4ac44c..5315e538b3b1 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -173,10 +173,6 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
return ret;
}
-static struct attribute *default_ctx_attrs[] = {
- NULL,
-};
-
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = {
.attr = {.name = "nr_tags", .mode = 0444 },
.show = blk_mq_hw_sysfs_nr_tags_show,
@@ -196,6 +192,7 @@ static struct attribute *default_hw_ctx_attrs[] = {
&blk_mq_hw_sysfs_cpus.attr,
NULL,
};
+ATTRIBUTE_GROUPS(default_hw_ctx);
static const struct sysfs_ops blk_mq_sysfs_ops = {
.show = blk_mq_sysfs_show,
@@ -214,13 +211,12 @@ static struct kobj_type blk_mq_ktype = {
static struct kobj_type blk_mq_ctx_ktype = {
.sysfs_ops = &blk_mq_sysfs_ops,
- .default_attrs = default_ctx_attrs,
.release = blk_mq_ctx_sysfs_release,
};
static struct kobj_type blk_mq_hw_ktype = {
.sysfs_ops = &blk_mq_hw_sysfs_ops,
- .default_attrs = default_hw_ctx_attrs,
+ .default_groups = default_hw_ctx_groups,
.release = blk_mq_hw_sysfs_release,
};
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 422327089e0f..7a95a1eb27e1 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -769,6 +769,7 @@ static struct attribute *default_attrs[] = {
#endif
NULL,
};
+ATTRIBUTE_GROUPS(default);
#define to_queue(atr) container_of((atr), struct queue_sysfs_entry, attr)
@@ -890,7 +891,7 @@ static const struct sysfs_ops queue_sysfs_ops = {
struct kobj_type blk_queue_ktype = {
.sysfs_ops = &queue_sysfs_ops,
- .default_attrs = default_attrs,
+ .default_groups = default_groups,
.release = blk_release_queue,
};
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 4e015c77e48e..283ee94224c6 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -475,6 +475,7 @@ config ACPI_REDUCED_HARDWARE_ONLY
If you are unsure what to do, do not enable this option.
source "drivers/acpi/nfit/Kconfig"
+source "drivers/acpi/hmat/Kconfig"
source "drivers/acpi/apei/Kconfig"
source "drivers/acpi/dptf/Kconfig"
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index bb857421c2e8..5d361e4e3405 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -80,6 +80,7 @@ obj-$(CONFIG_ACPI_PROCESSOR) += processor.o
obj-$(CONFIG_ACPI) += container.o
obj-$(CONFIG_ACPI_THERMAL) += thermal.o
obj-$(CONFIG_ACPI_NFIT) += nfit/
+obj-$(CONFIG_ACPI_HMAT) += hmat/
obj-$(CONFIG_ACPI) += acpi_memhotplug.o
obj-$(CONFIG_ACPI_HOTPLUG_IOAPIC) += ioapic.o
obj-$(CONFIG_ACPI_BATTERY) += battery.o
diff --git a/drivers/acpi/hmat/Kconfig b/drivers/acpi/hmat/Kconfig
new file mode 100644
index 000000000000..95a29964dbea
--- /dev/null
+++ b/drivers/acpi/hmat/Kconfig
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+config ACPI_HMAT
+ bool "ACPI Heterogeneous Memory Attribute Table Support"
+ depends on ACPI_NUMA
+ select HMEM_REPORTING
+ help
+ If set, this option has the kernel parse and report the
+ platform's ACPI HMAT (Heterogeneous Memory Attributes Table),
+ register memory initiators with their targets, and export
+ performance attributes through the node's sysfs device if
+ provided.
diff --git a/drivers/acpi/hmat/Makefile b/drivers/acpi/hmat/Makefile
new file mode 100644
index 000000000000..e909051d3d00
--- /dev/null
+++ b/drivers/acpi/hmat/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_ACPI_HMAT) := hmat.o
diff --git a/drivers/acpi/hmat/hmat.c b/drivers/acpi/hmat/hmat.c
new file mode 100644
index 000000000000..96b7d39a97c6
--- /dev/null
+++ b/drivers/acpi/hmat/hmat.c
@@ -0,0 +1,666 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019, Intel Corporation.
+ *
+ * Heterogeneous Memory Attributes Table (HMAT) representation
+ *
+ * This program parses and reports the platform's HMAT tables, and registers
+ * the applicable attributes with the node's interfaces.
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/list_sort.h>
+#include <linux/node.h>
+#include <linux/sysfs.h>
+
+static __initdata u8 hmat_revision;
+
+static __initdata LIST_HEAD(targets);
+static __initdata LIST_HEAD(initiators);
+static __initdata LIST_HEAD(localities);
+
+/*
+ * The defined enum order is used to prioritize attributes to break ties when
+ * selecting the best performing node.
+ */
+enum locality_types {
+ WRITE_LATENCY,
+ READ_LATENCY,
+ WRITE_BANDWIDTH,
+ READ_BANDWIDTH,
+};
+
+static struct memory_locality *localities_types[4];
+
+struct memory_target {
+ struct list_head node;
+ unsigned int memory_pxm;
+ unsigned int processor_pxm;
+ struct node_hmem_attrs hmem_attrs;
+};
+
+struct memory_initiator {
+ struct list_head node;
+ unsigned int processor_pxm;
+};
+
+struct memory_locality {
+ struct list_head node;
+ struct acpi_hmat_locality *hmat_loc;
+};
+
+static __init struct memory_initiator *find_mem_initiator(unsigned int cpu_pxm)
+{
+ struct memory_initiator *initiator;
+
+ list_for_each_entry(initiator, &initiators, node)
+ if (initiator->processor_pxm == cpu_pxm)
+ return initiator;
+ return NULL;
+}
+
+static __init struct memory_target *find_mem_target(unsigned int mem_pxm)
+{
+ struct memory_target *target;
+
+ list_for_each_entry(target, &targets, node)
+ if (target->memory_pxm == mem_pxm)
+ return target;
+ return NULL;
+}
+
+static __init void alloc_memory_initiator(unsigned int cpu_pxm)
+{
+ struct memory_initiator *initiator;
+
+ if (pxm_to_node(cpu_pxm) == NUMA_NO_NODE)
+ return;
+
+ initiator = find_mem_initiator(cpu_pxm);
+ if (initiator)
+ return;
+
+ initiator = kzalloc(sizeof(*initiator), GFP_KERNEL);
+ if (!initiator)
+ return;
+
+ initiator->processor_pxm = cpu_pxm;
+ list_add_tail(&initiator->node, &initiators);
+}
+
+static __init void alloc_memory_target(unsigned int mem_pxm)
+{
+ struct memory_target *target;
+
+ if (pxm_to_node(mem_pxm) == NUMA_NO_NODE)
+ return;
+
+ target = find_mem_target(mem_pxm);
+ if (target)
+ return;
+
+ target = kzalloc(sizeof(*target), GFP_KERNEL);
+ if (!target)
+ return;
+
+ target->memory_pxm = mem_pxm;
+ target->processor_pxm = PXM_INVAL;
+ list_add_tail(&target->node, &targets);
+}
+
+static __init const char *hmat_data_type(u8 type)
+{
+ switch (type) {
+ case ACPI_HMAT_ACCESS_LATENCY:
+ return "Access Latency";
+ case ACPI_HMAT_READ_LATENCY:
+ return "Read Latency";
+ case ACPI_HMAT_WRITE_LATENCY:
+ return "Write Latency";
+ case ACPI_HMAT_ACCESS_BANDWIDTH:
+ return "Access Bandwidth";
+ case ACPI_HMAT_READ_BANDWIDTH:
+ return "Read Bandwidth";
+ case ACPI_HMAT_WRITE_BANDWIDTH:
+ return "Write Bandwidth";
+ default:
+ return "Reserved";
+ }
+}
+
+static __init const char *hmat_data_type_suffix(u8 type)
+{
+ switch (type) {
+ case ACPI_HMAT_ACCESS_LATENCY:
+ case ACPI_HMAT_READ_LATENCY:
+ case ACPI_HMAT_WRITE_LATENCY:
+ return " nsec";
+ case ACPI_HMAT_ACCESS_BANDWIDTH:
+ case ACPI_HMAT_READ_BANDWIDTH:
+ case ACPI_HMAT_WRITE_BANDWIDTH:
+ return " MB/s";
+ default:
+ return "";
+ }
+}
+
+/*
+ * Convert a raw HMAT locality matrix entry into the value this driver reports.
+ *
+ * @entry: raw 16-bit matrix entry
+ * @base: the locality structure's entry base unit, multiplied into @entry
+ * @type: ACPI_HMAT_* data type, used to tell latency from bandwidth
+ *
+ * Returns 0 when @entry is 0 or 0xffff, when entry * base would exceed
+ * UINT_MAX, or (revision 1 only) when the product is below 10. Otherwise
+ * returns the scaled product.
+ */
+static __init u32 hmat_normalize(u16 entry, u64 base, u8 type)
+{
+ u32 value;
+
+ /*
+ * Reject the 0 and 0xffff entries, and any entry whose product with
+ * the base unit would not fit in 32 bits.
+ */
+ if (entry == 0xffff || !entry)
+ return 0;
+ else if (base > (UINT_MAX / (entry)))
+ return 0;
+
+ /*
+ * Revision 1: divide the product by 10, rounding up. Revision 2:
+ * convert latency from picoseconds to nanoseconds (divide by 1000,
+ * rounding up); bandwidth values are left as is.
+ */
+ value = entry * base;
+ if (hmat_revision == 1) {
+ if (value < 10)
+ return 0;
+ value = DIV_ROUND_UP(value, 10);
+ } else if (hmat_revision == 2) {
+ switch (type) {
+ case ACPI_HMAT_ACCESS_LATENCY:
+ case ACPI_HMAT_READ_LATENCY:
+ case ACPI_HMAT_WRITE_LATENCY:
+ value = DIV_ROUND_UP(value, 1000);
+ break;
+ default:
+ break;
+ }
+ }
+ return value;
+}
+
+static __init void hmat_update_target_access(struct memory_target *target,
+ u8 type, u32 value)
+{
+ switch (type) {
+ case ACPI_HMAT_ACCESS_LATENCY:
+ target->hmem_attrs.read_latency = value;
+ target->hmem_attrs.write_latency = value;
+ break;
+ case ACPI_HMAT_READ_LATENCY:
+ target->hmem_attrs.read_latency = value;
+ break;
+ case ACPI_HMAT_WRITE_LATENCY:
+ target->hmem_attrs.write_latency = value;
+ break;
+ case ACPI_HMAT_ACCESS_BANDWIDTH:
+ target->hmem_attrs.read_bandwidth = value;
+ target->hmem_attrs.write_bandwidth = value;
+ break;
+ case ACPI_HMAT_READ_BANDWIDTH:
+ target->hmem_attrs.read_bandwidth = value;
+ break;
+ case ACPI_HMAT_WRITE_BANDWIDTH:
+ target->hmem_attrs.write_bandwidth = value;
+ break;
+ default:
+ break;
+ }
+}
+
+static __init void hmat_add_locality(struct acpi_hmat_locality *hmat_loc)
+{
+ struct memory_locality *loc;
+
+ loc = kzalloc(sizeof(*loc), GFP_KERNEL);
+ if (!loc) {
+ pr_notice_once("Failed to allocate HMAT locality\n");
+ return;
+ }
+
+ loc->hmat_loc = hmat_loc;
+ list_add_tail(&loc->node, &localities);
+
+ switch (hmat_loc->data_type) {
+ case ACPI_HMAT_ACCESS_LATENCY:
+ localities_types[READ_LATENCY] = loc;
+ localities_types[WRITE_LATENCY] = loc;
+ break;
+ case ACPI_HMAT_READ_LATENCY:
+ localities_types[READ_LATENCY] = loc;
+ break;
+ case ACPI_HMAT_WRITE_LATENCY:
+ localities_types[WRITE_LATENCY] = loc;
+ break;
+ case ACPI_HMAT_ACCESS_BANDWIDTH:
+ localities_types[READ_BANDWIDTH] = loc;
+ localities_types[WRITE_BANDWIDTH] = loc;
+ break;
+ case ACPI_HMAT_READ_BANDWIDTH:
+ localities_types[READ_BANDWIDTH] = loc;
+ break;
+ case ACPI_HMAT_WRITE_BANDWIDTH:
+ localities_types[WRITE_BANDWIDTH] = loc;
+ break;
+ default:
+ break;
+ }
+}
+
+/*
+ * Parse one HMAT System Locality Latency and Bandwidth structure.
+ *
+ * The fixed header is followed by the initiator proximity domain list, the
+ * target proximity domain list, and an initiator-by-target matrix of 16-bit
+ * entries. Every initiator listed is recorded, every entry is logged, and
+ * for memory-hierarchy structures the value is applied to any target whose
+ * local processor domain matches the initiator. Memory-hierarchy structures
+ * are also remembered for the later best-initiator search.
+ *
+ * @header: subtable to parse
+ * @end: end of the enclosing table (unused here)
+ *
+ * Returns 0 on success or -EINVAL when the structure is too short for the
+ * domain counts it advertises.
+ */
+static __init int hmat_parse_locality(union acpi_subtable_headers *header,
+ const unsigned long end)
+{
+ struct acpi_hmat_locality *hmat_loc = (void *)header;
+ struct memory_target *target;
+ unsigned int init, targ, ipds, tpds;
+ u64 total_size;
+ u32 *inits, *targs, value;
+ u16 *entries;
+ u8 type, mem_hier;
+
+ if (hmat_loc->header.length < sizeof(*hmat_loc)) {
+ pr_notice("HMAT: Unexpected locality header length: %u\n",
+ hmat_loc->header.length);
+ return -EINVAL;
+ }
+
+ type = hmat_loc->data_type;
+ mem_hier = hmat_loc->flags & ACPI_HMAT_MEMORY_HIERARCHY;
+ ipds = hmat_loc->number_of_initiator_Pds;
+ tpds = hmat_loc->number_of_target_Pds;
+
+ /*
+ * The domain counts come from firmware. Refuse counts whose matrix
+ * could not be indexed with an unsigned int, so neither the size
+ * computation below nor "init * tpds + targ" can wrap.
+ */
+ if (ipds && tpds > UINT_MAX / sizeof(*entries) / ipds) {
+ pr_notice("HMAT: Unexpected locality domain counts: initiators:%u targets:%u\n",
+ ipds, tpds);
+ return -EINVAL;
+ }
+
+ /* Computed in 64 bits so the sum itself cannot truncate. */
+ total_size = sizeof(*hmat_loc) + (u64)sizeof(*entries) * ipds * tpds +
+ (u64)sizeof(*inits) * ipds + (u64)sizeof(*targs) * tpds;
+ if (hmat_loc->header.length < total_size) {
+ pr_notice("HMAT: Unexpected locality header length:%u, minimum required:%llu\n",
+ hmat_loc->header.length, total_size);
+ return -EINVAL;
+ }
+
+ pr_info("HMAT: Locality: Flags:%02x Type:%s Initiator Domains:%u Target Domains:%u Base:%llu\n",
+ hmat_loc->flags, hmat_data_type(type), ipds, tpds,
+ hmat_loc->entry_base_unit);
+
+ inits = (u32 *)(hmat_loc + 1);
+ targs = inits + ipds;
+ entries = (u16 *)(targs + tpds);
+ for (init = 0; init < ipds; init++) {
+ alloc_memory_initiator(inits[init]);
+ for (targ = 0; targ < tpds; targ++) {
+ value = hmat_normalize(entries[init * tpds + targ],
+ hmat_loc->entry_base_unit,
+ type);
+ pr_info(" Initiator-Target[%u-%u]:%u%s\n",
+ inits[init], targs[targ], value,
+ hmat_data_type_suffix(type));
+
+ /* Only a target's own local initiator sets its attributes here. */
+ if (mem_hier == ACPI_HMAT_MEMORY) {
+ target = find_mem_target(targs[targ]);
+ if (target && target->processor_pxm == inits[init])
+ hmat_update_target_access(target, type, value);
+ }
+ }
+ }
+
+ if (mem_hier == ACPI_HMAT_MEMORY)
+ hmat_add_locality(hmat_loc);
+
+ return 0;
+}
+
+static __init int hmat_parse_cache(union acpi_subtable_headers *header,
+ const unsigned long end)
+{
+ struct acpi_hmat_cache *cache = (void *)header;
+ struct node_cache_attrs cache_attrs;
+ u32 attrs;
+
+ if (cache->header.length < sizeof(*cache)) {
+ pr_notice("HMAT: Unexpected cache header length: %d\n",
+ cache->header.length);
+ return -EINVAL;
+ }
+
+ attrs = cache->cache_attributes;
+ pr_info("HMAT: Cache: Domain:%d Size:%llu Attrs:%08x SMBIOS Handles:%d\n",
+ cache->memory_PD, cache->cache_size, attrs,
+ cache->number_of_SMBIOShandles);
+
+ cache_attrs.size = cache->cache_size;
+ cache_attrs.level = (attrs & ACPI_HMAT_CACHE_LEVEL) >> 4;
+ cache_attrs.line_size = (attrs & ACPI_HMAT_CACHE_LINE_SIZE) >> 16;
+
+ switch ((attrs & ACPI_HMAT_CACHE_ASSOCIATIVITY) >> 8) {
+ case ACPI_HMAT_CA_DIRECT_MAPPED:
+ cache_attrs.indexing = NODE_CACHE_DIRECT_MAP;
+ break;
+ case ACPI_HMAT_CA_COMPLEX_CACHE_INDEXING:
+ cache_attrs.indexing = NODE_CACHE_INDEXED;
+ break;
+ case ACPI_HMAT_CA_NONE:
+ default:
+ cache_attrs.indexing = NODE_CACHE_OTHER;
+ break;
+ }
+
+ switch ((attrs & ACPI_HMAT_WRITE_POLICY) >> 12) {
+ case ACPI_HMAT_CP_WB:
+ cache_attrs.write_policy = NODE_CACHE_WRITE_BACK;
+ break;
+ case ACPI_HMAT_CP_WT:
+ cache_attrs.write_policy = NODE_CACHE_WRITE_THROUGH;
+ break;
+ case ACPI_HMAT_CP_NONE:
+ default:
+ cache_attrs.write_policy = NODE_CACHE_WRITE_OTHER;
+ break;
+ }
+
+ node_add_cache(pxm_to_node(cache->memory_PD), &cache_attrs);
+ return 0;
+}
+
+/*
+ * Parse one HMAT memory proximity domain (address range) structure.
+ *
+ * When the structure marks its memory domain as valid, the matching
+ * memory_target is looked up. When the processor domain is valid as well,
+ * it is recorded as that target's local initiator.
+ *
+ * @header: subtable to parse
+ * @end: end of the enclosing table (unused here)
+ *
+ * Returns 0 on success, or -EINVAL if the structure length is unexpected,
+ * no target exists for the memory domain, or the processor domain does not
+ * map to a NUMA node.
+ */
+static int __init hmat_parse_proximity_domain(union acpi_subtable_headers *header,
+ const unsigned long end)
+{
+ struct acpi_hmat_proximity_domain *p = (void *)header;
+ struct memory_target *target = NULL;
+
+ if (p->header.length != sizeof(*p)) {
+ pr_notice("HMAT: Unexpected address range header length: %d\n",
+ p->header.length);
+ return -EINVAL;
+ }
+
+ if (hmat_revision == 1)
+ pr_info("HMAT: Memory (%#llx length %#llx) Flags:%04x Processor Domain:%d Memory Domain:%d\n",
+ p->reserved3, p->reserved4, p->flags, p->processor_PD,
+ p->memory_PD);
+ else
+ pr_info("HMAT: Memory Flags:%04x Processor Domain:%d Memory Domain:%d\n",
+ p->flags, p->processor_PD, p->memory_PD);
+
+ if (p->flags & ACPI_HMAT_MEMORY_PD_VALID) {
+ target = find_mem_target(p->memory_PD);
+ if (!target) {
+ pr_debug("HMAT: Memory Domain missing from SRAT\n");
+ return -EINVAL;
+ }
+ }
+ if (target && p->flags & ACPI_HMAT_PROCESSOR_PD_VALID) {
+ int p_node = pxm_to_node(p->processor_PD);
+
+ if (p_node == NUMA_NO_NODE) {
+ pr_debug("HMAT: Invalid Processor Domain\n");
+ return -EINVAL;
+ }
+ /*
+ * Keep the proximity domain, not the node id: processor_pxm is
+ * compared with raw initiator PXMs in hmat_parse_locality() and
+ * is translated with pxm_to_node() at registration time.
+ */
+ target->processor_pxm = p->processor_PD;
+ }
+
+ return 0;
+}
+
+static int __init hmat_parse_subtable(union acpi_subtable_headers *header,
+ const unsigned long end)
+{
+ struct acpi_hmat_structure *hdr = (void *)header;
+
+ if (!hdr)
+ return -EINVAL;
+
+ switch (hdr->type) {
+ case ACPI_HMAT_TYPE_PROXIMITY:
+ return hmat_parse_proximity_domain(header, end);
+ case ACPI_HMAT_TYPE_LOCALITY:
+ return hmat_parse_locality(header, end);
+ case ACPI_HMAT_TYPE_CACHE:
+ return hmat_parse_cache(header, end);
+ default:
+ return -EINVAL;
+ }
+}
+
+static __init int srat_parse_mem_affinity(union acpi_subtable_headers *header,
+ const unsigned long end)
+{
+ struct acpi_srat_mem_affinity *ma = (void *)header;
+
+ if (!ma)
+ return -EINVAL;
+ if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
+ return 0;
+ alloc_memory_target(ma->proximity_domain);
+ return 0;
+}
+
+/*
+ * Look up the normalized performance value for an initiator/target pair in
+ * one locality structure.
+ *
+ * @target: memory target whose memory_pxm selects the matrix column
+ * @initiator: initiator whose processor_pxm selects the matrix row
+ * @hmat_loc: locality structure holding the domain lists and the matrix
+ *
+ * Returns the hmat_normalize() result for the matching entry, or 0 when
+ * either proximity domain is not listed in @hmat_loc.
+ */
+static __init u32 hmat_initiator_perf(struct memory_target *target,
+ struct memory_initiator *initiator,
+ struct acpi_hmat_locality *hmat_loc)
+{
+ unsigned int ipds, tpds, i, idx = 0, tdx = 0;
+ u32 *inits, *targs;
+ u16 *entries;
+
+ /* Initiator list, target list and matrix follow the fixed header. */
+ ipds = hmat_loc->number_of_initiator_Pds;
+ tpds = hmat_loc->number_of_target_Pds;
+ inits = (u32 *)(hmat_loc + 1);
+ targs = inits + ipds;
+ entries = (u16 *)(targs + tpds);
+
+ for (i = 0; i < ipds; i++) {
+ if (inits[i] == initiator->processor_pxm) {
+ idx = i;
+ break;
+ }
+ }
+
+ /* Loop ran to completion: initiator not listed. */
+ if (i == ipds)
+ return 0;
+
+ for (i = 0; i < tpds; i++) {
+ if (targs[i] == target->memory_pxm) {
+ tdx = i;
+ break;
+ }
+ }
+ /* Loop ran to completion: target not listed. */
+ if (i == tpds)
+ return 0;
+
+ return hmat_normalize(entries[idx * tpds + tdx],
+ hmat_loc->entry_base_unit,
+ hmat_loc->data_type);
+}
+
+/*
+ * Replace *best with @value when @value is a better figure for @type.
+ *
+ * For latency types a lower value is better; for bandwidth types a higher
+ * value is better. A *best of 0 is treated as "nothing recorded yet" and is
+ * always replaced. A @value of 0 is never accepted.
+ *
+ * Returns true when *best was overwritten, false otherwise (including for
+ * a @type that is neither latency nor bandwidth).
+ */
+static __init bool hmat_update_best(u8 type, u32 value, u32 *best)
+{
+ bool updated = false;
+
+ if (!value)
+ return false;
+
+ switch (type) {
+ case ACPI_HMAT_ACCESS_LATENCY:
+ case ACPI_HMAT_READ_LATENCY:
+ case ACPI_HMAT_WRITE_LATENCY:
+ if (!*best || *best > value) {
+ *best = value;
+ updated = true;
+ }
+ break;
+ case ACPI_HMAT_ACCESS_BANDWIDTH:
+ case ACPI_HMAT_READ_BANDWIDTH:
+ case ACPI_HMAT_WRITE_BANDWIDTH:
+ if (!*best || *best < value) {
+ *best = value;
+ updated = true;
+ }
+ break;
+ }
+
+ return updated;
+}
+
+/*
+ * list_sort() comparator ordering memory initiators by processor_pxm.
+ *
+ * @priv: bitmap passed through list_sort(); as a side effect the bits for
+ * both compared initiators' proximity domains are set in it
+ * @a: list node of the first initiator
+ * @b: list node of the second initiator
+ *
+ * Returns the difference of the two proximity domains (a minus b).
+ */
+static int initiator_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+ struct memory_initiator *ia;
+ struct memory_initiator *ib;
+ unsigned long *p_nodes = priv;
+
+ ia = list_entry(a, struct memory_initiator, node);
+ ib = list_entry(b, struct memory_initiator, node);
+
+ /* Side effect: mark both initiators in the caller's bitmap. */
+ set_bit(ia->processor_pxm, p_nodes);
+ set_bit(ib->processor_pxm, p_nodes);
+
+ return ia->processor_pxm - ib->processor_pxm;
+}
+
+/*
+ * Link a memory target to the initiator node(s) that access it best.
+ *
+ * If the target has a local processor domain, only that node is linked.
+ * Otherwise every known initiator starts as a candidate and the candidates
+ * are narrowed once per locality type, in locality_types order, keeping
+ * only those that tie for the best value. The best value found for each
+ * type is also stored in the target's attributes. The surviving candidates
+ * are then linked to the target's node.
+ *
+ * @target: memory target to register
+ */
+static __init void hmat_register_target_initiators(struct memory_target *target)
+{
+ static DECLARE_BITMAP(p_nodes, MAX_NUMNODES);
+ struct memory_initiator *initiator;
+ unsigned int mem_nid, cpu_nid;
+ struct memory_locality *loc = NULL;
+ u32 best = 0;
+ int i;
+
+ mem_nid = pxm_to_node(target->memory_pxm);
+ /*
+ * If the Address Range Structure provides a local processor pxm, link
+ * only that one. Otherwise, find the best performance attributes and
+ * register all initiators that match.
+ */
+ if (target->processor_pxm != PXM_INVAL) {
+ cpu_nid = pxm_to_node(target->processor_pxm);
+ register_memory_node_under_compute_node(mem_nid, cpu_nid, 0);
+ return;
+ }
+
+ if (list_empty(&localities))
+ return;
+
+ /*
+ * We need the initiator list sorted so we can use bitmap_clear for
+ * previously set initiators when we find a better memory accessor.
+ */
+ bitmap_zero(p_nodes, MAX_NUMNODES);
+ list_sort(p_nodes, &initiators, initiator_cmp);
+
+ /*
+ * Prime the candidate bitmap with every known initiator. The
+ * comparator also sets these bits, but list_sort() only calls it when
+ * there are two entries to order, so a lone initiator would otherwise
+ * never become a candidate.
+ */
+ list_for_each_entry(initiator, &initiators, node)
+ set_bit(initiator->processor_pxm, p_nodes);
+
+ for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) {
+ loc = localities_types[i];
+ if (!loc)
+ continue;
+
+ best = 0;
+ list_for_each_entry(initiator, &initiators, node) {
+ u32 value;
+
+ if (!test_bit(initiator->processor_pxm, p_nodes))
+ continue;
+
+ value = hmat_initiator_perf(target, initiator, loc->hmat_loc);
+ /* A new best invalidates every lower-numbered candidate. */
+ if (hmat_update_best(loc->hmat_loc->data_type, value, &best))
+ bitmap_clear(p_nodes, 0, initiator->processor_pxm);
+ /* Anything that does not tie the best drops out. */
+ if (value != best)
+ clear_bit(initiator->processor_pxm, p_nodes);
+ }
+ if (best)
+ hmat_update_target_access(target, loc->hmat_loc->data_type, best);
+ }
+
+ for_each_set_bit(i, p_nodes, MAX_NUMNODES) {
+ cpu_nid = pxm_to_node(i);
+ register_memory_node_under_compute_node(mem_nid, cpu_nid, 0);
+ }
+}
+
+static __init void hmat_register_target_perf(struct memory_target *target)
+{
+ unsigned mem_nid = pxm_to_node(target->memory_pxm);
+ node_set_perf_attrs(mem_nid, &target->hmem_attrs, 0);
+}
+
+static __init void hmat_register_targets(void)
+{
+ struct memory_target *target;
+
+ list_for_each_entry(target, &targets, node) {
+ hmat_register_target_initiators(target);
+ hmat_register_target_perf(target);
+ }
+}
+
+static __init void hmat_free_structures(void)
+{
+ struct memory_target *target, *tnext;
+ struct memory_locality *loc, *lnext;
+ struct memory_initiator *initiator, *inext;
+
+ list_for_each_entry_safe(target, tnext, &targets, node) {
+ list_del(&target->node);
+ kfree(target);
+ }
+
+ list_for_each_entry_safe(initiator, inext, &initiators, node) {
+ list_del(&initiator->node);
+ kfree(initiator);
+ }
+
+ list_for_each_entry_safe(loc, lnext, &localities, node) {
+ list_del(&loc->node);
+ kfree(loc);
+ }
+}
+
+/*
+ * Initcall: build the target list from SRAT memory affinity entries, parse
+ * every HMAT subtable type, register the resulting links and attributes
+ * with the node interfaces, then free the temporary lists.
+ *
+ * Always returns 0; a missing, unknown-revision or malformed table only
+ * causes the HMAT to be ignored.
+ */
+static __init int hmat_init(void)
+{
+ struct acpi_table_header *tbl;
+ enum acpi_hmat_type i;
+ acpi_status status;
+
+ if (srat_disabled())
+ return 0;
+
+ status = acpi_get_table(ACPI_SIG_SRAT, 0, &tbl);
+ if (ACPI_FAILURE(status))
+ return 0;
+
+ /* Memory targets come from SRAT; HMAT only describes them. */
+ if (acpi_table_parse_entries(ACPI_SIG_SRAT,
+ sizeof(struct acpi_table_srat),
+ ACPI_SRAT_TYPE_MEMORY_AFFINITY,
+ srat_parse_mem_affinity, 0) < 0)
+ goto out_put;
+ acpi_put_table(tbl);
+
+ /*
+ * NOTE(review): on failure this jumps to out_put, which calls
+ * acpi_put_table(tbl) again; confirm acpi_get_table() leaves tbl in
+ * a state that call tolerates.
+ */
+ status = acpi_get_table(ACPI_SIG_HMAT, 0, &tbl);
+ if (ACPI_FAILURE(status))
+ goto out_put;
+
+ hmat_revision = tbl->revision;
+ switch (hmat_revision) {
+ case 1:
+ case 2:
+ break;
+ default:
+ pr_notice("Ignoring HMAT: Unknown revision:%d\n", hmat_revision);
+ goto out_put;
+ }
+
+ /* One pass per subtable type, in enum order. */
+ for (i = ACPI_HMAT_TYPE_PROXIMITY; i < ACPI_HMAT_TYPE_RESERVED; i++) {
+ if (acpi_table_parse_entries(ACPI_SIG_HMAT,
+ sizeof(struct acpi_table_hmat), i,
+ hmat_parse_subtable, 0) < 0) {
+ pr_notice("Ignoring HMAT: Invalid table\n");
+ goto out_put;
+ }
+ }
+ hmat_register_targets();
+out_put:
+ hmat_free_structures();
+ acpi_put_table(tbl);
+ return 0;
+}
+subsys_initcall(hmat_init);
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 867f6e3f2b4f..30995834ad70 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -339,7 +339,7 @@ acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa)
}
static int __init
-acpi_parse_x2apic_affinity(struct acpi_subtable_header *header,
+acpi_parse_x2apic_affinity(union acpi_subtable_headers *header,
const unsigned long end)
{
struct acpi_srat_x2apic_cpu_affinity *processor_affinity;
@@ -348,7 +348,7 @@ acpi_parse_x2apic_affinity(struct acpi_subtable_header *header,
if (!processor_affinity)
return -EINVAL;
- acpi_table_print_srat_entry(header);
+ acpi_table_print_srat_entry(&header->common);
/* let architecture-dependent part to do it */
acpi_numa_x2apic_affinity_init(processor_affinity);
@@ -357,7 +357,7 @@ acpi_parse_x2apic_affinity(struct acpi_subtable_header *header,
}
static int __init
-acpi_parse_processor_affinity(struct acpi_subtable_header *header,
+acpi_parse_processor_affinity(union acpi_subtable_headers *header,
const unsigned long end)
{
struct acpi_srat_cpu_affinity *processor_affinity;
@@ -366,7 +366,7 @@ acpi_parse_processor_affinity(struct acpi_subtable_header *header,
if (!processor_affinity)
return -EINVAL;
- acpi_table_print_srat_entry(header);
+ acpi_table_print_srat_entry(&header->common);
/* let architecture-dependent part to do it */
acpi_numa_processor_affinity_init(processor_affinity);
@@ -375,7 +375,7 @@ acpi_parse_processor_affinity(struct acpi_subtable_header *header,
}
static int __init
-acpi_parse_gicc_affinity(struct acpi_subtable_header *header,
+acpi_parse_gicc_affinity(union acpi_subtable_headers *header,
const unsigned long end)
{
struct acpi_srat_gicc_affinity *processor_affinity;
@@ -384,7 +384,7 @@ acpi_parse_gicc_affinity(struct acpi_subtable_header *header,
if (!processor_affinity)
return -EINVAL;
- acpi_table_print_srat_entry(header);
+ acpi_table_print_srat_entry(&header->common);
/* let architecture-dependent part to do it */
acpi_numa_gicc_affinity_init(processor_affinity);
@@ -395,7 +395,7 @@ acpi_parse_gicc_affinity(struct acpi_subtable_header *header,
static int __initdata parsed_numa_memblks;
static int __init
-acpi_parse_memory_affinity(struct acpi_subtable_header * header,
+acpi_parse_memory_affinity(union acpi_subtable_headers * header,
const unsigned long end)
{
struct acpi_srat_mem_affinity *memory_affinity;
@@ -404,7 +404,7 @@ acpi_parse_memory_affinity(struct acpi_subtable_header * header,
if (!memory_affinity)
return -EINVAL;
- acpi_table_print_srat_entry(header);
+ acpi_table_print_srat_entry(&header->common);
/* let architecture-dependent part to do it */
if (!acpi_numa_memory_affinity_init(memory_affinity))
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index b845dc3e0ba9..566270d0e91a 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -2238,10 +2238,10 @@ static struct acpi_probe_entry *ape;
static int acpi_probe_count;
static DEFINE_MUTEX(acpi_probe_mutex);
-static int __init acpi_match_madt(struct acpi_subtable_header *header,
+static int __init acpi_match_madt(union acpi_subtable_headers *header,
const unsigned long end)
{
- if (!ape->subtable_valid || ape->subtable_valid(header, ape))
+ if (!ape->subtable_valid || ape->subtable_valid(&header->common, ape))
if (!ape->probe_subtbl(header, end))
acpi_probe_count++;
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index d7bf936b1646..3b5d04fd5e3e 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -49,6 +49,16 @@ static struct acpi_table_desc initial_tables[ACPI_MAX_TABLES] __initdata;
static int acpi_apic_instance __initdata;
+enum acpi_subtable_type {
+ ACPI_SUBTABLE_COMMON,
+ ACPI_SUBTABLE_HMAT,
+};
+
+struct acpi_subtable_entry {
+ union acpi_subtable_headers *hdr;
+ enum acpi_subtable_type type;
+};
+
/*
* Disable table checksum verification for the early stage due to the size
* limitation of the current x86 early mapping implementation.
@@ -217,6 +227,50 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header)
}
}
+static unsigned long __init
+acpi_get_entry_type(struct acpi_subtable_entry *entry)
+{
+ switch (entry->type) {
+ case ACPI_SUBTABLE_COMMON:
+ return entry->hdr->common.type;
+ case ACPI_SUBTABLE_HMAT:
+ return entry->hdr->hmat.type;
+ }
+ return 0;
+}
+
+static unsigned long __init
+acpi_get_entry_length(struct acpi_subtable_entry *entry)
+{
+ switch (entry->type) {
+ case ACPI_SUBTABLE_COMMON:
+ return entry->hdr->common.length;
+ case ACPI_SUBTABLE_HMAT:
+ return entry->hdr->hmat.length;
+ }
+ return 0;
+}
+
+static unsigned long __init
+acpi_get_subtable_header_length(struct acpi_subtable_entry *entry)
+{
+ switch (entry->type) {
+ case ACPI_SUBTABLE_COMMON:
+ return sizeof(entry->hdr->common);
+ case ACPI_SUBTABLE_HMAT:
+ return sizeof(entry->hdr->hmat);
+ }
+ return 0;
+}
+
+static enum acpi_subtable_type __init
+acpi_get_subtable_type(char *id)
+{
+ if (strncmp(id, ACPI_SIG_HMAT, 4) == 0)
+ return ACPI_SUBTABLE_HMAT;
+ return ACPI_SUBTABLE_COMMON;
+}
+
/**
* acpi_parse_entries_array - for each proc_num find a suitable subtable
*
@@ -245,8 +299,8 @@ static int __init acpi_parse_entries_array(char *id, unsigned long table_size,
struct acpi_subtable_proc *proc, int proc_num,
unsigned int max_entries)
{
- struct acpi_subtable_header *entry;
- unsigned long table_end;
+ struct acpi_subtable_entry entry;
+ unsigned long table_end, subtable_len, entry_len;
int count = 0;
int errs = 0;
int i;
@@ -269,19 +323,20 @@ static int __init acpi_parse_entries_array(char *id, unsigned long table_size,
/* Parse all entries looking for a match. */
- entry = (struct acpi_subtable_header *)
+ entry.type = acpi_get_subtable_type(id);
+ entry.hdr = (union acpi_subtable_headers *)
((unsigned long)table_header + table_size);
+ subtable_len = acpi_get_subtable_header_length(&entry);
- while (((unsigned long)entry) + sizeof(struct acpi_subtable_header) <
- table_end) {
+ while (((unsigned long)entry.hdr) + subtable_len < table_end) {
if (max_entries && count >= max_entries)
break;
for (i = 0; i < proc_num; i++) {
- if (entry->type != proc[i].id)
+ if (acpi_get_entry_type(&entry) != proc[i].id)
continue;
if (!proc[i].handler ||
- (!errs && proc[i].handler(entry, table_end))) {
+ (!errs && proc[i].handler(entry.hdr, table_end))) {
errs++;
continue;
}
@@ -296,13 +351,14 @@ static int __init acpi_parse_entries_array(char *id, unsigned long table_size,
* If entry->length is 0, break from this loop to avoid
* infinite loop.
*/
- if (entry->length == 0) {
+ entry_len = acpi_get_entry_length(&entry);
+ if (entry_len == 0) {
pr_err("[%4.4s:0x%02x] Invalid zero length\n", id, proc->id);
return -EINVAL;
}
- entry = (struct acpi_subtable_header *)
- ((unsigned long)entry + entry->length);
+ entry.hdr = (union acpi_subtable_headers *)
+ ((unsigned long)entry.hdr + entry_len);
}
if (max_entries && count > max_entries) {
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 03f067da12ee..dc404492381d 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -3,7 +3,6 @@ menu "Generic Driver Options"
config UEVENT_HELPER
bool "Support for uevent helper"
- default y
help
The uevent helper program is forked by the kernel for
every uevent.
@@ -149,6 +148,14 @@ config DEBUG_TEST_DRIVER_REMOVE
unusable. You should say N here unless you are explicitly looking to
test this functionality.
+config HMEM_REPORTING
+ bool
+ default n
+ depends on NUMA
+ help
+ Enable reporting for heterogeneous memory access attributes under
+ their non-uniform memory nodes.
+
source "drivers/base/test/Kconfig"
config SYS_HYPERVISOR
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index edfcf8d982e4..1739d7e1952a 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -7,7 +7,6 @@
*/
#include <linux/acpi.h>
-#include <linux/arch_topology.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/device.h>
@@ -31,7 +30,6 @@ void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq,
per_cpu(freq_scale, i) = scale;
}
-static DEFINE_MUTEX(cpu_scale_mutex);
DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity)
@@ -51,37 +49,7 @@ static ssize_t cpu_capacity_show(struct device *dev,
static void update_topology_flags_workfn(struct work_struct *work);
static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);
-static ssize_t cpu_capacity_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf,
- size_t count)
-{
- struct cpu *cpu = container_of(dev, struct cpu, dev);
- int this_cpu = cpu->dev.id;
- int i;
- unsigned long new_capacity;
- ssize_t ret;
-
- if (!count)
- return 0;
-
- ret = kstrtoul(buf, 0, &new_capacity);
- if (ret)
- return ret;
- if (new_capacity > SCHED_CAPACITY_SCALE)
- return -EINVAL;
-
- mutex_lock(&cpu_scale_mutex);
- for_each_cpu(i, &cpu_topology[this_cpu].core_sibling)
- topology_set_cpu_scale(i, new_capacity);
- mutex_unlock(&cpu_scale_mutex);
-
- schedule_work(&update_topology_flags_work);
-
- return count;
-}
-
-static DEVICE_ATTR_RW(cpu_capacity);
+static DEVICE_ATTR_RO(cpu_capacity);
static int register_cpu_capacity_sysctl(void)
{
@@ -141,7 +109,6 @@ void topology_normalize_cpu_scale(void)
return;
pr_debug("cpu_capacity: capacity_scale=%u\n", capacity_scale);
- mutex_lock(&cpu_scale_mutex);
for_each_possible_cpu(cpu) {
pr_debug("cpu_capacity: cpu=%d raw_capacity=%u\n",
cpu, raw_capacity[cpu]);
@@ -151,7 +118,6 @@ void topology_normalize_cpu_scale(void)
pr_debug("cpu_capacity: CPU%d cpu_capacity=%lu\n",
cpu, topology_get_cpu_scale(NULL, cpu));
}
- mutex_unlock(&cpu_scale_mutex);
}
bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 4aeaa0c92bda..fd7511e04e62 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1999,6 +1999,11 @@ static int device_private_init(struct device *dev)
* NOTE: _Never_ directly free @dev after calling this function, even
* if it returned an error! Always use put_device() to give up your
* reference instead.
+ *
+ * Rule of thumb is: if device_add() succeeds, you should call
+ * device_del() when you want to get rid of it. If device_add() has
+ * *not* succeeded, use *only* put_device() to drop the reference
+ * count.
*/
int device_add(struct device *dev)
{
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index a823f469e53f..0df9b4461766 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -490,7 +490,7 @@ re_probe:
if (dev->bus->dma_configure) {
ret = dev->bus->dma_configure(dev);
if (ret)
- goto dma_failed;
+ goto probe_failed;
}
if (driver_sysfs_add(dev)) {
@@ -546,14 +546,13 @@ re_probe:
goto done;
probe_failed:
- arch_teardown_dma_ops(dev);
-dma_failed:
if (dev->bus)
blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
BUS_NOTIFY_DRIVER_NOT_BOUND, dev);
pinctrl_bind_failed:
device_links_no_driver(dev);
devres_release_all(dev);
+ arch_teardown_dma_ops(dev);
driver_sysfs_remove(dev);
dev->driver = NULL;
dev_set_drvdata(dev, NULL);
diff --git a/drivers/base/firmware_loader/Kconfig b/drivers/base/firmware_loader/Kconfig
index eb15d976a9ea..38f2da6f5c2b 100644
--- a/drivers/base/firmware_loader/Kconfig
+++ b/drivers/base/firmware_loader/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
menu "Firmware loader"
config FW_LOADER
diff --git a/drivers/base/firmware_loader/builtin/.gitignore b/drivers/base/firmware_loader/builtin/.gitignore
index 9c8bdb9fdcc3..166f76b43049 100644
--- a/drivers/base/firmware_loader/builtin/.gitignore
+++ b/drivers/base/firmware_loader/builtin/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
*.gen.S
diff --git a/drivers/base/firmware_loader/fallback.c b/drivers/base/firmware_loader/fallback.c
index b5c865fe263b..f962488546b6 100644
--- a/drivers/base/firmware_loader/fallback.c
+++ b/drivers/base/firmware_loader/fallback.c
@@ -674,8 +674,8 @@ static bool fw_run_sysfs_fallback(enum fw_opt opt_flags)
*
* This function is called if direct lookup for the firmware failed, it enables
* a fallback mechanism through userspace by exposing a sysfs loading
- * interface. Userspace is in charge of loading the firmware through the syfs
- * loading interface. This syfs fallback mechanism may be disabled completely
+ * interface. Userspace is in charge of loading the firmware through the sysfs
+ * loading interface. This sysfs fallback mechanism may be disabled completely
* on a system by setting the proc sysctl value ignore_sysfs_fallback to true.
* If this false we check if the internal API caller set the @FW_OPT_NOFALLBACK
* flag, if so it would also disable the fallback mechanism. A system may want
@@ -693,7 +693,7 @@ int firmware_fallback_sysfs(struct firmware *fw, const char *name,
return ret;
if (!(opt_flags & FW_OPT_NO_WARN))
- dev_warn(device, "Falling back to syfs fallback for: %s\n",
+ dev_warn(device, "Falling back to sysfs fallback for: %s\n",
name);
else
dev_dbg(device, "Falling back to sysfs fallback for: %s\n",
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 86d6cd92ce3d..8598fcbd2a17 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -17,6 +17,7 @@
#include <linux/nodemask.h>
#include <linux/cpu.h>
#include <linux/device.h>
+#include <linux/pm_runtime.h>
#include <linux/swap.h>
#include <linux/slab.h>
@@ -59,6 +60,302 @@ static inline ssize_t node_read_cpulist(struct device *dev,
static DEVICE_ATTR(cpumap, S_IRUGO, node_read_cpumask, NULL);
static DEVICE_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL);
+/**
+ * struct node_access_nodes - Access class device to hold user visible
+ *			      relationships to other nodes.
+ * @dev:	Device for this memory access class
+ * @list_node:	List element in the node's access list
+ * @access:	The access class rank
+ * @hmem_attrs:	Performance attributes reported for this access class; only
+ *		present when CONFIG_HMEM_REPORTING is enabled
+ */
+struct node_access_nodes {
+	struct device		dev;
+	struct list_head	list_node;
+	unsigned		access;
+#ifdef CONFIG_HMEM_REPORTING
+	struct node_hmem_attrs	hmem_attrs;
+#endif
+};
+/*
+ * Map an embedded struct device back to its containing node_access_nodes.
+ * The macro parameter is deliberately not named "dev": that token is also the
+ * member name handed to container_of(), and a parameter of the same name
+ * would be substituted into it as well.
+ */
+#define to_access_nodes(device) \
+	container_of(device, struct node_access_nodes, dev)
+
+/*
+ * Both attribute lists start out empty (NULL terminator only); they exist so
+ * that the named "initiators" and "targets" groups below are created together
+ * with each access class device. Entries are added to the groups later by
+ * name, see sysfs_add_link_to_group() and sysfs_add_file_to_group() callers.
+ */
+static struct attribute *node_init_access_node_attrs[] = {
+ NULL,
+};
+
+static struct attribute *node_targ_access_node_attrs[] = {
+ NULL,
+};
+
+/* Group named "initiators" of an access class device */
+static const struct attribute_group initiators = {
+ .name = "initiators",
+ .attrs = node_init_access_node_attrs,
+};
+
+/* Group named "targets" of an access class device */
+static const struct attribute_group targets = {
+ .name = "targets",
+ .attrs = node_targ_access_node_attrs,
+};
+
+/* NULL-terminated group list installed as dev->groups by node_init_node_access() */
+static const struct attribute_group *node_access_node_groups[] = {
+ &initiators,
+ &targets,
+ NULL,
+};
+
+/*
+ * Drop every access class device hanging off @node: each entry is unlinked
+ * from the node's access list and its device is unregistered.
+ */
+static void node_remove_accesses(struct node *node)
+{
+	struct node_access_nodes *entry, *tmp;
+
+	list_for_each_entry_safe(entry, tmp, &node->access_list, list_node) {
+		struct device *access_dev = &entry->dev;
+
+		list_del(&entry->list_node);
+		device_unregister(access_dev);
+	}
+}
+
+/* Device release callback: free the node_access_nodes embedding @dev. */
+static void node_access_release(struct device *dev)
+{
+	struct node_access_nodes *access_node = to_access_nodes(dev);
+
+	kfree(access_node);
+}
+
+/*
+ * Find or create the access class device for @access under @node.
+ *
+ * If @node's access list already holds an entry of the same class it is
+ * returned as is. Otherwise a new child device named "access<N>" is
+ * allocated, added under the node's device and appended to the list.
+ *
+ * Returns the access class entry, or NULL on allocation/registration failure.
+ */
+static struct node_access_nodes *node_init_node_access(struct node *node,
+						       unsigned access)
+{
+	struct node_access_nodes *access_node;
+	struct device *dev;
+
+	list_for_each_entry(access_node, &node->access_list, list_node)
+		if (access_node->access == access)
+			return access_node;
+
+	access_node = kzalloc(sizeof(*access_node), GFP_KERNEL);
+	if (!access_node)
+		return NULL;
+
+	access_node->access = access;
+	dev = &access_node->dev;
+	/*
+	 * Initialize the embedded device first so that every failure below
+	 * can be unwound with put_device(). A device that went through
+	 * device_initialize()/device_add() must never be freed directly;
+	 * dropping the reference runs node_access_release() and lets the
+	 * kobject core free the name.
+	 */
+	device_initialize(dev);
+	dev->parent = &node->dev;
+	dev->release = node_access_release;
+	dev->groups = node_access_node_groups;
+	if (dev_set_name(dev, "access%u", access))
+		goto put_device;
+
+	if (device_add(dev))
+		goto put_device;
+
+	pm_runtime_no_callbacks(dev);
+	list_add_tail(&access_node->list_node, &node->access_list);
+	return access_node;
+put_device:
+	put_device(dev);
+	return NULL;
+}
+
+#ifdef CONFIG_HMEM_REPORTING
+/*
+ * ACCESS_ATTR(name) - define a read-only device attribute called @name.
+ * Its show routine prints the hmem_attrs.<name> field of the access class
+ * device as an unsigned decimal followed by a newline.
+ */
+#define ACCESS_ATTR(name) \
+static ssize_t name##_show(struct device *dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+{ \
+ return sprintf(buf, "%u\n", to_access_nodes(dev)->hmem_attrs.name); \
+} \
+static DEVICE_ATTR_RO(name);
+
+ACCESS_ATTR(read_bandwidth)
+ACCESS_ATTR(read_latency)
+ACCESS_ATTR(write_bandwidth)
+ACCESS_ATTR(write_latency)
+
+/*
+ * NULL-terminated list of the performance attributes; node_set_perf_attrs()
+ * walks it and adds each file to the "initiators" group.
+ */
+static struct attribute *access_attrs[] = {
+ &dev_attr_read_bandwidth.attr,
+ &dev_attr_read_latency.attr,
+ &dev_attr_write_bandwidth.attr,
+ &dev_attr_write_latency.attr,
+ NULL,
+};
+
+/**
+ * node_set_perf_attrs - Set the performance values for given access class
+ * @nid:	Node identifier to be set
+ * @hmem_attrs:	Heterogeneous memory performance attributes
+ * @access:	The access class for the given attributes
+ *
+ * Copies @hmem_attrs into the node's access class device (creating that
+ * device if needed) and exposes each performance attribute as a file in the
+ * device's "initiators" group. Warns once and does nothing if @nid is not
+ * online; stops at the first attribute file that fails to be added.
+ */
+void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs,
+			 unsigned access)
+{
+	struct node_access_nodes *access_node;
+	struct attribute **attr;
+
+	if (WARN_ON_ONCE(!node_online(nid)))
+		return;
+
+	access_node = node_init_node_access(node_devices[nid], access);
+	if (!access_node)
+		return;
+
+	access_node->hmem_attrs = *hmem_attrs;
+	for (attr = access_attrs; *attr != NULL; attr++) {
+		if (!sysfs_add_file_to_group(&access_node->dev.kobj, *attr,
+					     "initiators"))
+			continue;
+
+		pr_info("failed to add performance attribute to node %d\n",
+			nid);
+		break;
+	}
+}
+
+/**
+ * struct node_cache_info - Internal tracking for memory node caches
+ * @dev: Device representing the cache level
+ * @node: List element for tracking in the node
+ * @cache_attrs: Attributes for this cache level
+ */
+struct node_cache_info {
+ struct device dev;
+ struct list_head node;
+ struct node_cache_attrs cache_attrs;
+};
+/* Map an embedded struct device back to its containing node_cache_info */
+#define to_cache_info(device) container_of(device, struct node_cache_info, dev)
+
+/*
+ * CACHE_ATTR(name, fmt) - define a read-only device attribute called @name.
+ * Its show routine prints the cache_attrs.<name> field of the cache level
+ * device using printf format @fmt followed by a newline.
+ *
+ * The generated dev_attr_<name> object is file-local (static), matching
+ * ACCESS_ATTR(); generic names such as dev_attr_size must not leak into the
+ * kernel's global symbol namespace.
+ */
+#define CACHE_ATTR(name, fmt)						\
+static ssize_t name##_show(struct device *dev,				\
+			   struct device_attribute *attr,		\
+			   char *buf)					\
+{									\
+	return sprintf(buf, fmt "\n", to_cache_info(dev)->cache_attrs.name);\
+}									\
+static DEVICE_ATTR_RO(name);
+
+CACHE_ATTR(size, "%llu")
+CACHE_ATTR(line_size, "%u")
+CACHE_ATTR(indexing, "%u")
+CACHE_ATTR(write_policy, "%u")
+
+/* Attributes exposed by every cache level ("index<N>") device */
+static struct attribute *cache_attrs[] = {
+	&dev_attr_indexing.attr,
+	&dev_attr_size.attr,
+	&dev_attr_line_size.attr,
+	&dev_attr_write_policy.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(cache);
+
+/*
+ * Release callback for the per-node cache parent device set up by
+ * node_init_cache_dev(). That device is a bare struct device allocation, so
+ * the pointer itself is what gets freed.
+ */
+static void node_cache_release(struct device *dev)
+{
+ kfree(dev);
+}
+
+/* Device release callback: free the node_cache_info embedding @dev. */
+static void node_cacheinfo_release(struct device *dev)
+{
+	kfree(to_cache_info(dev));
+}
+
+/*
+ * Create the "memory_side_cache" device under @node that parents the
+ * per-level cache devices. On success the device is stored in
+ * node->cache_dev; on any failure node->cache_dev is left untouched.
+ */
+static void node_init_cache_dev(struct node *node)
+{
+	struct device *dev;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return;
+
+	/*
+	 * Initialize first so that all error paths can simply drop the
+	 * reference: once a device has been initialized it must be released
+	 * through put_device() (which ends in node_cache_release() and frees
+	 * the name), never with a direct kfree().
+	 */
+	device_initialize(dev);
+	dev->parent = &node->dev;
+	dev->release = node_cache_release;
+	if (dev_set_name(dev, "memory_side_cache"))
+		goto put_device;
+
+	if (device_add(dev))
+		goto put_device;
+
+	pm_runtime_no_callbacks(dev);
+	node->cache_dev = dev;
+	return;
+put_device:
+	put_device(dev);
+}
+
+/**
+ * node_add_cache() - add cache attribute to a memory node
+ * @nid: Node identifier that has new cache attributes
+ * @cache_attrs: Attributes for the cache being added
+ *
+ * Registers an "index<level>" device below the node's "memory_side_cache"
+ * device (creating the latter on first use) and records it on the node's
+ * cache list. Silently returns if the node is offline or has no device;
+ * warns and returns if the cache level was already added or the device
+ * cannot be registered.
+ */
+void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs)
+{
+	struct node_cache_info *info;
+	struct device *dev;
+	struct node *node;
+
+	if (!node_online(nid) || !node_devices[nid])
+		return;
+
+	node = node_devices[nid];
+	list_for_each_entry(info, &node->cache_attrs, node) {
+		if (info->cache_attrs.level == cache_attrs->level) {
+			dev_warn(&node->dev,
+				"attempt to add duplicate cache level:%d\n",
+				cache_attrs->level);
+			return;
+		}
+	}
+
+	if (!node->cache_dev)
+		node_init_cache_dev(node);
+	if (!node->cache_dev)
+		return;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return;
+
+	dev = &info->dev;
+	/*
+	 * Initialize the embedded device before anything can fail so that
+	 * the error path is a single put_device(): that invokes
+	 * node_cacheinfo_release() to free @info and lets the kobject core
+	 * free the name. An initialized device must not be kfree()d directly.
+	 */
+	device_initialize(dev);
+	dev->parent = node->cache_dev;
+	dev->release = node_cacheinfo_release;
+	dev->groups = cache_groups;
+	if (dev_set_name(dev, "index%d", cache_attrs->level))
+		goto put_device;
+
+	info->cache_attrs = *cache_attrs;
+	if (device_add(dev)) {
+		dev_warn(&node->dev, "failed to add cache level:%d\n",
+			 cache_attrs->level);
+		goto put_device;
+	}
+	pm_runtime_no_callbacks(dev);
+	list_add_tail(&info->node, &node->cache_attrs);
+	return;
+put_device:
+	put_device(dev);
+}
+
+/*
+ * Unregister every cache level device recorded for @node, then the
+ * "memory_side_cache" parent device itself.
+ */
+static void node_remove_caches(struct node *node)
+{
+	struct node_cache_info *cache, *tmp;
+
+	/* node_add_cache() never lists an entry without a parent device. */
+	if (node->cache_dev == NULL)
+		return;
+
+	list_for_each_entry_safe(cache, tmp, &node->cache_attrs, node) {
+		struct device *cache_dev = &cache->dev;
+
+		list_del(&cache->node);
+		device_unregister(cache_dev);
+	}
+	device_unregister(node->cache_dev);
+}
+
+/* Initialize the (initially empty) list of cache level entries for node @nid. */
+static void node_init_caches(unsigned int nid)
+{
+ INIT_LIST_HEAD(&node_devices[nid]->cache_attrs);
+}
+#else
+/* No-op stand-ins used when CONFIG_HMEM_REPORTING is not enabled. */
+static void node_init_caches(unsigned int nid) { }
+static void node_remove_caches(struct node *node) { }
+#endif
+
#define K(x) ((x) << (PAGE_SHIFT - 10))
static ssize_t node_read_meminfo(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -340,7 +637,8 @@ static int register_node(struct node *node, int num)
void unregister_node(struct node *node)
{
hugetlb_unregister_node(node); /* no-op, if memoryless node */
-
+ node_remove_accesses(node);
+ node_remove_caches(node);
device_unregister(&node->dev);
}
@@ -372,6 +670,56 @@ int register_cpu_under_node(unsigned int cpu, unsigned int nid)
kobject_name(&node_devices[nid]->dev.kobj));
}
+/**
+ * register_memory_node_under_compute_node - link memory node to its compute
+ *					     node for a given access class.
+ * @mem_nid:	Memory node number
+ * @cpu_nid:	Cpu node number
+ * @access:	Access class to register
+ *
+ * Description:
+ *	For use with platforms that may have separate memory and compute nodes.
+ *	This function will export node relationships linking which memory
+ *	initiator nodes can access memory targets at a given ranked access
+ *	class.
+ *
+ * Return: 0 on success, -ENODEV if either node is not online, -ENOMEM if an
+ * access class device could not be set up, or the error returned by
+ * sysfs_add_link_to_group().
+ */
+int register_memory_node_under_compute_node(unsigned int mem_nid,
+					    unsigned int cpu_nid,
+					    unsigned access)
+{
+	struct node_access_nodes *init_access, *targ_access;
+	struct node *cpu_node, *mem_node;
+	int err;
+
+	if (!node_online(cpu_nid) || !node_online(mem_nid))
+		return -ENODEV;
+
+	cpu_node = node_devices[cpu_nid];
+	mem_node = node_devices[mem_nid];
+	init_access = node_init_node_access(cpu_node, access);
+	targ_access = node_init_node_access(mem_node, access);
+	if (!init_access || !targ_access)
+		return -ENOMEM;
+
+	/* The compute node's "targets" group points at the memory node ... */
+	err = sysfs_add_link_to_group(&init_access->dev.kobj, "targets",
+				      &mem_node->dev.kobj,
+				      dev_name(&mem_node->dev));
+	if (err)
+		return err;
+
+	/* ... and the memory node's "initiators" group points back. */
+	err = sysfs_add_link_to_group(&targ_access->dev.kobj, "initiators",
+				      &cpu_node->dev.kobj,
+				      dev_name(&cpu_node->dev));
+	if (err) {
+		/* Do not leave a one-directional link behind. */
+		sysfs_remove_link_from_group(&init_access->dev.kobj, "targets",
+					     dev_name(&mem_node->dev));
+	}
+
+	return err;
+}
+
int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
{
struct device *obj;
@@ -580,8 +928,10 @@ int __register_one_node(int nid)
register_cpu_under_node(cpu, nid);
}
+ INIT_LIST_HEAD(&node_devices[nid]->access_list);
/* initialize work queue for memory hot plug */
init_node_hugetlb_work(nid);
+ node_init_caches(nid);
return error;
}
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index dab0a5abc391..4d1729853d1a 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(platform_get_resource);
* device
*
* @pdev: platform device to use both for memory resource lookup as well as
- * resource managemend
+ * resource management
* @index: resource index
*/
#ifdef CONFIG_HAS_IOMEM
@@ -438,10 +438,12 @@ int platform_device_add(struct platform_device *pdev)
p = &ioport_resource;
}
- if (p && insert_resource(p, r)) {
- dev_err(&pdev->dev, "failed to claim resource %d: %pR\n", i, r);
- ret = -EBUSY;
- goto failed;
+ if (p) {
+ ret = insert_resource(p, r);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to claim resource %d: %pR\n", i, r);
+ goto failed;
+ }
}
}
diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c
index 365ad751ce0f..59d19dd64928 100644
--- a/drivers/base/power/clock_ops.c
+++ b/drivers/base/power/clock_ops.c
@@ -1,9 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* drivers/base/power/clock_ops.c - Generic clock manipulation PM callbacks
*
* Copyright (c) 2011 Rafael J. Wysocki <rjw@sisk.pl>, Renesas Electronics Corp.
- *
- * This file is released under the GPLv2.
*/
#include <linux/kernel.h>
diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c
index 22aedb28aad7..8db98a1f83dc 100644
--- a/drivers/base/power/common.c
+++ b/drivers/base/power/common.c
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* drivers/base/power/common.c - Common device power management code.
*
* Copyright (C) 2011 Rafael J. Wysocki <rjw@sisk.pl>, Renesas Electronics Corp.
- *
- * This file is released under the GPLv2.
*/
-
#include <linux/kernel.h>
#include <linux/device.h>
#include <linux/export.h>
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 3d899e8abd58..7a6aa2318915 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* drivers/base/power/domain.c - Common code related to device power domains.
*
* Copyright (C) 2011 Rafael J. Wysocki <rjw@sisk.pl>, Renesas Electronics Corp.
- *
- * This file is released under the GPLv2.
*/
-
#define pr_fmt(fmt) "PM: " fmt
#include <linux/delay.h>
diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c
index 7912bc957244..3838045c9277 100644
--- a/drivers/base/power/domain_governor.c
+++ b/drivers/base/power/domain_governor.c
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* drivers/base/power/domain_governor.c - Governors for device PM domains.
*
* Copyright (C) 2011 Rafael J. Wysocki <rjw@sisk.pl>, Renesas Electronics Corp.
- *
- * This file is released under the GPLv2.
*/
-
#include <linux/kernel.h>
#include <linux/pm_domain.h>
#include <linux/pm_qos.h>
diff --git a/drivers/base/power/generic_ops.c b/drivers/base/power/generic_ops.c
index b2ed606265a8..4fa525668cb7 100644
--- a/drivers/base/power/generic_ops.c
+++ b/drivers/base/power/generic_ops.c
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* drivers/base/power/generic_ops.c - Generic PM callbacks for subsystems
*
* Copyright (c) 2010 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
- *
- * This file is released under the GPLv2.
*/
-
#include <linux/pm.h>
#include <linux/pm_runtime.h>
#include <linux/export.h>
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 10528a7747bf..dcfc0a36c8f7 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1,12 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* drivers/base/power/main.c - Where the driver meets power management.
*
* Copyright (c) 2003 Patrick Mochel
* Copyright (c) 2003 Open Source Development Lab
*
- * This file is released under the GPLv2
- *
- *
* The driver model core calls device_pm_add() when a device is registered.
* This will initialize the embedded device_pm_info object in the device
* and add it to the list of power-controlled devices. sysfs entries for
diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c
index f80e402ef778..6c91f8df1d59 100644
--- a/drivers/base/power/qos.c
+++ b/drivers/base/power/qos.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Devices PM QoS constraints management
*
* Copyright (C) 2011 Texas Instruments, Inc.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- *
* This module exposes the interface to kernel space for specifying
* per-device PM QoS dependencies. It provides infrastructure for registration
* of:
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index 977db40378b0..952a1e7057c7 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1,12 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* drivers/base/power/runtime.c - Helper functions for device runtime PM
*
* Copyright (c) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
* Copyright (C) 2010 Alan Stern <stern@rowland.harvard.edu>
- *
- * This file is released under the GPLv2.
*/
-
#include <linux/sched/mm.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index 1226e441ddfe..1b9c281cbe41 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -1,7 +1,5 @@
-/*
- * drivers/base/power/sysfs.c - sysfs entries for device PM
- */
-
+// SPDX-License-Identifier: GPL-2.0
+/* sysfs entries for device PM */
#include <linux/device.h>
#include <linux/string.h>
#include <linux/export.h>
diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c
index 2bd9d2c744ca..977d27bd1a22 100644
--- a/drivers/base/power/trace.c
+++ b/drivers/base/power/trace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* drivers/base/power/trace.c
*
@@ -6,7 +7,6 @@
* Trace facility for suspend/resume problems, when none of the
* devices may be working.
*/
-
#define pr_fmt(fmt) "PM: " fmt
#include <linux/pm-trace.h>
diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c
index b8fa5c0f2d13..5ce77d1ef9fc 100644
--- a/drivers/base/power/wakeirq.c
+++ b/drivers/base/power/wakeirq.c
@@ -1,16 +1,5 @@
-/*
- * wakeirq.c - Device wakeirq helper functions
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- */
-
+// SPDX-License-Identifier: GPL-2.0
+/* Device wakeirq helper functions */
#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
index 23c243a4c675..5b2b6a05a4f3 100644
--- a/drivers/base/power/wakeup.c
+++ b/drivers/base/power/wakeup.c
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* drivers/base/power/wakeup.c - System wakeup events framework
*
* Copyright (c) 2010 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
- *
- * This file is released under the GPLv2.
*/
-
#define pr_fmt(fmt) "PM: " fmt
#include <linux/device.h>
diff --git a/drivers/base/test/Makefile b/drivers/base/test/Makefile
index 90477c5fd9f9..0f1f7277a013 100644
--- a/drivers/base/test/Makefile
+++ b/drivers/base/test/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_TEST_ASYNC_DRIVER_PROBE) += test_async_driver_probe.o
diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
index f5fe0100f9ff..de14e06fd9ec 100644
--- a/drivers/irqchip/irq-gic-v2m.c
+++ b/drivers/irqchip/irq-gic-v2m.c
@@ -446,7 +446,7 @@ static struct fwnode_handle *gicv2m_get_fwnode(struct device *dev)
}
static int __init
-acpi_parse_madt_msi(struct acpi_subtable_header *header,
+acpi_parse_madt_msi(union acpi_subtable_headers *header,
const unsigned long end)
{
int ret;
diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
index 8d6d009d1d58..c81d5b81da56 100644
--- a/drivers/irqchip/irq-gic-v3-its-pci-msi.c
+++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
@@ -159,7 +159,7 @@ static int __init its_pci_of_msi_init(void)
#ifdef CONFIG_ACPI
static int __init
-its_pci_msi_parse_madt(struct acpi_subtable_header *header,
+its_pci_msi_parse_madt(union acpi_subtable_headers *header,
const unsigned long end)
{
struct acpi_madt_generic_translator *its_entry;
diff --git a/drivers/irqchip/irq-gic-v3-its-platform-msi.c b/drivers/irqchip/irq-gic-v3-its-platform-msi.c
index 7b8e87b493fe..9cdcda5bb3bd 100644
--- a/drivers/irqchip/irq-gic-v3-its-platform-msi.c
+++ b/drivers/irqchip/irq-gic-v3-its-platform-msi.c
@@ -117,7 +117,7 @@ static int __init its_pmsi_init_one(struct fwnode_handle *fwnode,
#ifdef CONFIG_ACPI
static int __init
-its_pmsi_parse_madt(struct acpi_subtable_header *header,
+its_pmsi_parse_madt(union acpi_subtable_headers *header,
const unsigned long end)
{
struct acpi_madt_generic_translator *its_entry;
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 7577755bdcf4..128ac893d7e4 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -3830,13 +3830,13 @@ static int __init acpi_get_its_numa_node(u32 its_id)
return NUMA_NO_NODE;
}
-static int __init gic_acpi_match_srat_its(struct acpi_subtable_header *header,
+static int __init gic_acpi_match_srat_its(union acpi_subtable_headers *header,
const unsigned long end)
{
return 0;
}
-static int __init gic_acpi_parse_srat_its(struct acpi_subtable_header *header,
+static int __init gic_acpi_parse_srat_its(union acpi_subtable_headers *header,
const unsigned long end)
{
int node;
@@ -3903,7 +3903,7 @@ static int __init acpi_get_its_numa_node(u32 its_id) { return NUMA_NO_NODE; }
static void __init acpi_its_srat_maps_free(void) { }
#endif
-static int __init gic_acpi_parse_madt_its(struct acpi_subtable_header *header,
+static int __init gic_acpi_parse_madt_its(union acpi_subtable_headers *header,
const unsigned long end)
{
struct acpi_madt_generic_translator *its_entry;
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 15e55d327505..f44cd89cfc40 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -1593,7 +1593,7 @@ gic_acpi_register_redist(phys_addr_t phys_base, void __iomem *redist_base)
}
static int __init
-gic_acpi_parse_madt_redist(struct acpi_subtable_header *header,
+gic_acpi_parse_madt_redist(union acpi_subtable_headers *header,
const unsigned long end)
{
struct acpi_madt_generic_redistributor *redist =
@@ -1611,7 +1611,7 @@ gic_acpi_parse_madt_redist(struct acpi_subtable_header *header,
}
static int __init
-gic_acpi_parse_madt_gicc(struct acpi_subtable_header *header,
+gic_acpi_parse_madt_gicc(union acpi_subtable_headers *header,
const unsigned long end)
{
struct acpi_madt_generic_interrupt *gicc =
@@ -1653,14 +1653,14 @@ static int __init gic_acpi_collect_gicr_base(void)
return -ENODEV;
}
-static int __init gic_acpi_match_gicr(struct acpi_subtable_header *header,
+static int __init gic_acpi_match_gicr(union acpi_subtable_headers *header,
const unsigned long end)
{
/* Subtable presence means that redist exists, that's it */
return 0;
}
-static int __init gic_acpi_match_gicc(struct acpi_subtable_header *header,
+static int __init gic_acpi_match_gicc(union acpi_subtable_headers *header,
const unsigned long end)
{
struct acpi_madt_generic_interrupt *gicc =
@@ -1726,7 +1726,7 @@ static bool __init acpi_validate_gic_table(struct acpi_subtable_header *header,
return true;
}
-static int __init gic_acpi_parse_virt_madt_gicc(struct acpi_subtable_header *header,
+static int __init gic_acpi_parse_virt_madt_gicc(union acpi_subtable_headers *header,
const unsigned long end)
{
struct acpi_madt_generic_interrupt *gicc =
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index fd3110c171ba..c6dbe5018972 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -1495,7 +1495,7 @@ static struct
} acpi_data __initdata;
static int __init
-gic_acpi_parse_madt_cpu(struct acpi_subtable_header *header,
+gic_acpi_parse_madt_cpu(union acpi_subtable_headers *header,
const unsigned long end)
{
struct acpi_madt_generic_interrupt *processor;
@@ -1527,7 +1527,7 @@ gic_acpi_parse_madt_cpu(struct acpi_subtable_header *header,
}
/* The things you have to do to just *count* something... */
-static int __init acpi_dummy_func(struct acpi_subtable_header *header,
+static int __init acpi_dummy_func(union acpi_subtable_headers *header,
const unsigned long end)
{
return 0;
diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c
index 256f18b67e8a..08a0a3517138 100644
--- a/drivers/mailbox/pcc.c
+++ b/drivers/mailbox/pcc.c
@@ -382,7 +382,7 @@ static const struct mbox_chan_ops pcc_chan_ops = {
*
* This gets called for each entry in the PCC table.
*/
-static int parse_pcc_subspace(struct acpi_subtable_header *header,
+static int parse_pcc_subspace(union acpi_subtable_headers *header,
const unsigned long end)
{
struct acpi_pcct_subspace *ss = (struct acpi_pcct_subspace *) header;
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 4fce1da7db23..ddd708b09fa1 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -394,12 +394,11 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_u8_wo, NULL, debugfs_u8_set, "%llu\n");
* This function will return a pointer to a dentry if it succeeds. This
* pointer must be passed to the debugfs_remove() function when the file is
* to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.) If an error occurs, %NULL will be returned.
+ * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be
+ * returned.
*
- * If debugfs is not enabled in the kernel, the value -%ENODEV will be
- * returned. It is not wise to check for this value, but rather, check for
- * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
- * code.
+ * If debugfs is not enabled in the kernel, the value %ERR_PTR(-ENODEV) will
+ * be returned.
*/
struct dentry *debugfs_create_u8(const char *name, umode_t mode,
struct dentry *parent, u8 *value)
@@ -440,12 +439,11 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_u16_wo, NULL, debugfs_u16_set, "%llu\n");
* This function will return a pointer to a dentry if it succeeds. This
* pointer must be passed to the debugfs_remove() function when the file is
* to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.) If an error occurs, %NULL will be returned.
+ * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be
+ * returned.
*
- * If debugfs is not enabled in the kernel, the value -%ENODEV will be
- * returned. It is not wise to check for this value, but rather, check for
- * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
- * code.
+ * If debugfs is not enabled in the kernel, the value %ERR_PTR(-ENODEV) will
+ * be returned.
*/
struct dentry *debugfs_create_u16(const char *name, umode_t mode,
struct dentry *parent, u16 *value)
@@ -486,12 +484,11 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_u32_wo, NULL, debugfs_u32_set, "%llu\n");
* This function will return a pointer to a dentry if it succeeds. This
* pointer must be passed to the debugfs_remove() function when the file is
* to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.) If an error occurs, %NULL will be returned.
+ * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be
+ * returned.
*
- * If debugfs is not enabled in the kernel, the value -%ENODEV will be
- * returned. It is not wise to check for this value, but rather, check for
- * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
- * code.
+ * If debugfs is not enabled in the kernel, the value %ERR_PTR(-ENODEV) will
+ * be returned.
*/
struct dentry *debugfs_create_u32(const char *name, umode_t mode,
struct dentry *parent, u32 *value)
@@ -533,12 +530,11 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
* This function will return a pointer to a dentry if it succeeds. This
* pointer must be passed to the debugfs_remove() function when the file is
* to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.) If an error occurs, %NULL will be returned.
+ * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be
+ * returned.
*
- * If debugfs is not enabled in the kernel, the value -%ENODEV will be
- * returned. It is not wise to check for this value, but rather, check for
- * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
- * code.
+ * If debugfs is not enabled in the kernel, the value %ERR_PTR(-ENODEV) will
+ * be returned.
*/
struct dentry *debugfs_create_u64(const char *name, umode_t mode,
struct dentry *parent, u64 *value)
@@ -582,12 +578,11 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_ulong_wo, NULL, debugfs_ulong_set, "%llu\n");
* This function will return a pointer to a dentry if it succeeds. This
* pointer must be passed to the debugfs_remove() function when the file is
* to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.) If an error occurs, %NULL will be returned.
+ * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be
+ * returned.
*
- * If debugfs is not enabled in the kernel, the value -%ENODEV will be
- * returned. It is not wise to check for this value, but rather, check for
- * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
- * code.
+ * If debugfs is not enabled in the kernel, the value %ERR_PTR(-ENODEV) will
+ * be returned.
*/
struct dentry *debugfs_create_ulong(const char *name, umode_t mode,
struct dentry *parent, unsigned long *value)
@@ -850,12 +845,11 @@ static const struct file_operations fops_bool_wo = {
* This function will return a pointer to a dentry if it succeeds. This
* pointer must be passed to the debugfs_remove() function when the file is
* to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.) If an error occurs, %NULL will be returned.
+ * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be
+ * returned.
*
- * If debugfs is not enabled in the kernel, the value -%ENODEV will be
- * returned. It is not wise to check for this value, but rather, check for
- * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
- * code.
+ * If debugfs is not enabled in the kernel, the value %ERR_PTR(-ENODEV) will
+ * be returned.
*/
struct dentry *debugfs_create_bool(const char *name, umode_t mode,
struct dentry *parent, bool *value)
@@ -904,12 +898,11 @@ static const struct file_operations fops_blob = {
* This function will return a pointer to a dentry if it succeeds. This
* pointer must be passed to the debugfs_remove() function when the file is
* to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.) If an error occurs, %NULL will be returned.
+ * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be
+ * returned.
*
- * If debugfs is not enabled in the kernel, the value -%ENODEV will be
- * returned. It is not wise to check for this value, but rather, check for
- * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
- * code.
+ * If debugfs is not enabled in the kernel, the value %ERR_PTR(-ENODEV) will
+ * be returned.
*/
struct dentry *debugfs_create_blob(const char *name, umode_t mode,
struct dentry *parent,
@@ -1005,8 +998,9 @@ static const struct file_operations u32_array_fops = {
* Writing is not supported. Seek within the file is also not supported.
* Once array is created its size can not be changed.
*
- * The function returns a pointer to dentry on success. If debugfs is not
- * enabled in the kernel, the value -%ENODEV will be returned.
+ * The function returns a pointer to dentry on success. If an error occurs,
+ * %ERR_PTR(-ERROR) or NULL will be returned. If debugfs is not enabled in
+ * the kernel, the value %ERR_PTR(-ENODEV) will be returned.
*/
struct dentry *debugfs_create_u32_array(const char *name, umode_t mode,
struct dentry *parent,
@@ -1102,12 +1096,11 @@ static const struct file_operations fops_regset32 = {
* This function will return a pointer to a dentry if it succeeds. This
* pointer must be passed to the debugfs_remove() function when the file is
* to be removed (no automatic cleanup happens if your module is unloaded,
- * you are responsible here.) If an error occurs, %NULL will be returned.
+ * you are responsible here.) If an error occurs, %ERR_PTR(-ERROR) will be
+ * returned.
*
- * If debugfs is not enabled in the kernel, the value -%ENODEV will be
- * returned. It is not wise to check for this value, but rather, check for
- * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
- * code.
+ * If debugfs is not enabled in the kernel, the value %ERR_PTR(-ENODEV) will
+ * be returned.
*/
struct dentry *debugfs_create_regset32(const char *name, umode_t mode,
struct dentry *parent,
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index b84d635567d3..1e7a74b8e064 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -650,11 +650,10 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
kn->id.generation = gen;
/*
- * set ino first. This barrier is paired with atomic_inc_not_zero in
+ * set ino first. This RELEASE is paired with atomic_inc_not_zero in
* kernfs_find_and_get_node_by_ino
*/
- smp_mb__before_atomic();
- atomic_set(&kn->count, 1);
+ atomic_set_release(&kn->count, 1);
atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
RB_CLEAR_NODE(&kn->rb);
diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h
index d14037ddf108..22c039ebc6c5 100644
--- a/include/acpi/actbl1.h
+++ b/include/acpi/actbl1.h
@@ -1395,7 +1395,7 @@ struct acpi_table_hmat {
/* Values for HMAT structure types */
enum acpi_hmat_type {
- ACPI_HMAT_TYPE_ADDRESS_RANGE = 0, /* Memory subsystem address range */
+ ACPI_HMAT_TYPE_PROXIMITY = 0, /* Memory proximity domain attributes */
ACPI_HMAT_TYPE_LOCALITY = 1, /* System locality latency and bandwidth information */
ACPI_HMAT_TYPE_CACHE = 2, /* Memory side cache information */
ACPI_HMAT_TYPE_RESERVED = 3 /* 3 and greater are reserved */
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index ca55ae00f8c9..e22c237be46a 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -141,10 +141,14 @@ enum acpi_address_range_id {
/* Table Handlers */
+union acpi_subtable_headers {
+ struct acpi_subtable_header common;
+ struct acpi_hmat_structure hmat;
+};
typedef int (*acpi_tbl_table_handler)(struct acpi_table_header *table);
-typedef int (*acpi_tbl_entry_handler)(struct acpi_subtable_header *header,
+typedef int (*acpi_tbl_entry_handler)(union acpi_subtable_headers *header,
const unsigned long end);
/* Debugger support */
diff --git a/include/linux/device.h b/include/linux/device.h
index 4e6987e11f68..4457e560bc2b 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -976,18 +976,14 @@ struct dev_links_info {
* a higher-level representation of the device.
*/
struct device {
+ struct kobject kobj;
struct device *parent;
struct device_private *p;
- struct kobject kobj;
const char *init_name; /* initial name of the device */
const struct device_type *type;
- struct mutex mutex; /* mutex to synchronize calls to
- * its driver.
- */
-
struct bus_type *bus; /* type of bus device is on */
struct device_driver *driver; /* which driver has allocated this
device */
@@ -995,6 +991,10 @@ struct device {
core doesn't touch it */
void *driver_data; /* Driver data, set and get with
dev_set_drvdata/dev_get_drvdata */
+ struct mutex mutex; /* mutex to synchronize calls to
+ * its driver.
+ */
+
struct dev_links_info links;
struct dev_pm_info power;
struct dev_pm_domain *pm_domain;
@@ -1009,9 +1009,6 @@ struct device {
struct list_head msi_list;
#endif
-#ifdef CONFIG_NUMA
- int numa_node; /* NUMA node this device is close to */
-#endif
const struct dma_map_ops *dma_ops;
u64 *dma_mask; /* dma mask (if dma'able device) */
u64 coherent_dma_mask;/* Like dma_mask, but for
@@ -1040,6 +1037,9 @@ struct device {
struct device_node *of_node; /* associated device tree node */
struct fwnode_handle *fwnode; /* firmware device node */
+#ifdef CONFIG_NUMA
+ int numa_node; /* NUMA node this device is close to */
+#endif
dev_t devt; /* dev_t, creates the sysfs "dev" */
u32 id; /* device instance */
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index c8893f663470..e446ab97ee0c 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -64,7 +64,7 @@ enum kernfs_root_flag {
KERNFS_ROOT_CREATE_DEACTIVATED = 0x0001,
/*
- * For regular flies, if the opener has CAP_DAC_OVERRIDE, open(2)
+ * For regular files, if the opener has CAP_DAC_OVERRIDE, open(2)
* succeeds regardless of the RW permissions. sysfs had an extra
* layer of enforcement where open(2) fails with -EACCES regardless
* of CAP_DAC_OVERRIDE if the permission doesn't have the
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 1ab0d624fb36..e2ca0a292e21 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -139,7 +139,8 @@ static inline bool kobject_has_children(struct kobject *kobj)
struct kobj_type {
void (*release)(struct kobject *kobj);
const struct sysfs_ops *sysfs_ops;
- struct attribute **default_attrs;
+ struct attribute **default_attrs; /* use default_groups instead */
+ const struct attribute_group **default_groups;
const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj);
const void *(*namespace)(struct kobject *kobj);
void (*get_ownership)(struct kobject *kobj, kuid_t *uid, kgid_t *gid);
diff --git a/include/linux/node.h b/include/linux/node.h
index 257bb3d6d014..1a557c589ecb 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -17,14 +17,81 @@
#include <linux/device.h>
#include <linux/cpumask.h>
+#include <linux/list.h>
#include <linux/workqueue.h>
+/**
+ * struct node_hmem_attrs - heterogeneous memory performance attributes
+ *
+ * @read_bandwidth: Read bandwidth in MB/s
+ * @write_bandwidth: Write bandwidth in MB/s
+ * @read_latency: Read latency in nanoseconds
+ * @write_latency: Write latency in nanoseconds
+ */
+struct node_hmem_attrs {
+ unsigned int read_bandwidth;
+ unsigned int write_bandwidth;
+ unsigned int read_latency;
+ unsigned int write_latency;
+};
+
+enum cache_indexing {
+ NODE_CACHE_DIRECT_MAP,
+ NODE_CACHE_INDEXED,
+ NODE_CACHE_OTHER,
+};
+
+enum cache_write_policy {
+ NODE_CACHE_WRITE_BACK,
+ NODE_CACHE_WRITE_THROUGH,
+ NODE_CACHE_WRITE_OTHER,
+};
+
+/**
+ * struct node_cache_attrs - system memory caching attributes
+ *
+ * @indexing: The ways memory blocks may be placed in cache
+ * @write_policy: Write back or write through policy
+ * @size: Total size of cache in bytes
+ * @line_size: Number of bytes fetched on a cache miss
+ * @level: The cache hierarchy level
+ */
+struct node_cache_attrs {
+ enum cache_indexing indexing;
+ enum cache_write_policy write_policy;
+ u64 size;
+ u16 line_size;
+ u8 level;
+};
+
+#ifdef CONFIG_HMEM_REPORTING
+void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs);
+void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs,
+ unsigned access);
+#else
+static inline void node_add_cache(unsigned int nid,
+ struct node_cache_attrs *cache_attrs)
+{
+}
+
+static inline void node_set_perf_attrs(unsigned int nid,
+ struct node_hmem_attrs *hmem_attrs,
+ unsigned access)
+{
+}
+#endif
+
struct node {
struct device dev;
+ struct list_head access_list;
#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS)
struct work_struct node_work;
#endif
+#ifdef CONFIG_HMEM_REPORTING
+ struct list_head cache_attrs;
+ struct device *cache_dev;
+#endif
};
struct memory_block;
@@ -75,6 +142,10 @@ extern int register_mem_sect_under_node(struct memory_block *mem_blk,
extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
unsigned long phys_index);
+extern int register_memory_node_under_compute_node(unsigned int mem_nid,
+ unsigned int cpu_nid,
+ unsigned access);
+
#ifdef CONFIG_HUGETLBFS
extern void register_hugetlbfs_with_node(node_registration_func_t doregister,
node_registration_func_t unregister);
diff --git a/init/Kconfig b/init/Kconfig
index be8f97e37a76..82b84e5ee30d 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -562,7 +562,6 @@ config BUILD_BIN2C
config IKCONFIG
tristate "Kernel .config support"
- select BUILD_BIN2C
---help---
This option enables the complete Linux kernel ".config" file
contents to be saved in the kernel. It provides documentation
@@ -580,6 +579,16 @@ config IKCONFIG_PROC
This option enables access to the kernel configuration file
through /proc/config.gz.
+config IKHEADERS_PROC
+ tristate "Enable kernel header artifacts through /proc/kheaders.tar.xz"
+ depends on PROC_FS
+ help
+ This option enables access to the kernel header and other artifacts that
+ are generated during the build process. These can be used to build eBPF
+ tracing programs, or similar programs. If you build the headers as a
+ module, a module called kheaders.ko is built which can be loaded on-demand
+ to get access to the headers.
+
config LOG_BUF_SHIFT
int "Kernel log buffer size (16 => 64KB, 17 => 128KB)"
range 12 25
diff --git a/kernel/.gitignore b/kernel/.gitignore
index 6e699100872f..34d1e77ee9df 100644
--- a/kernel/.gitignore
+++ b/kernel/.gitignore
@@ -1,5 +1,6 @@
#
# Generated files
#
+kheaders.md5
timeconst.h
hz.bc
diff --git a/kernel/Makefile b/kernel/Makefile
index 62471e75a2b0..298437bb2c6a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -71,6 +71,7 @@ obj-$(CONFIG_UTS_NS) += utsname.o
obj-$(CONFIG_USER_NS) += user_namespace.o
obj-$(CONFIG_PID_NS) += pid_namespace.o
obj-$(CONFIG_IKCONFIG) += configs.o
+obj-$(CONFIG_IKHEADERS_PROC) += kheaders.o
obj-$(CONFIG_SMP) += stop_machine.o
obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
@@ -122,3 +123,12 @@ $(obj)/configs.o: $(obj)/config_data.gz
targets += config_data.gz
$(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE
$(call if_changed,gzip)
+
+$(obj)/kheaders.o: $(obj)/kheaders_data.tar.xz
+
+quiet_cmd_genikh = CHK $(obj)/kheaders_data.tar.xz
+cmd_genikh = $(srctree)/kernel/gen_ikh_data.sh $@
+$(obj)/kheaders_data.tar.xz: FORCE
+ $(call cmd,genikh)
+
+clean-files := kheaders_data.tar.xz kheaders.md5
diff --git a/kernel/gen_ikh_data.sh b/kernel/gen_ikh_data.sh
new file mode 100755
index 000000000000..591a94f7b387
--- /dev/null
+++ b/kernel/gen_ikh_data.sh
@@ -0,0 +1,89 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This script generates an archive consisting of kernel headers
+# for CONFIG_IKHEADERS_PROC.
+set -e
+spath="$(dirname "$(readlink -f "$0")")"
+kroot="$spath/.."
+outdir="$(pwd)"
+tarfile=$1
+cpio_dir=$outdir/$tarfile.tmp
+
+# Script filename relative to the kernel source root
+# We add it to the archive because it is small and any changes
+# to this script will also cause a rebuild of the archive.
+sfile="$(realpath --relative-to $kroot "$(readlink -f "$0")")"
+
+src_file_list="
+include/
+arch/$SRCARCH/include/
+$sfile
+"
+
+obj_file_list="
+include/
+arch/$SRCARCH/include/
+"
+
+# Support incremental builds by skipping archive generation
+# if timestamps of files being archived are not changed.
+
+# This block is useful for debugging the incremental builds.
+# Uncomment it for debugging.
+# iter=1
+# if [ ! -f /tmp/iter ]; then echo 1 > /tmp/iter;
+# else; iter=$(($(cat /tmp/iter) + 1)); fi
+# find $src_file_list -type f | xargs ls -lR > /tmp/src-ls-$iter
+# find $obj_file_list -type f | xargs ls -lR > /tmp/obj-ls-$iter
+
+# include/generated/compile.h is ignored because it is touched even when none
+# of the source files changed. This causes pointless regeneration, so let us
+# ignore it for the md5 calculation.
+pushd $kroot > /dev/null
+src_files_md5="$(find $src_file_list -type f |
+ grep -v "include/generated/compile.h" |
+ xargs ls -lR | md5sum | cut -d ' ' -f1)"
+popd > /dev/null
+obj_files_md5="$(find $obj_file_list -type f |
+ grep -v "include/generated/compile.h" |
+ xargs ls -lR | md5sum | cut -d ' ' -f1)"
+
+if [ -f $tarfile ]; then tarfile_md5="$(md5sum $tarfile | cut -d ' ' -f1)"; fi
+if [ -f kernel/kheaders.md5 ] &&
+ [ "$(cat kernel/kheaders.md5|head -1)" == "$src_files_md5" ] &&
+ [ "$(cat kernel/kheaders.md5|head -2|tail -1)" == "$obj_files_md5" ] &&
+ [ "$(cat kernel/kheaders.md5|tail -1)" == "$tarfile_md5" ]; then
+ exit
+fi
+
+if [ "${quiet}" != "silent_" ]; then
+ echo " GEN $tarfile"
+fi
+
+rm -rf $cpio_dir
+mkdir $cpio_dir
+
+pushd $kroot > /dev/null
+for f in $src_file_list;
+ do find "$f" ! -name "*.cmd" ! -name ".*";
+done | cpio --quiet -pd $cpio_dir
+popd > /dev/null
+
+# The second CPIO can complain if files already exist which can
+# happen with out of tree builds. Just silence CPIO for now.
+for f in $obj_file_list;
+ do find "$f" ! -name "*.cmd" ! -name ".*";
+done | cpio --quiet -pd $cpio_dir >/dev/null 2>&1
+
+# Remove comments except SPDX lines
+find $cpio_dir -type f -print0 |
+ xargs -0 -P8 -n1 perl -pi -e 'BEGIN {undef $/;}; s/\/\*((?!SPDX).)*?\*\///smg;'
+
+tar -Jcf $tarfile -C $cpio_dir/ . > /dev/null
+
+echo "$src_files_md5" > kernel/kheaders.md5
+echo "$obj_files_md5" >> kernel/kheaders.md5
+echo "$(md5sum $tarfile | cut -d ' ' -f1)" >> kernel/kheaders.md5
+
+rm -rf $cpio_dir
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 9f8a709337cf..c52b737ab8e3 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -275,11 +275,12 @@ static struct attribute *irq_attrs[] = {
&actions_attr.attr,
NULL
};
+ATTRIBUTE_GROUPS(irq);
static struct kobj_type irq_kobj_type = {
.release = irq_kobj_release,
.sysfs_ops = &kobj_sysfs_ops,
- .default_attrs = irq_attrs,
+ .default_groups = irq_groups,
};
static void irq_sysfs_add(int irq, struct irq_desc *desc)
diff --git a/kernel/kheaders.c b/kernel/kheaders.c
new file mode 100644
index 000000000000..70ae6052920d
--- /dev/null
+++ b/kernel/kheaders.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Provide kernel headers useful to build tracing programs
+ * such as for running eBPF tracing tools.
+ *
+ * (Borrowed code from kernel/configs.c)
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/init.h>
+#include <linux/uaccess.h>
+
+/*
+ * Define kernel_headers_data and kernel_headers_data_end, within which the
+ * compressed kernel headers are stored. The file is first compressed with xz.
+ */
+
+asm (
+" .pushsection .rodata, \"a\" \n"
+" .global kernel_headers_data \n"
+"kernel_headers_data: \n"
+" .incbin \"kernel/kheaders_data.tar.xz\" \n"
+" .global kernel_headers_data_end \n"
+"kernel_headers_data_end: \n"
+" .popsection \n"
+);
+
+extern char kernel_headers_data;
+extern char kernel_headers_data_end;
+
+static ssize_t
+ikheaders_read_current(struct file *file, char __user *buf,
+ size_t len, loff_t *offset)
+{
+ return simple_read_from_buffer(buf, len, offset,
+ &kernel_headers_data,
+ &kernel_headers_data_end -
+ &kernel_headers_data);
+}
+
+static const struct file_operations ikheaders_file_ops = {
+ .read = ikheaders_read_current,
+ .llseek = default_llseek,
+};
+
+static int __init ikheaders_init(void)
+{
+ struct proc_dir_entry *entry;
+
+ /* create the current headers file */
+ entry = proc_create("kheaders.tar.xz", S_IRUGO, NULL,
+ &ikheaders_file_ops);
+ if (!entry)
+ return -ENOMEM;
+
+ proc_set_size(entry,
+ &kernel_headers_data_end -
+ &kernel_headers_data);
+ return 0;
+}
+
+static void __exit ikheaders_cleanup(void)
+{
+ remove_proc_entry("kheaders.tar.xz", NULL);
+}
+
+module_init(ikheaders_init);
+module_exit(ikheaders_cleanup);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Joel Fernandes");
+MODULE_DESCRIPTION("Echo the kernel header artifacts used to build the kernel");
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index f12c0eabd843..f6fbaff10e71 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -419,6 +419,7 @@ static struct attribute *klp_patch_attrs[] = {
&force_kobj_attr.attr,
NULL
};
+ATTRIBUTE_GROUPS(klp_patch);
static void klp_free_object_dynamic(struct klp_object *obj)
{
@@ -549,7 +550,7 @@ static void klp_kobj_release_patch(struct kobject *kobj)
static struct kobj_type klp_ktype_patch = {
.release = klp_kobj_release_patch,
.sysfs_ops = &kobj_sysfs_ops,
- .default_attrs = klp_patch_attrs,
+ .default_groups = klp_patch_groups,
};
static void klp_kobj_release_object(struct kobject *kobj)
diff --git a/kernel/padata.c b/kernel/padata.c
index 3e2633ae3bca..2d2fddbb7a4c 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -957,6 +957,7 @@ static struct attribute *padata_default_attrs[] = {
&parallel_cpumask_attr.attr,
NULL,
};
+ATTRIBUTE_GROUPS(padata_default);
static ssize_t padata_sysfs_show(struct kobject *kobj,
struct attribute *attr, char *buf)
@@ -995,7 +996,7 @@ static const struct sysfs_ops padata_sysfs_ops = {
static struct kobj_type padata_attr_type = {
.sysfs_ops = &padata_sysfs_ops,
- .default_attrs = padata_default_attrs,
+ .default_groups = padata_default_groups,
.release = padata_sysfs_release,
};
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 5403479073b0..962cf343f798 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -600,13 +600,14 @@ rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count
static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);
-static struct attribute *sugov_attributes[] = {
+static struct attribute *sugov_attrs[] = {
&rate_limit_us.attr,
NULL
};
+ATTRIBUTE_GROUPS(sugov);
static struct kobj_type sugov_tunables_ktype = {
- .default_attrs = sugov_attributes,
+ .default_groups = sugov_groups,
.sysfs_ops = &governor_sysfs_ops,
};
diff --git a/lib/kobject.c b/lib/kobject.c
index aa89edcd2b63..f2ccdbac8ed9 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -18,7 +18,7 @@
#include <linux/random.h>
/**
- * kobject_namespace - return @kobj's namespace tag
+ * kobject_namespace() - Return @kobj's namespace tag.
* @kobj: kobject in question
*
* Returns namespace tag of @kobj if its parent has namespace ops enabled
@@ -36,7 +36,7 @@ const void *kobject_namespace(struct kobject *kobj)
}
/**
- * kobject_get_ownership - get sysfs ownership data for @kobj
+ * kobject_get_ownership() - Get sysfs ownership data for @kobj.
* @kobj: kobject in question
* @uid: kernel user ID for sysfs objects
* @gid: kernel group ID for sysfs objects
@@ -82,6 +82,7 @@ static int populate_dir(struct kobject *kobj)
static int create_dir(struct kobject *kobj)
{
+ const struct kobj_type *ktype = get_ktype(kobj);
const struct kobj_ns_type_operations *ops;
int error;
@@ -95,6 +96,14 @@ static int create_dir(struct kobject *kobj)
return error;
}
+ if (ktype) {
+ error = sysfs_create_groups(kobj, ktype->default_groups);
+ if (error) {
+ sysfs_remove_dir(kobj);
+ return error;
+ }
+ }
+
/*
* @kobj->sd may be deleted by an ancestor going away. Hold an
* extra reference so that it stays until @kobj is gone.
@@ -153,12 +162,11 @@ static void fill_kobj_path(struct kobject *kobj, char *path, int length)
}
/**
- * kobject_get_path - generate and return the path associated with a given kobj and kset pair.
- *
+ * kobject_get_path() - Allocate memory and fill in the path for @kobj.
* @kobj: kobject in question, with which to build the path
* @gfp_mask: the allocation type used to allocate the path
*
- * The result must be freed by the caller with kfree().
+ * Return: The newly allocated memory, caller must free with kfree().
*/
char *kobject_get_path(struct kobject *kobj, gfp_t gfp_mask)
{
@@ -265,7 +273,7 @@ static int kobject_add_internal(struct kobject *kobj)
}
/**
- * kobject_set_name_vargs - Set the name of an kobject
+ * kobject_set_name_vargs() - Set the name of a kobject.
* @kobj: struct kobject to set the name of
* @fmt: format string used to build the name
* @vargs: vargs to format the string.
@@ -305,7 +313,7 @@ int kobject_set_name_vargs(struct kobject *kobj, const char *fmt,
}
/**
- * kobject_set_name - Set the name of a kobject
+ * kobject_set_name() - Set the name of a kobject.
* @kobj: struct kobject to set the name of
* @fmt: format string used to build the name
*
@@ -327,7 +335,7 @@ int kobject_set_name(struct kobject *kobj, const char *fmt, ...)
EXPORT_SYMBOL(kobject_set_name);
/**
- * kobject_init - initialize a kobject structure
+ * kobject_init() - Initialize a kobject structure.
* @kobj: pointer to the kobject to initialize
* @ktype: pointer to the ktype for this kobject.
*
@@ -383,7 +391,7 @@ static __printf(3, 0) int kobject_add_varg(struct kobject *kobj,
}
/**
- * kobject_add - the main kobject add function
+ * kobject_add() - The main kobject add function.
* @kobj: the kobject to add
* @parent: pointer to the parent of the kobject.
* @fmt: format to name the kobject with.
@@ -397,15 +405,23 @@ static __printf(3, 0) int kobject_add_varg(struct kobject *kobj,
* is assigned to the kobject, then the kobject will be located in the
* root of the sysfs tree.
*
- * If this function returns an error, kobject_put() must be called to
- * properly clean up the memory associated with the object.
- * Under no instance should the kobject that is passed to this function
- * be directly freed with a call to kfree(), that can leak memory.
- *
* Note, no "add" uevent will be created with this call, the caller should set
* up all of the necessary sysfs files for the object and then call
* kobject_uevent() with the UEVENT_ADD parameter to ensure that
* userspace is properly notified of this kobject's creation.
+ *
+ * Return: If this function returns an error, kobject_put() must be
+ * called to properly clean up the memory associated with the
+ * object. Under no circumstances should the kobject that is
+ * passed to this function be directly freed with a call to
+ * kfree(), as that can leak memory.
+ *
+ * If this function returns success, kobject_put() must also be called
+ * in order to properly clean up the memory associated with the object.
+ *
+ * In short, once this function is called, kobject_put() MUST be called
+ * when the use of the object is finished in order to properly free
+ * everything.
*/
int kobject_add(struct kobject *kobj, struct kobject *parent,
const char *fmt, ...)
@@ -431,15 +447,19 @@ int kobject_add(struct kobject *kobj, struct kobject *parent,
EXPORT_SYMBOL(kobject_add);
/**
- * kobject_init_and_add - initialize a kobject structure and add it to the kobject hierarchy
+ * kobject_init_and_add() - Initialize a kobject structure and add it to
+ * the kobject hierarchy.
* @kobj: pointer to the kobject to initialize
* @ktype: pointer to the ktype for this kobject.
* @parent: pointer to the parent of this kobject.
* @fmt: the name of the kobject.
*
- * This function combines the call to kobject_init() and
- * kobject_add(). The same type of error handling after a call to
- * kobject_add() and kobject lifetime rules are the same here.
+ * This function combines the call to kobject_init() and kobject_add().
+ *
+ * If this function returns an error, kobject_put() must be called to
+ * properly clean up the memory associated with the object. This is the
+ * same error handling required after a call to kobject_add(), and the
+ * same kobject lifetime rules apply here.
*/
int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype,
struct kobject *parent, const char *fmt, ...)
@@ -458,7 +478,7 @@ int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype,
EXPORT_SYMBOL_GPL(kobject_init_and_add);
/**
- * kobject_rename - change the name of an object
+ * kobject_rename() - Change the name of an object.
* @kobj: object in question.
* @new_name: object's new name
*
@@ -525,7 +545,7 @@ out:
EXPORT_SYMBOL_GPL(kobject_rename);
/**
- * kobject_move - move object to another parent
+ * kobject_move() - Move object to another parent.
* @kobj: object in question.
* @new_parent: object's new parent (can be NULL)
*/
@@ -578,17 +598,26 @@ out:
EXPORT_SYMBOL_GPL(kobject_move);
/**
- * kobject_del - unlink kobject from hierarchy.
+ * kobject_del() - Unlink kobject from hierarchy.
* @kobj: object.
+ *
+ * This is the function that should be called to delete an object
+ * successfully added via kobject_add().
*/
void kobject_del(struct kobject *kobj)
{
struct kernfs_node *sd;
+ const struct kobj_type *ktype;
if (!kobj)
return;
sd = kobj->sd;
+ ktype = get_ktype(kobj);
+
+ if (ktype)
+ sysfs_remove_groups(kobj, ktype->default_groups);
+
sysfs_remove_dir(kobj);
sysfs_put(sd);
@@ -600,7 +629,7 @@ void kobject_del(struct kobject *kobj)
EXPORT_SYMBOL(kobject_del);
/**
- * kobject_get - increment refcount for object.
+ * kobject_get() - Increment refcount for object.
* @kobj: object.
*/
struct kobject *kobject_get(struct kobject *kobj)
@@ -693,7 +722,7 @@ static void kobject_release(struct kref *kref)
}
/**
- * kobject_put - decrement refcount for object.
+ * kobject_put() - Decrement refcount for object.
* @kobj: object.
*
* Decrement the refcount, and if 0, call kobject_cleanup().
@@ -722,7 +751,7 @@ static struct kobj_type dynamic_kobj_ktype = {
};
/**
- * kobject_create - create a struct kobject dynamically
+ * kobject_create() - Create a struct kobject dynamically.
*
* This function creates a kobject structure dynamically and sets it up
* to be a "dynamic" kobject with a default release function set up.
@@ -745,8 +774,8 @@ struct kobject *kobject_create(void)
}
/**
- * kobject_create_and_add - create a struct kobject dynamically and register it with sysfs
- *
+ * kobject_create_and_add() - Create a struct kobject dynamically and
+ * register it with sysfs.
* @name: the name for the kobject
* @parent: the parent kobject of this kobject, if any.
*
@@ -777,7 +806,7 @@ struct kobject *kobject_create_and_add(const char *name, struct kobject *parent)
EXPORT_SYMBOL_GPL(kobject_create_and_add);
/**
- * kset_init - initialize a kset for use
+ * kset_init() - Initialize a kset for use.
* @k: kset
*/
void kset_init(struct kset *k)
@@ -819,7 +848,7 @@ const struct sysfs_ops kobj_sysfs_ops = {
EXPORT_SYMBOL_GPL(kobj_sysfs_ops);
/**
- * kset_register - initialize and add a kset.
+ * kset_register() - Initialize and add a kset.
* @k: kset.
*/
int kset_register(struct kset *k)
@@ -839,7 +868,7 @@ int kset_register(struct kset *k)
EXPORT_SYMBOL(kset_register);
/**
- * kset_unregister - remove a kset.
+ * kset_unregister() - Remove a kset.
* @k: kset.
*/
void kset_unregister(struct kset *k)
@@ -852,7 +881,7 @@ void kset_unregister(struct kset *k)
EXPORT_SYMBOL(kset_unregister);
/**
- * kset_find_obj - search for object in kset.
+ * kset_find_obj() - Search for object in kset.
* @kset: kset we're looking in.
* @name: object's name.
*
@@ -900,7 +929,7 @@ static struct kobj_type kset_ktype = {
};
/**
- * kset_create - create a struct kset dynamically
+ * kset_create() - Create a struct kset dynamically.
*
* @name: the name for the kset
* @uevent_ops: a struct kset_uevent_ops for the kset
@@ -944,7 +973,7 @@ static struct kset *kset_create(const char *name,
}
/**
- * kset_create_and_add - create a struct kset dynamically and add it to sysfs
+ * kset_create_and_add() - Create a struct kset dynamically and add it to sysfs.
*
* @name: the name for the kset
* @uevent_ops: a struct kset_uevent_ops for the kset
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index f05802687ba4..7998affa45d4 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -466,6 +466,13 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
int i = 0;
int retval = 0;
+ /*
+ * Mark the "remove" event as done regardless of the result, because some
+ * subsystems do not want to re-trigger it via automatic cleanup.
+ */
+ if (action == KOBJ_REMOVE)
+ kobj->state_remove_uevent_sent = 1;
+
pr_debug("kobject: '%s' (%p): %s\n",
kobject_name(kobj), kobj, __func__);
@@ -567,10 +574,6 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
kobj->state_add_uevent_sent = 1;
break;
- case KOBJ_REMOVE:
- kobj->state_remove_uevent_sent = 1;
- break;
-
case KOBJ_UNBIND:
zap_modalias_env(env);
break;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 8f8b7b6c2945..530e5b04b97d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -863,6 +863,7 @@ static struct attribute *rx_queue_default_attrs[] __ro_after_init = {
#endif
NULL
};
+ATTRIBUTE_GROUPS(rx_queue_default);
static void rx_queue_release(struct kobject *kobj)
{
@@ -911,7 +912,7 @@ static void rx_queue_get_ownership(struct kobject *kobj,
static struct kobj_type rx_queue_ktype __ro_after_init = {
.sysfs_ops = &rx_queue_sysfs_ops,
.release = rx_queue_release,
- .default_attrs = rx_queue_default_attrs,
+ .default_groups = rx_queue_default_groups,
.namespace = rx_queue_namespace,
.get_ownership = rx_queue_get_ownership,
};
@@ -1416,6 +1417,7 @@ static struct attribute *netdev_queue_default_attrs[] __ro_after_init = {
#endif
NULL
};
+ATTRIBUTE_GROUPS(netdev_queue_default);
static void netdev_queue_release(struct kobject *kobj)
{
@@ -1448,7 +1450,7 @@ static void netdev_queue_get_ownership(struct kobject *kobj,
static struct kobj_type netdev_queue_ktype __ro_after_init = {
.sysfs_ops = &netdev_queue_sysfs_ops,
.release = netdev_queue_release,
- .default_attrs = netdev_queue_default_attrs,
+ .default_groups = netdev_queue_default_groups,
.namespace = netdev_queue_namespace,
.get_ownership = netdev_queue_get_ownership,
};
diff --git a/samples/kobject/kset-example.c b/samples/kobject/kset-example.c
index 401328fd687d..c8010f126808 100644
--- a/samples/kobject/kset-example.c
+++ b/samples/kobject/kset-example.c
@@ -178,6 +178,7 @@ static struct attribute *foo_default_attrs[] = {
&bar_attribute.attr,
NULL, /* need to NULL terminate the list of attributes */
};
+ATTRIBUTE_GROUPS(foo_default);
/*
* Our own ktype for our kobjects. Here we specify our sysfs ops, the
@@ -187,7 +188,7 @@ static struct attribute *foo_default_attrs[] = {
static struct kobj_type foo_ktype = {
.sysfs_ops = &foo_sysfs_ops,
.release = foo_release,
- .default_attrs = foo_default_attrs,
+ .default_groups = foo_default_groups,
};
static struct kset *example_kset;