aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/irq
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/irq')
-rw-r--r--kernel/irq/Kconfig4
-rw-r--r--kernel/irq/affinity.c121
-rw-r--r--kernel/irq/autoprobe.c6
-rw-r--r--kernel/irq/chip.c115
-rw-r--r--kernel/irq/cpuhotplug.c2
-rw-r--r--kernel/irq/debugfs.c10
-rw-r--r--kernel/irq/devres.c5
-rw-r--r--kernel/irq/handle.c4
-rw-r--r--kernel/irq/internals.h15
-rw-r--r--kernel/irq/irq_sim.c12
-rw-r--r--kernel/irq/irqdesc.c46
-rw-r--r--kernel/irq/irqdomain.c63
-rw-r--r--kernel/irq/manage.c501
-rw-r--r--kernel/irq/spurious.c4
-rw-r--r--kernel/irq/timings.c522
15 files changed, 1161 insertions, 269 deletions
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 5f3e2baefca9..f92d9a687372 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
menu "IRQ subsystem"
# Options selectable by the architecture code
@@ -91,6 +92,9 @@ config GENERIC_MSI_IRQ_DOMAIN
select IRQ_DOMAIN_HIERARCHY
select GENERIC_MSI_IRQ
+config IRQ_MSI_IOMMU
+ bool
+
config HANDLE_DOMAIN_IRQ
bool
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index 45b68b4ea48b..f18cd5aa33e8 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -9,7 +9,7 @@
#include <linux/cpu.h>
static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
- int cpus_per_vec)
+ unsigned int cpus_per_vec)
{
const struct cpumask *siblmsk;
int cpu, sibl;
@@ -95,15 +95,17 @@ static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask,
}
static int __irq_build_affinity_masks(const struct irq_affinity *affd,
- int startvec, int numvecs, int firstvec,
+ unsigned int startvec,
+ unsigned int numvecs,
+ unsigned int firstvec,
cpumask_var_t *node_to_cpumask,
const struct cpumask *cpu_mask,
struct cpumask *nmsk,
struct irq_affinity_desc *masks)
{
- int n, nodes, cpus_per_vec, extra_vecs, done = 0;
- int last_affv = firstvec + numvecs;
- int curvec = startvec;
+ unsigned int n, nodes, cpus_per_vec, extra_vecs, done = 0;
+ unsigned int last_affv = firstvec + numvecs;
+ unsigned int curvec = startvec;
nodemask_t nodemsk = NODE_MASK_NONE;
if (!cpumask_weight(cpu_mask))
@@ -117,18 +119,16 @@ static int __irq_build_affinity_masks(const struct irq_affinity *affd,
*/
if (numvecs <= nodes) {
for_each_node_mask(n, nodemsk) {
- cpumask_or(&masks[curvec].mask,
- &masks[curvec].mask,
- node_to_cpumask[n]);
+ cpumask_or(&masks[curvec].mask, &masks[curvec].mask,
+ node_to_cpumask[n]);
if (++curvec == last_affv)
curvec = firstvec;
}
- done = numvecs;
- goto out;
+ return numvecs;
}
for_each_node_mask(n, nodemsk) {
- int ncpus, v, vecs_to_assign, vecs_per_node;
+ unsigned int ncpus, v, vecs_to_assign, vecs_per_node;
/* Spread the vectors per node */
vecs_per_node = (numvecs - (curvec - firstvec)) / nodes;
@@ -163,8 +163,6 @@ static int __irq_build_affinity_masks(const struct irq_affinity *affd,
curvec = firstvec;
--nodes;
}
-
-out:
return done;
}
@@ -174,19 +172,24 @@ out:
* 2) spread other possible CPUs on these vectors
*/
static int irq_build_affinity_masks(const struct irq_affinity *affd,
- int startvec, int numvecs, int firstvec,
- cpumask_var_t *node_to_cpumask,
+ unsigned int startvec, unsigned int numvecs,
+ unsigned int firstvec,
struct irq_affinity_desc *masks)
{
- int curvec = startvec, nr_present, nr_others;
- int ret = -ENOMEM;
+ unsigned int curvec = startvec, nr_present, nr_others;
+ cpumask_var_t *node_to_cpumask;
cpumask_var_t nmsk, npresmsk;
+ int ret = -ENOMEM;
if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
return ret;
if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL))
- goto fail;
+ goto fail_nmsk;
+
+ node_to_cpumask = alloc_node_to_cpumask();
+ if (!node_to_cpumask)
+ goto fail_npresmsk;
ret = 0;
/* Stabilize the cpumasks */
@@ -217,13 +220,22 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd,
if (nr_present < numvecs)
WARN_ON(nr_present + nr_others < numvecs);
+ free_node_to_cpumask(node_to_cpumask);
+
+ fail_npresmsk:
free_cpumask_var(npresmsk);
- fail:
+ fail_nmsk:
free_cpumask_var(nmsk);
return ret;
}
+static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs)
+{
+ affd->nr_sets = 1;
+ affd->set_size[0] = affvecs;
+}
+
/**
* irq_create_affinity_masks - Create affinity masks for multiqueue spreading
* @nvecs: The total number of vectors
@@ -232,50 +244,62 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd,
* Returns the irq_affinity_desc pointer or NULL if allocation failed.
*/
struct irq_affinity_desc *
-irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
+irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
{
- int affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
- int curvec, usedvecs;
- cpumask_var_t *node_to_cpumask;
+ unsigned int affvecs, curvec, usedvecs, i;
struct irq_affinity_desc *masks = NULL;
- int i, nr_sets;
/*
- * If there aren't any vectors left after applying the pre/post
- * vectors don't bother with assigning affinity.
+ * Determine the number of vectors which need interrupt affinities
+ * assigned. If the pre/post request exhausts the available vectors
+ * then nothing to do here except for invoking the calc_sets()
+ * callback so the device driver can adjust to the situation. If there
+ * is only a single vector, then managing the queue is pointless as
+ * well.
*/
- if (nvecs == affd->pre_vectors + affd->post_vectors)
+ if (nvecs > 1 && nvecs > affd->pre_vectors + affd->post_vectors)
+ affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
+ else
+ affvecs = 0;
+
+ /*
+ * Simple invocations do not provide a calc_sets() callback. Install
+ * the generic one.
+ */
+ if (!affd->calc_sets)
+ affd->calc_sets = default_calc_sets;
+
+ /* Recalculate the sets */
+ affd->calc_sets(affd, affvecs);
+
+ if (WARN_ON_ONCE(affd->nr_sets > IRQ_AFFINITY_MAX_SETS))
return NULL;
- node_to_cpumask = alloc_node_to_cpumask();
- if (!node_to_cpumask)
+ /* Nothing to assign? */
+ if (!affvecs)
return NULL;
masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
if (!masks)
- goto outnodemsk;
+ return NULL;
/* Fill out vectors at the beginning that don't need affinity */
for (curvec = 0; curvec < affd->pre_vectors; curvec++)
cpumask_copy(&masks[curvec].mask, irq_default_affinity);
+
/*
* Spread on present CPUs starting from affd->pre_vectors. If we
* have multiple sets, build each sets affinity mask separately.
*/
- nr_sets = affd->nr_sets;
- if (!nr_sets)
- nr_sets = 1;
-
- for (i = 0, usedvecs = 0; i < nr_sets; i++) {
- int this_vecs = affd->sets ? affd->sets[i] : affvecs;
+ for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
+ unsigned int this_vecs = affd->set_size[i];
int ret;
ret = irq_build_affinity_masks(affd, curvec, this_vecs,
- curvec, node_to_cpumask, masks);
+ curvec, masks);
if (ret) {
kfree(masks);
- masks = NULL;
- goto outnodemsk;
+ return NULL;
}
curvec += this_vecs;
usedvecs += this_vecs;
@@ -293,8 +317,6 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd)
for (i = affd->pre_vectors; i < nvecs - affd->post_vectors; i++)
masks[i].is_managed = 1;
-outnodemsk:
- free_node_to_cpumask(node_to_cpumask);
return masks;
}
@@ -304,25 +326,22 @@ outnodemsk:
* @maxvec: The maximum number of vectors available
* @affd: Description of the affinity requirements
*/
-int irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity *affd)
+unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec,
+ const struct irq_affinity *affd)
{
- int resv = affd->pre_vectors + affd->post_vectors;
- int vecs = maxvec - resv;
- int set_vecs;
+ unsigned int resv = affd->pre_vectors + affd->post_vectors;
+ unsigned int set_vecs;
if (resv > minvec)
return 0;
- if (affd->nr_sets) {
- int i;
-
- for (i = 0, set_vecs = 0; i < affd->nr_sets; i++)
- set_vecs += affd->sets[i];
+ if (affd->calc_sets) {
+ set_vecs = maxvec - resv;
} else {
get_online_cpus();
set_vecs = cpumask_weight(cpu_possible_mask);
put_online_cpus();
}
- return resv + min(set_vecs, vecs);
+ return resv + min(set_vecs, maxvec - resv);
}
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 16cbf6beb276..ae60cae24e9a 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -90,7 +90,7 @@ unsigned long probe_irq_on(void)
/* It triggered already - consider it spurious. */
if (!(desc->istate & IRQS_WAITING)) {
desc->istate &= ~IRQS_AUTODETECT;
- irq_shutdown(desc);
+ irq_shutdown_and_deactivate(desc);
} else
if (i < 32)
mask |= 1 << i;
@@ -127,7 +127,7 @@ unsigned int probe_irq_mask(unsigned long val)
mask |= 1 << i;
desc->istate &= ~IRQS_AUTODETECT;
- irq_shutdown(desc);
+ irq_shutdown_and_deactivate(desc);
}
raw_spin_unlock_irq(&desc->lock);
}
@@ -169,7 +169,7 @@ int probe_irq_off(unsigned long val)
nr_of_irqs++;
}
desc->istate &= ~IRQS_AUTODETECT;
- irq_shutdown(desc);
+ irq_shutdown_and_deactivate(desc);
}
raw_spin_unlock_irq(&desc->lock);
}
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 34e969069488..3ff4a1260885 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -314,6 +314,12 @@ void irq_shutdown(struct irq_desc *desc)
}
irq_state_clr_started(desc);
}
+}
+
+
+void irq_shutdown_and_deactivate(struct irq_desc *desc)
+{
+ irq_shutdown(desc);
/*
* This must be called even if the interrupt was never started up,
* because the activation can happen before the interrupt is
@@ -730,6 +736,37 @@ out:
EXPORT_SYMBOL_GPL(handle_fasteoi_irq);
/**
+ * handle_fasteoi_nmi - irq handler for NMI interrupt lines
+ * @desc: the interrupt description structure for this irq
+ *
+ * A simple NMI-safe handler, considering the restrictions
+ * from request_nmi.
+ *
+ * Only a single callback will be issued to the chip: an ->eoi()
+ * call when the interrupt has been serviced. This enables support
+ * for modern forms of interrupt handlers, which handle the flow
+ * details in hardware, transparently.
+ */
+void handle_fasteoi_nmi(struct irq_desc *desc)
+{
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ struct irqaction *action = desc->action;
+ unsigned int irq = irq_desc_get_irq(desc);
+ irqreturn_t res;
+
+ trace_irq_handler_entry(irq, action);
+ /*
+ * NMIs cannot be shared, there is only one action.
+ */
+ res = action->handler(irq, action->dev_id);
+ trace_irq_handler_exit(irq, action, res);
+
+ if (chip->irq_eoi)
+ chip->irq_eoi(&desc->irq_data);
+}
+EXPORT_SYMBOL_GPL(handle_fasteoi_nmi);
+
+/**
* handle_edge_irq - edge type IRQ handler
* @desc: the interrupt description structure for this irq
*
@@ -855,7 +892,11 @@ void handle_percpu_irq(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
- kstat_incr_irqs_this_cpu(desc);
+ /*
+ * PER CPU interrupts are not serialized. Do not touch
+ * desc->tot_count.
+ */
+ __kstat_incr_irqs_this_cpu(desc);
if (chip->irq_ack)
chip->irq_ack(&desc->irq_data);
@@ -884,7 +925,11 @@ void handle_percpu_devid_irq(struct irq_desc *desc)
unsigned int irq = irq_desc_get_irq(desc);
irqreturn_t res;
- kstat_incr_irqs_this_cpu(desc);
+ /*
+ * PER CPU interrupts are not serialized. Do not touch
+ * desc->tot_count.
+ */
+ __kstat_incr_irqs_this_cpu(desc);
if (chip->irq_ack)
chip->irq_ack(&desc->irq_data);
@@ -908,6 +953,29 @@ void handle_percpu_devid_irq(struct irq_desc *desc)
chip->irq_eoi(&desc->irq_data);
}
+/**
+ * handle_percpu_devid_fasteoi_nmi - Per CPU local NMI handler with per cpu
+ * dev ids
+ * @desc: the interrupt description structure for this irq
+ *
+ * Similar to handle_fasteoi_nmi, but handling the dev_id cookie
+ * as a percpu pointer.
+ */
+void handle_percpu_devid_fasteoi_nmi(struct irq_desc *desc)
+{
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ struct irqaction *action = desc->action;
+ unsigned int irq = irq_desc_get_irq(desc);
+ irqreturn_t res;
+
+ trace_irq_handler_entry(irq, action);
+ res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id));
+ trace_irq_handler_exit(irq, action, res);
+
+ if (chip->irq_eoi)
+ chip->irq_eoi(&desc->irq_data);
+}
+
static void
__irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle,
int is_chained, const char *name)
@@ -1278,6 +1346,17 @@ void irq_chip_mask_parent(struct irq_data *data)
EXPORT_SYMBOL_GPL(irq_chip_mask_parent);
/**
+ * irq_chip_mask_ack_parent - Mask and acknowledge the parent interrupt
+ * @data: Pointer to interrupt specific data
+ */
+void irq_chip_mask_ack_parent(struct irq_data *data)
+{
+ data = data->parent_data;
+ data->chip->irq_mask_ack(data);
+}
+EXPORT_SYMBOL_GPL(irq_chip_mask_ack_parent);
+
+/**
* irq_chip_unmask_parent - Unmask the parent interrupt
* @data: Pointer to interrupt specific data
*/
@@ -1376,11 +1455,43 @@ int irq_chip_set_vcpu_affinity_parent(struct irq_data *data, void *vcpu_info)
int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on)
{
data = data->parent_data;
+
+ if (data->chip->flags & IRQCHIP_SKIP_SET_WAKE)
+ return 0;
+
if (data->chip->irq_set_wake)
return data->chip->irq_set_wake(data, on);
return -ENOSYS;
}
+EXPORT_SYMBOL_GPL(irq_chip_set_wake_parent);
+
+/**
+ * irq_chip_request_resources_parent - Request resources on the parent interrupt
+ * @data: Pointer to interrupt specific data
+ */
+int irq_chip_request_resources_parent(struct irq_data *data)
+{
+ data = data->parent_data;
+
+ if (data->chip->irq_request_resources)
+ return data->chip->irq_request_resources(data);
+
+ return -ENOSYS;
+}
+EXPORT_SYMBOL_GPL(irq_chip_request_resources_parent);
+
+/**
+ * irq_chip_release_resources_parent - Release resources on the parent interrupt
+ * @data: Pointer to interrupt specific data
+ */
+void irq_chip_release_resources_parent(struct irq_data *data)
+{
+ data = data->parent_data;
+ if (data->chip->irq_release_resources)
+ data->chip->irq_release_resources(data);
+}
+EXPORT_SYMBOL_GPL(irq_chip_release_resources_parent);
#endif
/**
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index 5b1072e394b2..6c7ca2e983a5 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -116,7 +116,7 @@ static bool migrate_one_irq(struct irq_desc *desc)
*/
if (irqd_affinity_is_managed(d)) {
irqd_set_managed_shutdown(d);
- irq_shutdown(desc);
+ irq_shutdown_and_deactivate(desc);
return false;
}
affinity = cpu_online_mask;
diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c
index 6f636136cccc..c1eccd4f6520 100644
--- a/kernel/irq/debugfs.c
+++ b/kernel/irq/debugfs.c
@@ -56,6 +56,7 @@ static const struct irq_bit_descr irqchip_flags[] = {
BIT_MASK_DESCR(IRQCHIP_ONESHOT_SAFE),
BIT_MASK_DESCR(IRQCHIP_EOI_THREADED),
BIT_MASK_DESCR(IRQCHIP_SUPPORTS_LEVEL_MSI),
+ BIT_MASK_DESCR(IRQCHIP_SUPPORTS_NMI),
};
static void
@@ -140,6 +141,7 @@ static const struct irq_bit_descr irqdesc_istates[] = {
BIT_MASK_DESCR(IRQS_WAITING),
BIT_MASK_DESCR(IRQS_PENDING),
BIT_MASK_DESCR(IRQS_SUSPENDED),
+ BIT_MASK_DESCR(IRQS_NMI),
};
@@ -150,7 +152,7 @@ static int irq_debug_show(struct seq_file *m, void *p)
raw_spin_lock_irq(&desc->lock);
data = irq_desc_get_irq_data(desc);
- seq_printf(m, "handler: %pf\n", desc->handle_irq);
+ seq_printf(m, "handler: %ps\n", desc->handle_irq);
seq_printf(m, "device: %s\n", desc->dev_name);
seq_printf(m, "status: 0x%08x\n", desc->status_use_accessors);
irq_debug_show_bits(m, 0, desc->status_use_accessors, irqdesc_states,
@@ -203,8 +205,8 @@ static ssize_t irq_debug_write(struct file *file, const char __user *user_buf,
chip_bus_lock(desc);
raw_spin_lock_irqsave(&desc->lock, flags);
- if (irq_settings_is_level(desc)) {
- /* Can't do level, sorry */
+ if (irq_settings_is_level(desc) || desc->istate & IRQS_NMI) {
+ /* Can't do level nor NMIs, sorry */
err = -EINVAL;
} else {
desc->istate |= IRQS_PENDING;
@@ -256,8 +258,6 @@ static int __init irq_debugfs_init(void)
int irq;
root_dir = debugfs_create_dir("irq", NULL);
- if (!root_dir)
- return -ENOMEM;
irq_domain_debugfs_init(root_dir);
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c
index 5d5378ea0afe..f6e5515ee077 100644
--- a/kernel/irq/devres.c
+++ b/kernel/irq/devres.c
@@ -84,8 +84,6 @@ EXPORT_SYMBOL(devm_request_threaded_irq);
* @dev: device to request interrupt for
* @irq: Interrupt line to allocate
* @handler: Function to be called when the IRQ occurs
- * @thread_fn: function to be called in a threaded interrupt context. NULL
- * for devices which handle everything in @handler
* @irqflags: Interrupt type flags
* @devname: An ascii name for the claiming device, dev_name(dev) if NULL
* @dev_id: A cookie passed back to the handler function
@@ -222,9 +220,8 @@ devm_irq_alloc_generic_chip(struct device *dev, const char *name, int num_ct,
irq_flow_handler_t handler)
{
struct irq_chip_generic *gc;
- unsigned long sz = sizeof(*gc) + num_ct * sizeof(struct irq_chip_type);
- gc = devm_kzalloc(dev, sz, GFP_KERNEL);
+ gc = devm_kzalloc(dev, struct_size(gc, chip_types, num_ct), GFP_KERNEL);
if (gc)
irq_init_generic_chip(gc, name, num_ct,
irq_base, reg_base, handler);
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 38554bc35375..a4ace611f47f 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -149,7 +149,7 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags
res = action->handler(irq, action->dev_id);
trace_irq_handler_exit(irq, action, res);
- if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pF enabled interrupts\n",
+ if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pS enabled interrupts\n",
irq, action->handler))
local_irq_disable();
@@ -166,7 +166,7 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags
__irq_wake_thread(desc, action);
- /* Fall through to add to randomness */
+ /* Fall through - to add to randomness */
case IRQ_HANDLED:
*flags |= action->flags;
break;
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index ca6afa267070..3a948f41ab00 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -49,6 +49,7 @@ enum {
* IRQS_WAITING - irq is waiting
* IRQS_PENDING - irq is pending and replayed later
* IRQS_SUSPENDED - irq is suspended
+ * IRQS_NMI - irq line is used to deliver NMIs
*/
enum {
IRQS_AUTODETECT = 0x00000001,
@@ -60,6 +61,7 @@ enum {
IRQS_PENDING = 0x00000200,
IRQS_SUSPENDED = 0x00000800,
IRQS_TIMINGS = 0x00001000,
+ IRQS_NMI = 0x00002000,
};
#include "debug.h"
@@ -80,6 +82,7 @@ extern int irq_activate_and_startup(struct irq_desc *desc, bool resend);
extern int irq_startup(struct irq_desc *desc, bool resend, bool force);
extern void irq_shutdown(struct irq_desc *desc);
+extern void irq_shutdown_and_deactivate(struct irq_desc *desc);
extern void irq_enable(struct irq_desc *desc);
extern void irq_disable(struct irq_desc *desc);
extern void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu);
@@ -94,6 +97,10 @@ static inline void irq_mark_irq(unsigned int irq) { }
extern void irq_mark_irq(unsigned int irq);
#endif
+extern int __irq_get_irqchip_state(struct irq_data *data,
+ enum irqchip_irq_state which,
+ bool *state);
+
extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags);
@@ -242,12 +249,18 @@ static inline void irq_state_set_masked(struct irq_desc *desc)
#undef __irqd_to_state
-static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc)
+static inline void __kstat_incr_irqs_this_cpu(struct irq_desc *desc)
{
__this_cpu_inc(*desc->kstat_irqs);
__this_cpu_inc(kstat.irqs_sum);
}
+static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc)
+{
+ __kstat_incr_irqs_this_cpu(desc);
+ desc->tot_count++;
+}
+
static inline int irq_desc_get_node(struct irq_desc *desc)
{
return irq_common_data_get_node(&desc->irq_common_data);
diff --git a/kernel/irq/irq_sim.c b/kernel/irq/irq_sim.c
index 98a20e1594ce..b992f88c5613 100644
--- a/kernel/irq/irq_sim.c
+++ b/kernel/irq/irq_sim.c
@@ -25,10 +25,22 @@ static void irq_sim_irqunmask(struct irq_data *data)
irq_ctx->enabled = true;
}
+static int irq_sim_set_type(struct irq_data *data, unsigned int type)
+{
+ /* We only support rising and falling edge trigger types. */
+ if (type & ~IRQ_TYPE_EDGE_BOTH)
+ return -EINVAL;
+
+ irqd_set_trigger_type(data, type);
+
+ return 0;
+}
+
static struct irq_chip irq_sim_irqchip = {
.name = "irq_sim",
.irq_mask = irq_sim_irqmask,
.irq_unmask = irq_sim_irqunmask,
+ .irq_set_type = irq_sim_set_type,
};
static void irq_sim_handle_irq(struct irq_work *work)
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index ef8ad36cadcf..c52b737ab8e3 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -119,6 +119,7 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
desc->depth = 1;
desc->irq_count = 0;
desc->irqs_unhandled = 0;
+ desc->tot_count = 0;
desc->name = NULL;
desc->owner = owner;
for_each_possible_cpu(cpu)
@@ -274,11 +275,12 @@ static struct attribute *irq_attrs[] = {
&actions_attr.attr,
NULL
};
+ATTRIBUTE_GROUPS(irq);
static struct kobj_type irq_kobj_type = {
.release = irq_kobj_release,
.sysfs_ops = &kobj_sysfs_ops,
- .default_attrs = irq_attrs,
+ .default_groups = irq_groups,
};
static void irq_sysfs_add(int irq, struct irq_desc *desc)
@@ -557,6 +559,7 @@ int __init early_irq_init(void)
alloc_masks(&desc[i], node);
raw_spin_lock_init(&desc[i].lock);
lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
+ mutex_init(&desc[i].request_mutex);
desc_set_defaults(i, &desc[i], node, NULL, NULL);
}
return arch_early_irq_init();
@@ -669,6 +672,41 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
set_irq_regs(old_regs);
return ret;
}
+
+#ifdef CONFIG_IRQ_DOMAIN
+/**
+ * handle_domain_nmi - Invoke the handler for a HW irq belonging to a domain
+ * @domain: The domain where to perform the lookup
+ * @hwirq: The HW irq number to convert to a logical one
+ * @regs: Register file coming from the low-level handling code
+ *
+ * Returns: 0 on success, or -EINVAL if conversion has failed
+ */
+int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
+ struct pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+ unsigned int irq;
+ int ret = 0;
+
+ nmi_enter();
+
+ irq = irq_find_mapping(domain, hwirq);
+
+ /*
+ * ack_bad_irq is not NMI-safe, just report
+ * an invalid interrupt.
+ */
+ if (likely(irq))
+ generic_handle_irq(irq);
+ else
+ ret = -EINVAL;
+
+ nmi_exit();
+ set_irq_regs(old_regs);
+ return ret;
+}
+#endif
#endif
/* Dynamic interrupt handling */
@@ -919,11 +957,15 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
unsigned int kstat_irqs(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
- int cpu;
unsigned int sum = 0;
+ int cpu;
if (!desc || !desc->kstat_irqs)
return 0;
+ if (!irq_settings_is_per_cpu_devid(desc) &&
+ !irq_settings_is_per_cpu(desc))
+ return desc->tot_count;
+
for_each_possible_cpu(cpu)
sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
return sum;
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 8b0be4bd6565..a453e229f99c 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -458,6 +458,20 @@ void irq_set_default_host(struct irq_domain *domain)
}
EXPORT_SYMBOL_GPL(irq_set_default_host);
+/**
+ * irq_get_default_host() - Retrieve the "default" irq domain
+ *
+ * Returns: the default domain, if any.
+ *
+ * Modern code should never use this. This should only be used on
+ * systems that cannot implement a firmware->fwnode mapping (which
+ * both DT and ACPI provide).
+ */
+struct irq_domain *irq_get_default_host(void)
+{
+ return irq_default_domain;
+}
+
static void irq_domain_clear_mapping(struct irq_domain *domain,
irq_hw_number_t hwirq)
{
@@ -729,16 +743,17 @@ static int irq_domain_translate(struct irq_domain *d,
return 0;
}
-static void of_phandle_args_to_fwspec(struct of_phandle_args *irq_data,
+static void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args,
+ unsigned int count,
struct irq_fwspec *fwspec)
{
int i;
- fwspec->fwnode = irq_data->np ? &irq_data->np->fwnode : NULL;
- fwspec->param_count = irq_data->args_count;
+ fwspec->fwnode = np ? &np->fwnode : NULL;
+ fwspec->param_count = count;
- for (i = 0; i < irq_data->args_count; i++)
- fwspec->param[i] = irq_data->args[i];
+ for (i = 0; i < count; i++)
+ fwspec->param[i] = args[i];
}
unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
@@ -836,7 +851,9 @@ unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data)
{
struct irq_fwspec fwspec;
- of_phandle_args_to_fwspec(irq_data, &fwspec);
+ of_phandle_args_to_fwspec(irq_data->np, irq_data->args,
+ irq_data->args_count, &fwspec);
+
return irq_create_fwspec_mapping(&fwspec);
}
EXPORT_SYMBOL_GPL(irq_create_of_mapping);
@@ -928,11 +945,10 @@ int irq_domain_xlate_twocell(struct irq_domain *d, struct device_node *ctrlr,
const u32 *intspec, unsigned int intsize,
irq_hw_number_t *out_hwirq, unsigned int *out_type)
{
- if (WARN_ON(intsize < 2))
- return -EINVAL;
- *out_hwirq = intspec[0];
- *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK;
- return 0;
+ struct irq_fwspec fwspec;
+
+ of_phandle_args_to_fwspec(ctrlr, intspec, intsize, &fwspec);
+ return irq_domain_translate_twocell(d, &fwspec, out_hwirq, out_type);
}
EXPORT_SYMBOL_GPL(irq_domain_xlate_twocell);
@@ -968,6 +984,27 @@ const struct irq_domain_ops irq_domain_simple_ops = {
};
EXPORT_SYMBOL_GPL(irq_domain_simple_ops);
+/**
+ * irq_domain_translate_twocell() - Generic translate for direct two cell
+ * bindings
+ *
+ * Device Tree IRQ specifier translation function which works with two cell
+ * bindings where the cell values map directly to the hwirq number
+ * and linux irq flags.
+ */
+int irq_domain_translate_twocell(struct irq_domain *d,
+ struct irq_fwspec *fwspec,
+ unsigned long *out_hwirq,
+ unsigned int *out_type)
+{
+ if (WARN_ON(fwspec->param_count < 2))
+ return -EINVAL;
+ *out_hwirq = fwspec->param[0];
+ *out_type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(irq_domain_translate_twocell);
+
int irq_domain_alloc_descs(int virq, unsigned int cnt, irq_hw_number_t hwirq,
int node, const struct irq_affinity_desc *affinity)
{
@@ -1260,7 +1297,7 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
/**
* __irq_domain_alloc_irqs - Allocate IRQs from domain
* @domain: domain to allocate from
- * @irq_base: allocate specified IRQ nubmer if irq_base >= 0
+ * @irq_base: allocate specified IRQ number if irq_base >= 0
* @nr_irqs: number of IRQs to allocate
* @node: NUMA node id for memory allocation
* @arg: domain specific argument
@@ -1749,8 +1786,6 @@ void __init irq_domain_debugfs_init(struct dentry *root)
struct irq_domain *d;
domain_dir = debugfs_create_dir("domains", root);
- if (!domain_dir)
- return;
debugfs_create_file("default", 0444, domain_dir, NULL,
&irq_domain_debug_fops);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 84b54a17b95d..e8f7f179bf77 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/random.h>
#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
@@ -34,8 +35,9 @@ static int __init setup_forced_irqthreads(char *arg)
early_param("threadirqs", setup_forced_irqthreads);
#endif
-static void __synchronize_hardirq(struct irq_desc *desc)
+static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip)
{
+ struct irq_data *irqd = irq_desc_get_irq_data(desc);
bool inprogress;
do {
@@ -51,6 +53,20 @@ static void __synchronize_hardirq(struct irq_desc *desc)
/* Ok, that indicated we're done: double-check carefully. */
raw_spin_lock_irqsave(&desc->lock, flags);
inprogress = irqd_irq_inprogress(&desc->irq_data);
+
+ /*
+ * If requested and supported, check at the chip whether it
+ * is in flight at the hardware level, i.e. already pending
+ * in a CPU and waiting for service and acknowledge.
+ */
+ if (!inprogress && sync_chip) {
+ /*
+ * Ignore the return code. inprogress is only updated
+ * when the chip supports it.
+ */
+ __irq_get_irqchip_state(irqd, IRQCHIP_STATE_ACTIVE,
+ &inprogress);
+ }
raw_spin_unlock_irqrestore(&desc->lock, flags);
/* Oops, that failed? */
@@ -73,13 +89,18 @@ static void __synchronize_hardirq(struct irq_desc *desc)
* Returns: false if a threaded handler is active.
*
* This function may be called - with care - from IRQ context.
+ *
+ * It does not check whether there is an interrupt in flight at the
+ * hardware level, but not serviced yet, as this might deadlock when
+ * called with interrupts disabled and the target CPU of the interrupt
+ * is the current CPU.
*/
bool synchronize_hardirq(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
if (desc) {
- __synchronize_hardirq(desc);
+ __synchronize_hardirq(desc, false);
return !atomic_read(&desc->threads_active);
}
@@ -95,14 +116,19 @@ EXPORT_SYMBOL(synchronize_hardirq);
* to complete before returning. If you use this function while
* holding a resource the IRQ handler may need you will deadlock.
*
- * This function may be called - with care - from IRQ context.
+ * Can only be called from preemptible code as it might sleep when
+ * an interrupt thread is associated to @irq.
+ *
+ * It optionally makes sure (when the irq chip supports that method)
+ * that the interrupt is not pending in any CPU and waiting for
+ * service.
*/
void synchronize_irq(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
if (desc) {
- __synchronize_hardirq(desc);
+ __synchronize_hardirq(desc, true);
/*
* We made sure that no hardirq handler is
* running. Now verify that no threaded handlers are
@@ -196,6 +222,7 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
case IRQ_SET_MASK_OK:
case IRQ_SET_MASK_OK_DONE:
cpumask_copy(desc->irq_common_data.affinity, mask);
+ /* fall through */
case IRQ_SET_MASK_OK_NOCOPY:
irq_validate_effective_affinity(data);
irq_set_thread_affinity(desc);
@@ -341,7 +368,7 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
/* The release function is promised process context */
might_sleep();
- if (!desc)
+ if (!desc || desc->istate & IRQS_NMI)
return -EINVAL;
/* Complete initialisation of *notify */
@@ -356,8 +383,10 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
desc->affinity_notify = notify;
raw_spin_unlock_irqrestore(&desc->lock, flags);
- if (old_notify)
+ if (old_notify) {
+ cancel_work_sync(&old_notify->work);
kref_put(&old_notify->kref, old_notify->release);
+ }
return 0;
}
@@ -553,6 +582,21 @@ bool disable_hardirq(unsigned int irq)
}
EXPORT_SYMBOL_GPL(disable_hardirq);
+/**
+ * disable_nmi_nosync - disable an nmi without waiting
+ * @irq: Interrupt to disable
+ *
+ * Disable the selected interrupt line. Disables and enables are
+ * nested.
+ * The interrupt to disable must have been requested through request_nmi.
+ * Unlike disable_nmi(), this function does not ensure existing
+ * instances of the IRQ handler have completed before returning.
+ */
+void disable_nmi_nosync(unsigned int irq)
+{
+ disable_irq_nosync(irq);
+}
+
void __enable_irq(struct irq_desc *desc)
{
switch (desc->depth) {
@@ -609,6 +653,20 @@ out:
}
EXPORT_SYMBOL(enable_irq);
+/**
+ * enable_nmi - enable handling of an nmi
+ * @irq: Interrupt to enable
+ *
+ * The interrupt to enable must have been requested through request_nmi.
+ * Undoes the effect of one call to disable_nmi(). If this
+ * matches the last disable, processing of interrupts on this
+ * IRQ line is re-enabled.
+ */
+void enable_nmi(unsigned int irq)
+{
+ enable_irq(irq);
+}
+
static int set_irq_wake_real(unsigned int irq, unsigned int on)
{
struct irq_desc *desc = irq_to_desc(irq);
@@ -644,6 +702,12 @@ int irq_set_irq_wake(unsigned int irq, unsigned int on)
if (!desc)
return -EINVAL;
+ /* Don't use NMIs as wake up interrupts please */
+ if (desc->istate & IRQS_NMI) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
/* wakeup-capable irqs can be shared between drivers that
* don't need to have the same sleep mode behaviors.
*/
@@ -666,6 +730,8 @@ int irq_set_irq_wake(unsigned int irq, unsigned int on)
irqd_clear(&desc->irq_data, IRQD_WAKEUP_STATE);
}
}
+
+out_unlock:
irq_put_desc_busunlock(desc, flags);
return ret;
}
@@ -726,6 +792,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned long flags)
case IRQ_SET_MASK_OK_DONE:
irqd_clear(&desc->irq_data, IRQD_TRIGGER_MASK);
irqd_set(&desc->irq_data, flags);
+ /* fall through */
case IRQ_SET_MASK_OK_NOCOPY:
flags = irqd_get_trigger_type(&desc->irq_data);
@@ -740,7 +807,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned long flags)
ret = 0;
break;
default:
- pr_err("Setting trigger mode %lu for irq %u failed (%pF)\n",
+ pr_err("Setting trigger mode %lu for irq %u failed (%pS)\n",
flags, irq_desc_get_irq(desc), chip->irq_set_type);
}
if (unmask)
@@ -1128,6 +1195,39 @@ static void irq_release_resources(struct irq_desc *desc)
c->irq_release_resources(d);
}
+static bool irq_supports_nmi(struct irq_desc *desc)
+{
+ struct irq_data *d = irq_desc_get_irq_data(desc);
+
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+ /* Only IRQs directly managed by the root irqchip can be set as NMI */
+ if (d->parent_data)
+ return false;
+#endif
+ /* Don't support NMIs for chips behind a slow bus */
+ if (d->chip->irq_bus_lock || d->chip->irq_bus_sync_unlock)
+ return false;
+
+ return d->chip->flags & IRQCHIP_SUPPORTS_NMI;
+}
+
+static int irq_nmi_setup(struct irq_desc *desc)
+{
+ struct irq_data *d = irq_desc_get_irq_data(desc);
+ struct irq_chip *c = d->chip;
+
+ return c->irq_nmi_setup ? c->irq_nmi_setup(d) : -EINVAL;
+}
+
+static void irq_nmi_teardown(struct irq_desc *desc)
+{
+ struct irq_data *d = irq_desc_get_irq_data(desc);
+ struct irq_chip *c = d->chip;
+
+ if (c->irq_nmi_teardown)
+ c->irq_nmi_teardown(d);
+}
+
static int
setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary)
{
@@ -1302,9 +1402,17 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
* fields must have IRQF_SHARED set and the bits which
* set the trigger type must match. Also all must
* agree on ONESHOT.
+ * Interrupt lines used for NMIs cannot be shared.
*/
unsigned int oldtype;
+ if (desc->istate & IRQS_NMI) {
+ pr_err("Invalid attempt to share NMI for %s (irq %d) on irqchip %s.\n",
+ new->name, irq, desc->irq_data.chip->name);
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
/*
* If nobody did set the configuration before, inherit
* the one provided by the requester.
@@ -1617,6 +1725,7 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
/* If this was the last handler, shut down the IRQ line: */
if (!desc->action) {
irq_settings_clr_disable_unlazy(desc);
+ /* Only shutdown. Deactivate after synchronize_hardirq() */
irq_shutdown(desc);
}
@@ -1645,8 +1754,12 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
unregister_handler_proc(irq, action);
- /* Make sure it's not being used on another CPU: */
- synchronize_hardirq(irq);
+ /*
+ * Make sure it's not being used on another CPU and if the chip
+ * supports it also make sure that there is no (not yet serviced)
+ * interrupt in flight at the hardware level.
+ */
+ __synchronize_hardirq(desc, true);
#ifdef CONFIG_DEBUG_SHIRQ
/*
@@ -1686,6 +1799,14 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
* require it to deallocate resources over the slow bus.
*/
chip_bus_lock(desc);
+ /*
+ * There is no interrupt on the fly anymore. Deactivate it
+ * completely.
+ */
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ irq_domain_deactivate_irq(&desc->irq_data);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+
irq_release_resources(desc);
chip_bus_sync_unlock(desc);
irq_remove_timings(desc);
@@ -1756,6 +1877,59 @@ const void *free_irq(unsigned int irq, void *dev_id)
}
EXPORT_SYMBOL(free_irq);
+/* This function must be called with desc->lock held */
+static const void *__cleanup_nmi(unsigned int irq, struct irq_desc *desc)
+{
+ const char *devname = NULL;
+
+ desc->istate &= ~IRQS_NMI;
+
+ if (!WARN_ON(desc->action == NULL)) {
+ irq_pm_remove_action(desc, desc->action);
+ devname = desc->action->name;
+ unregister_handler_proc(irq, desc->action);
+
+ kfree(desc->action);
+ desc->action = NULL;
+ }
+
+ irq_settings_clr_disable_unlazy(desc);
+ irq_shutdown_and_deactivate(desc);
+
+ irq_release_resources(desc);
+
+ irq_chip_pm_put(&desc->irq_data);
+ module_put(desc->owner);
+
+ return devname;
+}
+
+const void *free_nmi(unsigned int irq, void *dev_id)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+ unsigned long flags;
+ const void *devname;
+
+ if (!desc || WARN_ON(!(desc->istate & IRQS_NMI)))
+ return NULL;
+
+ if (WARN_ON(irq_settings_is_per_cpu_devid(desc)))
+ return NULL;
+
+ /* NMI still enabled */
+ if (WARN_ON(desc->depth == 0))
+ disable_nmi_nosync(irq);
+
+ raw_spin_lock_irqsave(&desc->lock, flags);
+
+ irq_nmi_teardown(desc);
+ devname = __cleanup_nmi(irq, desc);
+
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+ return devname;
+}
+
/**
* request_threaded_irq - allocate an interrupt line
* @irq: Interrupt line to allocate
@@ -1925,6 +2099,101 @@ int request_any_context_irq(unsigned int irq, irq_handler_t handler,
}
EXPORT_SYMBOL_GPL(request_any_context_irq);
+/**
+ * request_nmi - allocate an interrupt line for NMI delivery
+ * @irq: Interrupt line to allocate
+ * @handler: Function to be called when the IRQ occurs.
+ * Threaded handler for threaded interrupts.
+ * @irqflags: Interrupt type flags
+ * @name: An ascii name for the claiming device
+ * @dev_id: A cookie passed back to the handler function
+ *
+ * This call allocates interrupt resources and enables the
+ * interrupt line and IRQ handling. It sets up the IRQ line
+ * to be handled as an NMI.
+ *
+ * An interrupt line delivering NMIs cannot be shared and IRQ handling
+ * cannot be threaded.
+ *
+ * Interrupt lines requested for NMI delivering must produce per cpu
+ * interrupts and have auto enabling setting disabled.
+ *
+ * Dev_id must be globally unique. Normally the address of the
+ * device data structure is used as the cookie. Since the handler
+ * receives this value it makes sense to use it.
+ *
+ * If the interrupt line cannot be used to deliver NMIs, function
+ * will fail and return a negative value.
+ */
+int request_nmi(unsigned int irq, irq_handler_t handler,
+ unsigned long irqflags, const char *name, void *dev_id)
+{
+ struct irqaction *action;
+ struct irq_desc *desc;
+ unsigned long flags;
+ int retval;
+
+ if (irq == IRQ_NOTCONNECTED)
+ return -ENOTCONN;
+
+ /* NMI cannot be shared, used for Polling */
+ if (irqflags & (IRQF_SHARED | IRQF_COND_SUSPEND | IRQF_IRQPOLL))
+ return -EINVAL;
+
+ if (!(irqflags & IRQF_PERCPU))
+ return -EINVAL;
+
+ if (!handler)
+ return -EINVAL;
+
+ desc = irq_to_desc(irq);
+
+ if (!desc || irq_settings_can_autoenable(desc) ||
+ !irq_settings_can_request(desc) ||
+ WARN_ON(irq_settings_is_per_cpu_devid(desc)) ||
+ !irq_supports_nmi(desc))
+ return -EINVAL;
+
+ action = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
+ if (!action)
+ return -ENOMEM;
+
+ action->handler = handler;
+ action->flags = irqflags | IRQF_NO_THREAD | IRQF_NOBALANCING;
+ action->name = name;
+ action->dev_id = dev_id;
+
+ retval = irq_chip_pm_get(&desc->irq_data);
+ if (retval < 0)
+ goto err_out;
+
+ retval = __setup_irq(irq, desc, action);
+ if (retval)
+ goto err_irq_setup;
+
+ raw_spin_lock_irqsave(&desc->lock, flags);
+
+ /* Setup NMI state */
+ desc->istate |= IRQS_NMI;
+ retval = irq_nmi_setup(desc);
+ if (retval) {
+ __cleanup_nmi(irq, desc);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+ return -EINVAL;
+ }
+
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+ return 0;
+
+err_irq_setup:
+ irq_chip_pm_put(&desc->irq_data);
+err_out:
+ kfree(action);
+
+ return retval;
+}
+
void enable_percpu_irq(unsigned int irq, unsigned int type)
{
unsigned int cpu = smp_processor_id();
@@ -1959,6 +2228,11 @@ out:
}
EXPORT_SYMBOL_GPL(enable_percpu_irq);
+void enable_percpu_nmi(unsigned int irq, unsigned int type)
+{
+ enable_percpu_irq(irq, type);
+}
+
/**
* irq_percpu_is_enabled - Check whether the per cpu irq is enabled
* @irq: Linux irq number to check for
@@ -1998,6 +2272,11 @@ void disable_percpu_irq(unsigned int irq)
}
EXPORT_SYMBOL_GPL(disable_percpu_irq);
+void disable_percpu_nmi(unsigned int irq)
+{
+ disable_percpu_irq(irq);
+}
+
/*
* Internal function to unregister a percpu irqaction.
*/
@@ -2029,6 +2308,8 @@ static struct irqaction *__free_percpu_irq(unsigned int irq, void __percpu *dev_
/* Found it - now remove it from the list of entries: */
desc->action = NULL;
+ desc->istate &= ~IRQS_NMI;
+
raw_spin_unlock_irqrestore(&desc->lock, flags);
unregister_handler_proc(irq, action);
@@ -2082,6 +2363,19 @@ void free_percpu_irq(unsigned int irq, void __percpu *dev_id)
}
EXPORT_SYMBOL_GPL(free_percpu_irq);
+void free_percpu_nmi(unsigned int irq, void __percpu *dev_id)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ if (!desc || !irq_settings_is_per_cpu_devid(desc))
+ return;
+
+ if (WARN_ON(!(desc->istate & IRQS_NMI)))
+ return;
+
+ kfree(__free_percpu_irq(irq, dev_id));
+}
+
/**
* setup_percpu_irq - setup a per-cpu interrupt
* @irq: Interrupt line to setup
@@ -2172,6 +2466,180 @@ int __request_percpu_irq(unsigned int irq, irq_handler_t handler,
EXPORT_SYMBOL_GPL(__request_percpu_irq);
/**
+ * request_percpu_nmi - allocate a percpu interrupt line for NMI delivery
+ * @irq: Interrupt line to allocate
+ * @handler: Function to be called when the IRQ occurs.
+ * @name: An ascii name for the claiming device
+ * @dev_id: A percpu cookie passed back to the handler function
+ *
+ * This call allocates interrupt resources for a per CPU NMI. Per CPU NMIs
+ * have to be setup on each CPU by calling prepare_percpu_nmi() before
+ * being enabled on the same CPU by using enable_percpu_nmi().
+ *
+ * Dev_id must be globally unique. It is a per-cpu variable, and
+ * the handler gets called with the interrupted CPU's instance of
+ * that variable.
+ *
+ * Interrupt lines requested for NMI delivering should have auto enabling
+ * setting disabled.
+ *
+ * If the interrupt line cannot be used to deliver NMIs, function
+ * will fail returning a negative value.
+ */
+int request_percpu_nmi(unsigned int irq, irq_handler_t handler,
+ const char *name, void __percpu *dev_id)
+{
+ struct irqaction *action;
+ struct irq_desc *desc;
+ unsigned long flags;
+ int retval;
+
+ if (!handler)
+ return -EINVAL;
+
+ desc = irq_to_desc(irq);
+
+ if (!desc || !irq_settings_can_request(desc) ||
+ !irq_settings_is_per_cpu_devid(desc) ||
+ irq_settings_can_autoenable(desc) ||
+ !irq_supports_nmi(desc))
+ return -EINVAL;
+
+ /* The line cannot already be NMI */
+ if (desc->istate & IRQS_NMI)
+ return -EINVAL;
+
+ action = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
+ if (!action)
+ return -ENOMEM;
+
+ action->handler = handler;
+ action->flags = IRQF_PERCPU | IRQF_NO_SUSPEND | IRQF_NO_THREAD
+ | IRQF_NOBALANCING;
+ action->name = name;
+ action->percpu_dev_id = dev_id;
+
+ retval = irq_chip_pm_get(&desc->irq_data);
+ if (retval < 0)
+ goto err_out;
+
+ retval = __setup_irq(irq, desc, action);
+ if (retval)
+ goto err_irq_setup;
+
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ desc->istate |= IRQS_NMI;
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+ return 0;
+
+err_irq_setup:
+ irq_chip_pm_put(&desc->irq_data);
+err_out:
+ kfree(action);
+
+ return retval;
+}
+
+/**
+ * prepare_percpu_nmi - performs CPU local setup for NMI delivery
+ * @irq: Interrupt line to prepare for NMI delivery
+ *
+ * This call prepares an interrupt line to deliver NMI on the current CPU,
+ * before that interrupt line gets enabled with enable_percpu_nmi().
+ *
+ * As a CPU local operation, this should be called from non-preemptible
+ * context.
+ *
+ * If the interrupt line cannot be used to deliver NMIs, function
+ * will fail returning a negative value.
+ */
+int prepare_percpu_nmi(unsigned int irq)
+{
+ unsigned long flags;
+ struct irq_desc *desc;
+ int ret = 0;
+
+ WARN_ON(preemptible());
+
+ desc = irq_get_desc_lock(irq, &flags,
+ IRQ_GET_DESC_CHECK_PERCPU);
+ if (!desc)
+ return -EINVAL;
+
+ if (WARN(!(desc->istate & IRQS_NMI),
+ KERN_ERR "prepare_percpu_nmi called for a non-NMI interrupt: irq %u\n",
+ irq)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = irq_nmi_setup(desc);
+ if (ret) {
+ pr_err("Failed to setup NMI delivery: irq %u\n", irq);
+ goto out;
+ }
+
+out:
+ irq_put_desc_unlock(desc, flags);
+ return ret;
+}
+
+/**
+ * teardown_percpu_nmi - undoes NMI setup of IRQ line
+ * @irq: Interrupt line from which CPU local NMI configuration should be
+ * removed
+ *
+ * This call undoes the setup done by prepare_percpu_nmi().
+ *
+ * IRQ line should not be enabled for the current CPU.
+ *
+ * As a CPU local operation, this should be called from non-preemptible
+ * context.
+ */
+void teardown_percpu_nmi(unsigned int irq)
+{
+ unsigned long flags;
+ struct irq_desc *desc;
+
+ WARN_ON(preemptible());
+
+ desc = irq_get_desc_lock(irq, &flags,
+ IRQ_GET_DESC_CHECK_PERCPU);
+ if (!desc)
+ return;
+
+ if (WARN_ON(!(desc->istate & IRQS_NMI)))
+ goto out;
+
+ irq_nmi_teardown(desc);
+out:
+ irq_put_desc_unlock(desc, flags);
+}
+
+int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which,
+ bool *state)
+{
+ struct irq_chip *chip;
+ int err = -EINVAL;
+
+ do {
+ chip = irq_data_get_irq_chip(data);
+ if (chip->irq_get_irqchip_state)
+ break;
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+ data = data->parent_data;
+#else
+ data = NULL;
+#endif
+ } while (data);
+
+ if (data)
+ err = chip->irq_get_irqchip_state(data, which, state);
+ return err;
+}
+
+/**
* irq_get_irqchip_state - returns the irqchip state of a interrupt.
* @irq: Interrupt line that is forwarded to a VM
* @which: One of IRQCHIP_STATE_* the caller wants to know about
@@ -2189,7 +2657,6 @@ int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
{
struct irq_desc *desc;
struct irq_data *data;
- struct irq_chip *chip;
unsigned long flags;
int err = -EINVAL;
@@ -2199,19 +2666,7 @@ int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
data = irq_desc_get_irq_data(desc);
- do {
- chip = irq_data_get_irq_chip(data);
- if (chip->irq_get_irqchip_state)
- break;
-#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
- data = data->parent_data;
-#else
- data = NULL;
-#endif
- } while (data);
-
- if (data)
- err = chip->irq_get_irqchip_state(data, which, state);
+ err = __irq_get_irqchip_state(data, which, state);
irq_put_desc_busunlock(desc, flags);
return err;
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 6d2fa6914b30..2ed97a7c9b2a 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -212,9 +212,9 @@ static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
*/
raw_spin_lock_irqsave(&desc->lock, flags);
for_each_action_of_desc(desc, action) {
- printk(KERN_ERR "[<%p>] %pf", action->handler, action->handler);
+ printk(KERN_ERR "[<%p>] %ps", action->handler, action->handler);
if (action->thread_fn)
- printk(KERN_CONT " threaded [<%p>] %pf",
+ printk(KERN_CONT " threaded [<%p>] %ps",
action->thread_fn, action->thread_fn);
printk(KERN_CONT "\n");
}
diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c
index 1e4cb63a5c82..90c735da15d0 100644
--- a/kernel/irq/timings.c
+++ b/kernel/irq/timings.c
@@ -9,6 +9,7 @@
#include <linux/idr.h>
#include <linux/irq.h>
#include <linux/math64.h>
+#include <linux/log2.h>
#include <trace/events/irq.h>
@@ -18,16 +19,6 @@ DEFINE_STATIC_KEY_FALSE(irq_timing_enabled);
DEFINE_PER_CPU(struct irq_timings, irq_timings);
-struct irqt_stat {
- u64 next_evt;
- u64 last_ts;
- u64 variance;
- u32 avg;
- u32 nr_samples;
- int anomalies;
- int valid;
-};
-
static DEFINE_IDR(irqt_stats);
void irq_timings_enable(void)
@@ -40,75 +31,360 @@ void irq_timings_disable(void)
static_branch_disable(&irq_timing_enabled);
}
-/**
- * irqs_update - update the irq timing statistics with a new timestamp
+/*
+ * The main goal of this algorithm is to predict the next interrupt
+ * occurrence on the current CPU.
+ *
+ * Currently, the interrupt timings are stored in a circular array
+ * buffer every time there is an interrupt, as a tuple: the interrupt
+ * number and the associated timestamp when the event occurred <irq,
+ * timestamp>.
+ *
+ * For every interrupt occurring in a short period of time, we can
+ * measure the elapsed time between the occurrences for the same
+ * interrupt and we end up with a suite of intervals. The experience
+ * showed the interrupts are often coming following a periodic
+ * pattern.
+ *
+ * The objective of the algorithm is to find out this periodic pattern
+ * in a fastest way and use its period to predict the next irq event.
+ *
+ * When the next interrupt event is requested, we are in the situation
+ * where the interrupts are disabled and the circular buffer
+ * containing the timings is filled with the events which happened
+ * after the previous next-interrupt-event request.
+ *
+ * At this point, we read the circular buffer and we fill the irq
+ * related statistics structure. After this step, the circular array
+ * containing the timings is empty because all the values are
+ * dispatched in their corresponding buffers.
+ *
+ * Now for each interrupt, we can predict the next event by using the
+ * suffix array, log interval and exponential moving average
+ *
+ * 1. Suffix array
+ *
+ * Suffix array is an array of all the suffixes of a string. It is
+ * widely used as a data structure for compression, text search, ...
+ * For instance for the word 'banana', the suffixes will be: 'banana'
+ * 'anana' 'nana' 'ana' 'na' 'a'
+ *
+ * Usually, the suffix array is sorted but for our purpose it is
+ * not necessary and won't provide any improvement in the context of
+ * the solved problem where we clearly define the boundaries of the
+ * search by a max period and min period.
+ *
+ * The suffix array will build a suite of intervals of different
+ * length and will look for the repetition of each suite. If the suite
+ * is repeating then we have the period because it is the length of
+ * the suite whatever its position in the buffer.
+ *
+ * 2. Log interval
+ *
+ * We saw the irq timings allow to compute the interval of the
+ * occurrences for a specific interrupt. We can reasonibly assume the
+ * longer is the interval, the higher is the error for the next event
+ * and we can consider storing those interval values into an array
+ * where each slot in the array correspond to an interval at the power
+ * of 2 of the index. For example, index 12 will contain values
+ * between 2^11 and 2^12.
+ *
+ * At the end we have an array of values where at each index defines a
+ * [2^index - 1, 2 ^ index] interval values allowing to store a large
+ * number of values inside a small array.
+ *
+ * For example, if we have the value 1123, then we store it at
+ * ilog2(1123) = 10 index value.
+ *
+ * Storing those value at the specific index is done by computing an
+ * exponential moving average for this specific slot. For instance,
+ * for values 1800, 1123, 1453, ... fall under the same slot (10) and
+ * the exponential moving average is computed every time a new value
+ * is stored at this slot.
+ *
+ * 3. Exponential Moving Average
+ *
+ * The EMA is largely used to track a signal for stocks or as a low
+ * pass filter. The magic of the formula, is it is very simple and the
+ * reactivity of the average can be tuned with the factors called
+ * alpha.
+ *
+ * The higher the alphas are, the faster the average respond to the
+ * signal change. In our case, if a slot in the array is a big
+ * interval, we can have numbers with a big difference between
+ * them. The impact of those differences in the average computation
+ * can be tuned by changing the alpha value.
+ *
+ *
+ * -- The algorithm --
+ *
+ * We saw the different processing above, now let's see how they are
+ * used together.
+ *
+ * For each interrupt:
+ * For each interval:
+ * Compute the index = ilog2(interval)
+ * Compute a new_ema(buffer[index], interval)
+ * Store the index in a circular buffer
+ *
+ * Compute the suffix array of the indexes
+ *
+ * For each suffix:
+ * If the suffix is reverse-found 3 times
+ * Return suffix
+ *
+ * Return Not found
+ *
+ * However we can not have endless suffix array to be build, it won't
+ * make sense and it will add an extra overhead, so we can restrict
+ * this to a maximum suffix length of 5 and a minimum suffix length of
+ * 2. The experience showed 5 is the majority of the maximum pattern
+ * period found for different devices.
+ *
+ * The result is a pattern finding less than 1us for an interrupt.
*
- * @irqs: an irqt_stat struct pointer
- * @ts: the new timestamp
+ * Example based on real values:
*
- * The statistics are computed online, in other words, the code is
- * designed to compute the statistics on a stream of values rather
- * than doing multiple passes on the values to compute the average,
- * then the variance. The integer division introduces a loss of
- * precision but with an acceptable error margin regarding the results
- * we would have with the double floating precision: we are dealing
- * with nanosec, so big numbers, consequently the mantisse is
- * negligeable, especially when converting the time in usec
- * afterwards.
+ * Example 1 : MMC write/read interrupt interval:
*
- * The computation happens at idle time. When the CPU is not idle, the
- * interrupts' timestamps are stored in the circular buffer, when the
- * CPU goes idle and this routine is called, all the buffer's values
- * are injected in the statistical model continuying to extend the
- * statistics from the previous busy-idle cycle.
+ * 223947, 1240, 1384, 1386, 1386,
+ * 217416, 1236, 1384, 1386, 1387,
+ * 214719, 1241, 1386, 1387, 1384,
+ * 213696, 1234, 1384, 1386, 1388,
+ * 219904, 1240, 1385, 1389, 1385,
+ * 212240, 1240, 1386, 1386, 1386,
+ * 214415, 1236, 1384, 1386, 1387,
+ * 214276, 1234, 1384, 1388, ?
*
- * The observations showed a device will trigger a burst of periodic
- * interrupts followed by one or two peaks of longer time, for
- * instance when a SD card device flushes its cache, then the periodic
- * intervals occur again. A one second inactivity period resets the
- * stats, that gives us the certitude the statistical values won't
- * exceed 1x10^9, thus the computation won't overflow.
+ * For each element, apply ilog2(value)
*
- * Basically, the purpose of the algorithm is to watch the periodic
- * interrupts and eliminate the peaks.
+ * 15, 8, 8, 8, 8,
+ * 15, 8, 8, 8, 8,
+ * 15, 8, 8, 8, 8,
+ * 15, 8, 8, 8, 8,
+ * 15, 8, 8, 8, 8,
+ * 15, 8, 8, 8, 8,
+ * 15, 8, 8, 8, 8,
+ * 15, 8, 8, 8, ?
*
- * An interrupt is considered periodically stable if the interval of
- * its occurences follow the normal distribution, thus the values
- * comply with:
+ * Max period of 5, we take the last (max_period * 3) 15 elements as
+ * we can be confident if the pattern repeats itself three times it is
+ * a repeating pattern.
*
- * avg - 3 x stddev < value < avg + 3 x stddev
+ * 8,
+ * 15, 8, 8, 8, 8,
+ * 15, 8, 8, 8, 8,
+ * 15, 8, 8, 8, ?
*
- * Which can be simplified to:
+ * Suffixes are:
*
- * -3 x stddev < value - avg < 3 x stddev
+ * 1) 8, 15, 8, 8, 8 <- max period
+ * 2) 8, 15, 8, 8
+ * 3) 8, 15, 8
+ * 4) 8, 15 <- min period
*
- * abs(value - avg) < 3 x stddev
+ * From there we search the repeating pattern for each suffix.
*
- * In order to save a costly square root computation, we use the
- * variance. For the record, stddev = sqrt(variance). The equation
- * above becomes:
+ * buffer: 8, 15, 8, 8, 8, 8, 15, 8, 8, 8, 8, 15, 8, 8, 8
+ * | | | | | | | | | | | | | | |
+ * 8, 15, 8, 8, 8 | | | | | | | | | |
+ * 8, 15, 8, 8, 8 | | | | |
+ * 8, 15, 8, 8, 8
*
- * abs(value - avg) < 3 x sqrt(variance)
+ * When moving the suffix, we found exactly 3 matches.
*
- * And finally we square it:
+ * The first suffix with period 5 is repeating.
*
- * (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2
+ * The next event is (3 * max_period) % suffix_period
*
- * (value - avg) x (value - avg) < 9 x variance
+ * In this example, the result 0, so the next event is suffix[0] => 8
*
- * Statistically speaking, any values out of this interval is
- * considered as an anomaly and is discarded. However, a normal
- * distribution appears when the number of samples is 30 (it is the
- * rule of thumb in statistics, cf. "30 samples" on Internet). When
- * there are three consecutive anomalies, the statistics are resetted.
+ * However, 8 is the index in the array of exponential moving average
+ * which was calculated on the fly when storing the values, so the
+ * interval is ema[8] = 1366
*
+ *
+ * Example 2:
+ *
+ * 4, 3, 5, 100,
+ * 3, 3, 5, 117,
+ * 4, 4, 5, 112,
+ * 4, 3, 4, 110,
+ * 3, 5, 3, 117,
+ * 4, 4, 5, 112,
+ * 4, 3, 4, 110,
+ * 3, 4, 5, 112,
+ * 4, 3, 4, 110
+ *
+ * ilog2
+ *
+ * 0, 0, 0, 4,
+ * 0, 0, 0, 4,
+ * 0, 0, 0, 4,
+ * 0, 0, 0, 4,
+ * 0, 0, 0, 4,
+ * 0, 0, 0, 4,
+ * 0, 0, 0, 4,
+ * 0, 0, 0, 4,
+ * 0, 0, 0, 4
+ *
+ * Max period 5:
+ * 0, 0, 4,
+ * 0, 0, 0, 4,
+ * 0, 0, 0, 4,
+ * 0, 0, 0, 4
+ *
+ * Suffixes:
+ *
+ * 1) 0, 0, 4, 0, 0
+ * 2) 0, 0, 4, 0
+ * 3) 0, 0, 4
+ * 4) 0, 0
+ *
+ * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4
+ * | | | | | | X
+ * 0, 0, 4, 0, 0, | X
+ * 0, 0
+ *
+ * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4
+ * | | | | | | | | | | | | | | |
+ * 0, 0, 4, 0, | | | | | | | | | | |
+ * 0, 0, 4, 0, | | | | | | |
+ * 0, 0, 4, 0, | | |
+ * 0 0 4
+ *
+ * Pattern is found 3 times, the remaining is 1 which results from
+ * (max_period * 3) % suffix_period. This value is the index in the
+ * suffix arrays. The suffix array for a period 4 has the value 4
+ * at index 1.
+ */
+#define EMA_ALPHA_VAL 64
+#define EMA_ALPHA_SHIFT 7
+
+#define PREDICTION_PERIOD_MIN 2
+#define PREDICTION_PERIOD_MAX 5
+#define PREDICTION_FACTOR 4
+#define PREDICTION_MAX 10 /* 2 ^ PREDICTION_MAX useconds */
+#define PREDICTION_BUFFER_SIZE 16 /* slots for EMAs, hardly more than 16 */
+
+struct irqt_stat {
+ u64 last_ts;
+ u64 ema_time[PREDICTION_BUFFER_SIZE];
+ int timings[IRQ_TIMINGS_SIZE];
+ int circ_timings[IRQ_TIMINGS_SIZE];
+ int count;
+};
+
+/*
+ * Exponential moving average computation
*/
-static void irqs_update(struct irqt_stat *irqs, u64 ts)
+static u64 irq_timings_ema_new(u64 value, u64 ema_old)
+{
+ s64 diff;
+
+ if (unlikely(!ema_old))
+ return value;
+
+ diff = (value - ema_old) * EMA_ALPHA_VAL;
+ /*
+ * We can use a s64 type variable to be added with the u64
+ * ema_old variable as this one will never have its topmost
+ * bit set, it will be always smaller than 2^63 nanosec
+ * interrupt interval (292 years).
+ */
+ return ema_old + (diff >> EMA_ALPHA_SHIFT);
+}
+
+static int irq_timings_next_event_index(int *buffer, size_t len, int period_max)
+{
+ int i;
+
+ /*
+ * The buffer contains the suite of intervals, in a ilog2
+ * basis, we are looking for a repetition. We point the
+ * beginning of the search three times the length of the
+ * period beginning at the end of the buffer. We do that for
+ * each suffix.
+ */
+ for (i = period_max; i >= PREDICTION_PERIOD_MIN ; i--) {
+
+ int *begin = &buffer[len - (i * 3)];
+ int *ptr = begin;
+
+ /*
+ * We look if the suite with period 'i' repeat
+ * itself. If it is truncated at the end, as it
+ * repeats we can use the period to find out the next
+ * element.
+ */
+ while (!memcmp(ptr, begin, i * sizeof(*ptr))) {
+ ptr += i;
+ if (ptr >= &buffer[len])
+ return begin[((i * 3) % i)];
+ }
+ }
+
+ return -1;
+}
+
+static u64 __irq_timings_next_event(struct irqt_stat *irqs, int irq, u64 now)
+{
+ int index, i, period_max, count, start, min = INT_MAX;
+
+ if ((now - irqs->last_ts) >= NSEC_PER_SEC) {
+ irqs->count = irqs->last_ts = 0;
+ return U64_MAX;
+ }
+
+ /*
+ * As we want to find three times the repetition, we need a
+ * number of intervals greater or equal to three times the
+ * maximum period, otherwise we truncate the max period.
+ */
+ period_max = irqs->count > (3 * PREDICTION_PERIOD_MAX) ?
+ PREDICTION_PERIOD_MAX : irqs->count / 3;
+
+ /*
+ * If we don't have enough irq timings for this prediction,
+ * just bail out.
+ */
+ if (period_max <= PREDICTION_PERIOD_MIN)
+ return U64_MAX;
+
+ /*
+ * 'count' will depends if the circular buffer wrapped or not
+ */
+ count = irqs->count < IRQ_TIMINGS_SIZE ?
+ irqs->count : IRQ_TIMINGS_SIZE;
+
+ start = irqs->count < IRQ_TIMINGS_SIZE ?
+ 0 : (irqs->count & IRQ_TIMINGS_MASK);
+
+ /*
+ * Copy the content of the circular buffer into another buffer
+ * in order to linearize the buffer instead of dealing with
+ * wrapping indexes and shifted array which will be prone to
+ * error and extremelly difficult to debug.
+ */
+ for (i = 0; i < count; i++) {
+ int index = (start + i) & IRQ_TIMINGS_MASK;
+
+ irqs->timings[i] = irqs->circ_timings[index];
+ min = min_t(int, irqs->timings[i], min);
+ }
+
+ index = irq_timings_next_event_index(irqs->timings, count, period_max);
+ if (index < 0)
+ return irqs->last_ts + irqs->ema_time[min];
+
+ return irqs->last_ts + irqs->ema_time[index];
+}
+
+static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts)
{
u64 old_ts = irqs->last_ts;
- u64 variance = 0;
u64 interval;
- s64 diff;
+ int index;
/*
* The timestamps are absolute time values, we need to compute
@@ -135,87 +411,28 @@ static void irqs_update(struct irqt_stat *irqs, u64 ts)
* want as we need another timestamp to compute an interval.
*/
if (interval >= NSEC_PER_SEC) {
- memset(irqs, 0, sizeof(*irqs));
- irqs->last_ts = ts;
+ irqs->count = 0;
return;
}
/*
- * Pre-compute the delta with the average as the result is
- * used several times in this function.
- */
- diff = interval - irqs->avg;
-
- /*
- * Increment the number of samples.
- */
- irqs->nr_samples++;
-
- /*
- * Online variance divided by the number of elements if there
- * is more than one sample. Normally the formula is division
- * by nr_samples - 1 but we assume the number of element will be
- * more than 32 and dividing by 32 instead of 31 is enough
- * precise.
- */
- if (likely(irqs->nr_samples > 1))
- variance = irqs->variance >> IRQ_TIMINGS_SHIFT;
-
- /*
- * The rule of thumb in statistics for the normal distribution
- * is having at least 30 samples in order to have the model to
- * apply. Values outside the interval are considered as an
- * anomaly.
- */
- if ((irqs->nr_samples >= 30) && ((diff * diff) > (9 * variance))) {
- /*
- * After three consecutive anomalies, we reset the
- * stats as it is no longer stable enough.
- */
- if (irqs->anomalies++ >= 3) {
- memset(irqs, 0, sizeof(*irqs));
- irqs->last_ts = ts;
- return;
- }
- } else {
- /*
- * The anomalies must be consecutives, so at this
- * point, we reset the anomalies counter.
- */
- irqs->anomalies = 0;
- }
-
- /*
- * The interrupt is considered stable enough to try to predict
- * the next event on it.
+ * Get the index in the ema table for this interrupt. The
+ * PREDICTION_FACTOR increase the interval size for the array
+ * of exponential average.
*/
- irqs->valid = 1;
+ index = likely(interval) ?
+ ilog2((interval >> 10) / PREDICTION_FACTOR) : 0;
/*
- * Online average algorithm:
- *
- * new_average = average + ((value - average) / count)
- *
- * The variance computation depends on the new average
- * to be computed here first.
- *
+ * Store the index as an element of the pattern in another
+ * circular array.
*/
- irqs->avg = irqs->avg + (diff >> IRQ_TIMINGS_SHIFT);
+ irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index;
- /*
- * Online variance algorithm:
- *
- * new_variance = variance + (value - average) x (value - new_average)
- *
- * Warning: irqs->avg is updated with the line above, hence
- * 'interval - irqs->avg' is no longer equal to 'diff'
- */
- irqs->variance = irqs->variance + (diff * (interval - irqs->avg));
+ irqs->ema_time[index] = irq_timings_ema_new(interval,
+ irqs->ema_time[index]);
- /*
- * Update the next event
- */
- irqs->next_evt = ts + irqs->avg;
+ irqs->count++;
}
/**
@@ -259,6 +476,9 @@ u64 irq_timings_next_event(u64 now)
*/
lockdep_assert_irqs_disabled();
+ if (!irqts->count)
+ return next_evt;
+
/*
* Number of elements in the circular buffer: If it happens it
* was flushed before, then the number of elements could be
@@ -269,21 +489,19 @@ u64 irq_timings_next_event(u64 now)
* type but with the cost of extra computation in the
* interrupt handler hot path. We choose efficiency.
*
- * Inject measured irq/timestamp to the statistical model
- * while decrementing the counter because we consume the data
- * from our circular buffer.
+ * Inject measured irq/timestamp to the pattern prediction
+ * model while decrementing the counter because we consume the
+ * data from our circular buffer.
*/
- for (i = irqts->count & IRQ_TIMINGS_MASK,
- irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count);
- irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) {
- irq = irq_timing_decode(irqts->values[i], &ts);
+ i = (irqts->count & IRQ_TIMINGS_MASK) - 1;
+ irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count);
+ for (; irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) {
+ irq = irq_timing_decode(irqts->values[i], &ts);
s = idr_find(&irqt_stats, irq);
- if (s) {
- irqs = this_cpu_ptr(s);
- irqs_update(irqs, ts);
- }
+ if (s)
+ irq_timings_store(irq, this_cpu_ptr(s), ts);
}
/*
@@ -294,26 +512,12 @@ u64 irq_timings_next_event(u64 now)
irqs = this_cpu_ptr(s);
- if (!irqs->valid)
- continue;
+ ts = __irq_timings_next_event(irqs, i, now);
+ if (ts <= now)
+ return now;
- if (irqs->next_evt <= now) {
- irq = i;
- next_evt = now;
-
- /*
- * This interrupt mustn't use in the future
- * until new events occur and update the
- * statistics.
- */
- irqs->valid = 0;
- break;
- }
-
- if (irqs->next_evt < next_evt) {
- irq = i;
- next_evt = irqs->next_evt;
- }
+ if (ts < next_evt)
+ next_evt = ts;
}
return next_evt;