diff options
Diffstat (limited to 'kernel/irq')
-rw-r--r-- | kernel/irq/Kconfig | 4 | ||||
-rw-r--r-- | kernel/irq/affinity.c | 121 | ||||
-rw-r--r-- | kernel/irq/autoprobe.c | 6 | ||||
-rw-r--r-- | kernel/irq/chip.c | 115 | ||||
-rw-r--r-- | kernel/irq/cpuhotplug.c | 2 | ||||
-rw-r--r-- | kernel/irq/debugfs.c | 10 | ||||
-rw-r--r-- | kernel/irq/devres.c | 5 | ||||
-rw-r--r-- | kernel/irq/handle.c | 4 | ||||
-rw-r--r-- | kernel/irq/internals.h | 15 | ||||
-rw-r--r-- | kernel/irq/irq_sim.c | 12 | ||||
-rw-r--r-- | kernel/irq/irqdesc.c | 46 | ||||
-rw-r--r-- | kernel/irq/irqdomain.c | 63 | ||||
-rw-r--r-- | kernel/irq/manage.c | 501 | ||||
-rw-r--r-- | kernel/irq/spurious.c | 4 | ||||
-rw-r--r-- | kernel/irq/timings.c | 522 |
15 files changed, 1161 insertions, 269 deletions
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 5f3e2baefca9..f92d9a687372 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only menu "IRQ subsystem" # Options selectable by the architecture code @@ -91,6 +92,9 @@ config GENERIC_MSI_IRQ_DOMAIN select IRQ_DOMAIN_HIERARCHY select GENERIC_MSI_IRQ +config IRQ_MSI_IOMMU + bool + config HANDLE_DOMAIN_IRQ bool diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index 45b68b4ea48b..f18cd5aa33e8 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -9,7 +9,7 @@ #include <linux/cpu.h> static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk, - int cpus_per_vec) + unsigned int cpus_per_vec) { const struct cpumask *siblmsk; int cpu, sibl; @@ -95,15 +95,17 @@ static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask, } static int __irq_build_affinity_masks(const struct irq_affinity *affd, - int startvec, int numvecs, int firstvec, + unsigned int startvec, + unsigned int numvecs, + unsigned int firstvec, cpumask_var_t *node_to_cpumask, const struct cpumask *cpu_mask, struct cpumask *nmsk, struct irq_affinity_desc *masks) { - int n, nodes, cpus_per_vec, extra_vecs, done = 0; - int last_affv = firstvec + numvecs; - int curvec = startvec; + unsigned int n, nodes, cpus_per_vec, extra_vecs, done = 0; + unsigned int last_affv = firstvec + numvecs; + unsigned int curvec = startvec; nodemask_t nodemsk = NODE_MASK_NONE; if (!cpumask_weight(cpu_mask)) @@ -117,18 +119,16 @@ static int __irq_build_affinity_masks(const struct irq_affinity *affd, */ if (numvecs <= nodes) { for_each_node_mask(n, nodemsk) { - cpumask_or(&masks[curvec].mask, - &masks[curvec].mask, - node_to_cpumask[n]); + cpumask_or(&masks[curvec].mask, &masks[curvec].mask, + node_to_cpumask[n]); if (++curvec == last_affv) curvec = firstvec; } - done = numvecs; - goto out; + return numvecs; } for_each_node_mask(n, nodemsk) { - int ncpus, v, vecs_to_assign, vecs_per_node; + unsigned int ncpus, v, vecs_to_assign, vecs_per_node; /* Spread the vectors per node */ vecs_per_node = (numvecs - (curvec - firstvec)) / nodes; @@ -163,8 +163,6 @@ static int __irq_build_affinity_masks(const struct irq_affinity *affd, curvec = firstvec; --nodes; } - -out: return done; } @@ -174,19 +172,24 @@ out: * 2) spread other possible CPUs on these vectors */ static int irq_build_affinity_masks(const struct irq_affinity *affd, - int startvec, int numvecs, int firstvec, - cpumask_var_t *node_to_cpumask, + unsigned int startvec, unsigned int numvecs, + unsigned int firstvec, struct irq_affinity_desc *masks) { - int curvec = startvec, nr_present, nr_others; - int ret = -ENOMEM; + unsigned int curvec = startvec, nr_present, nr_others; + cpumask_var_t *node_to_cpumask; cpumask_var_t nmsk, npresmsk; + int ret = -ENOMEM; if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) return ret; if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL)) - goto fail; + goto fail_nmsk; + + node_to_cpumask = alloc_node_to_cpumask(); + if (!node_to_cpumask) + goto fail_npresmsk; ret = 0; /* Stabilize the cpumasks */ @@ -217,13 +220,22 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd, if (nr_present < numvecs) WARN_ON(nr_present + nr_others < numvecs); + free_node_to_cpumask(node_to_cpumask); + + fail_npresmsk: free_cpumask_var(npresmsk); - fail: + fail_nmsk: free_cpumask_var(nmsk); return ret; } +static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs) +{ + affd->nr_sets = 1; + affd->set_size[0] = affvecs; +} + /** * irq_create_affinity_masks - Create affinity masks for multiqueue spreading * @nvecs: The total number of vectors @@ -232,50 +244,62 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd, * Returns the irq_affinity_desc pointer or NULL if allocation failed. */ struct irq_affinity_desc * -irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) +irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd) { - int affvecs = nvecs - affd->pre_vectors - affd->post_vectors; - int curvec, usedvecs; - cpumask_var_t *node_to_cpumask; + unsigned int affvecs, curvec, usedvecs, i; struct irq_affinity_desc *masks = NULL; - int i, nr_sets; /* - * If there aren't any vectors left after applying the pre/post - * vectors don't bother with assigning affinity. + * Determine the number of vectors which need interrupt affinities + * assigned. If the pre/post request exhausts the available vectors + * then nothing to do here except for invoking the calc_sets() + * callback so the device driver can adjust to the situation. If there + * is only a single vector, then managing the queue is pointless as + * well. */ - if (nvecs == affd->pre_vectors + affd->post_vectors) + if (nvecs > 1 && nvecs > affd->pre_vectors + affd->post_vectors) + affvecs = nvecs - affd->pre_vectors - affd->post_vectors; + else + affvecs = 0; + + /* + * Simple invocations do not provide a calc_sets() callback. Install + * the generic one. + */ + if (!affd->calc_sets) + affd->calc_sets = default_calc_sets; + + /* Recalculate the sets */ + affd->calc_sets(affd, affvecs); + + if (WARN_ON_ONCE(affd->nr_sets > IRQ_AFFINITY_MAX_SETS)) return NULL; - node_to_cpumask = alloc_node_to_cpumask(); - if (!node_to_cpumask) + /* Nothing to assign? */ + if (!affvecs) return NULL; masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL); if (!masks) - goto outnodemsk; + return NULL; /* Fill out vectors at the beginning that don't need affinity */ for (curvec = 0; curvec < affd->pre_vectors; curvec++) cpumask_copy(&masks[curvec].mask, irq_default_affinity); + /* * Spread on present CPUs starting from affd->pre_vectors. If we * have multiple sets, build each sets affinity mask separately. */ - nr_sets = affd->nr_sets; - if (!nr_sets) - nr_sets = 1; - - for (i = 0, usedvecs = 0; i < nr_sets; i++) { - int this_vecs = affd->sets ? affd->sets[i] : affvecs; + for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) { + unsigned int this_vecs = affd->set_size[i]; int ret; ret = irq_build_affinity_masks(affd, curvec, this_vecs, - curvec, node_to_cpumask, masks); + curvec, masks); if (ret) { kfree(masks); - masks = NULL; - goto outnodemsk; + return NULL; } curvec += this_vecs; usedvecs += this_vecs; @@ -293,8 +317,6 @@ irq_create_affinity_masks(int nvecs, const struct irq_affinity *affd) for (i = affd->pre_vectors; i < nvecs - affd->post_vectors; i++) masks[i].is_managed = 1; -outnodemsk: - free_node_to_cpumask(node_to_cpumask); return masks; } @@ -304,25 +326,22 @@ outnodemsk: * @maxvec: The maximum number of vectors available * @affd: Description of the affinity requirements */ -int irq_calc_affinity_vectors(int minvec, int maxvec, const struct irq_affinity *affd) +unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec, + const struct irq_affinity *affd) { - int resv = affd->pre_vectors + affd->post_vectors; - int vecs = maxvec - resv; - int set_vecs; + unsigned int resv = affd->pre_vectors + affd->post_vectors; + unsigned int set_vecs; if (resv > minvec) return 0; - if (affd->nr_sets) { - int i; - - for (i = 0, set_vecs = 0; i < affd->nr_sets; i++) - set_vecs += affd->sets[i]; + if (affd->calc_sets) { + set_vecs = maxvec - resv; } else { get_online_cpus(); set_vecs = cpumask_weight(cpu_possible_mask); put_online_cpus(); } - return resv + min(set_vecs, vecs); + return resv + min(set_vecs, maxvec - resv); } diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c index 16cbf6beb276..ae60cae24e9a 100644 --- a/kernel/irq/autoprobe.c +++ b/kernel/irq/autoprobe.c @@ -90,7 +90,7 @@ unsigned long probe_irq_on(void) /* It triggered already - consider it spurious. */ if (!(desc->istate & IRQS_WAITING)) { desc->istate &= ~IRQS_AUTODETECT; - irq_shutdown(desc); + irq_shutdown_and_deactivate(desc); } else if (i < 32) mask |= 1 << i; @@ -127,7 +127,7 @@ unsigned int probe_irq_mask(unsigned long val) mask |= 1 << i; desc->istate &= ~IRQS_AUTODETECT; - irq_shutdown(desc); + irq_shutdown_and_deactivate(desc); } raw_spin_unlock_irq(&desc->lock); } @@ -169,7 +169,7 @@ int probe_irq_off(unsigned long val) nr_of_irqs++; } desc->istate &= ~IRQS_AUTODETECT; - irq_shutdown(desc); + irq_shutdown_and_deactivate(desc); } raw_spin_unlock_irq(&desc->lock); } diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 34e969069488..3ff4a1260885 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -314,6 +314,12 @@ void irq_shutdown(struct irq_desc *desc) } irq_state_clr_started(desc); } +} + + +void irq_shutdown_and_deactivate(struct irq_desc *desc) +{ + irq_shutdown(desc); /* * This must be called even if the interrupt was never started up, * because the activation can happen before the interrupt is @@ -730,6 +736,37 @@ out: EXPORT_SYMBOL_GPL(handle_fasteoi_irq); /** + * handle_fasteoi_nmi - irq handler for NMI interrupt lines + * @desc: the interrupt description structure for this irq + * + * A simple NMI-safe handler, considering the restrictions + * from request_nmi. + * + * Only a single callback will be issued to the chip: an ->eoi() + * call when the interrupt has been serviced. This enables support + * for modern forms of interrupt handlers, which handle the flow + * details in hardware, transparently. + */ +void handle_fasteoi_nmi(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct irqaction *action = desc->action; + unsigned int irq = irq_desc_get_irq(desc); + irqreturn_t res; + + trace_irq_handler_entry(irq, action); + /* + * NMIs cannot be shared, there is only one action. + */ + res = action->handler(irq, action->dev_id); + trace_irq_handler_exit(irq, action, res); + + if (chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); +} +EXPORT_SYMBOL_GPL(handle_fasteoi_nmi); + +/** * handle_edge_irq - edge type IRQ handler * @desc: the interrupt description structure for this irq * @@ -855,7 +892,11 @@ void handle_percpu_irq(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); - kstat_incr_irqs_this_cpu(desc); + /* + * PER CPU interrupts are not serialized. Do not touch + * desc->tot_count. + */ + __kstat_incr_irqs_this_cpu(desc); if (chip->irq_ack) chip->irq_ack(&desc->irq_data); @@ -884,7 +925,11 @@ void handle_percpu_devid_irq(struct irq_desc *desc) unsigned int irq = irq_desc_get_irq(desc); irqreturn_t res; - kstat_incr_irqs_this_cpu(desc); + /* + * PER CPU interrupts are not serialized. Do not touch + * desc->tot_count. + */ + __kstat_incr_irqs_this_cpu(desc); if (chip->irq_ack) chip->irq_ack(&desc->irq_data); @@ -908,6 +953,29 @@ void handle_percpu_devid_irq(struct irq_desc *desc) chip->irq_eoi(&desc->irq_data); } +/** + * handle_percpu_devid_fasteoi_nmi - Per CPU local NMI handler with per cpu + * dev ids + * @desc: the interrupt description structure for this irq + * + * Similar to handle_fasteoi_nmi, but handling the dev_id cookie + * as a percpu pointer. + */ +void handle_percpu_devid_fasteoi_nmi(struct irq_desc *desc) +{ + struct irq_chip *chip = irq_desc_get_chip(desc); + struct irqaction *action = desc->action; + unsigned int irq = irq_desc_get_irq(desc); + irqreturn_t res; + + trace_irq_handler_entry(irq, action); + res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id)); + trace_irq_handler_exit(irq, action, res); + + if (chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); +} + static void __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, int is_chained, const char *name) @@ -1278,6 +1346,17 @@ void irq_chip_mask_parent(struct irq_data *data) EXPORT_SYMBOL_GPL(irq_chip_mask_parent); /** + * irq_chip_mask_ack_parent - Mask and acknowledge the parent interrupt + * @data: Pointer to interrupt specific data + */ +void irq_chip_mask_ack_parent(struct irq_data *data) +{ + data = data->parent_data; + data->chip->irq_mask_ack(data); +} +EXPORT_SYMBOL_GPL(irq_chip_mask_ack_parent); + +/** * irq_chip_unmask_parent - Unmask the parent interrupt * @data: Pointer to interrupt specific data */ @@ -1376,11 +1455,43 @@ int irq_chip_set_vcpu_affinity_parent(struct irq_data *data, void *vcpu_info) int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on) { data = data->parent_data; + + if (data->chip->flags & IRQCHIP_SKIP_SET_WAKE) + return 0; + if (data->chip->irq_set_wake) return data->chip->irq_set_wake(data, on); return -ENOSYS; } +EXPORT_SYMBOL_GPL(irq_chip_set_wake_parent); + +/** + * irq_chip_request_resources_parent - Request resources on the parent interrupt + * @data: Pointer to interrupt specific data + */ +int irq_chip_request_resources_parent(struct irq_data *data) +{ + data = data->parent_data; + + if (data->chip->irq_request_resources) + return data->chip->irq_request_resources(data); + + return -ENOSYS; +} +EXPORT_SYMBOL_GPL(irq_chip_request_resources_parent); + +/** + * irq_chip_release_resources_parent - Release resources on the parent interrupt + * @data: Pointer to interrupt specific data + */ +void irq_chip_release_resources_parent(struct irq_data *data) +{ + data = data->parent_data; + if (data->chip->irq_release_resources) + data->chip->irq_release_resources(data); +} +EXPORT_SYMBOL_GPL(irq_chip_release_resources_parent); #endif /** diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index 5b1072e394b2..6c7ca2e983a5 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -116,7 +116,7 @@ static bool migrate_one_irq(struct irq_desc *desc) */ if (irqd_affinity_is_managed(d)) { irqd_set_managed_shutdown(d); - irq_shutdown(desc); + irq_shutdown_and_deactivate(desc); return false; } affinity = cpu_online_mask; diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index 6f636136cccc..c1eccd4f6520 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -56,6 +56,7 @@ static const struct irq_bit_descr irqchip_flags[] = { BIT_MASK_DESCR(IRQCHIP_ONESHOT_SAFE), BIT_MASK_DESCR(IRQCHIP_EOI_THREADED), BIT_MASK_DESCR(IRQCHIP_SUPPORTS_LEVEL_MSI), + BIT_MASK_DESCR(IRQCHIP_SUPPORTS_NMI), }; static void @@ -140,6 +141,7 @@ static const struct irq_bit_descr irqdesc_istates[] = { BIT_MASK_DESCR(IRQS_WAITING), BIT_MASK_DESCR(IRQS_PENDING), BIT_MASK_DESCR(IRQS_SUSPENDED), + BIT_MASK_DESCR(IRQS_NMI), }; @@ -150,7 +152,7 @@ static int irq_debug_show(struct seq_file *m, void *p) raw_spin_lock_irq(&desc->lock); data = irq_desc_get_irq_data(desc); - seq_printf(m, "handler: %pf\n", desc->handle_irq); + seq_printf(m, "handler: %ps\n", desc->handle_irq); seq_printf(m, "device: %s\n", desc->dev_name); seq_printf(m, "status: 0x%08x\n", desc->status_use_accessors); irq_debug_show_bits(m, 0, desc->status_use_accessors, irqdesc_states, @@ -203,8 +205,8 @@ static ssize_t irq_debug_write(struct file *file, const char __user *user_buf, chip_bus_lock(desc); raw_spin_lock_irqsave(&desc->lock, flags); - if (irq_settings_is_level(desc)) { - /* Can't do level, sorry */ + if (irq_settings_is_level(desc) || desc->istate & IRQS_NMI) { + /* Can't do level nor NMIs, sorry */ err = -EINVAL; } else { desc->istate |= IRQS_PENDING; @@ -256,8 +258,6 @@ static int __init irq_debugfs_init(void) int irq; root_dir = debugfs_create_dir("irq", NULL); - if (!root_dir) - return -ENOMEM; irq_domain_debugfs_init(root_dir); diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index 5d5378ea0afe..f6e5515ee077 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c @@ -84,8 +84,6 @@ EXPORT_SYMBOL(devm_request_threaded_irq); * @dev: device to request interrupt for * @irq: Interrupt line to allocate * @handler: Function to be called when the IRQ occurs - * @thread_fn: function to be called in a threaded interrupt context. NULL - * for devices which handle everything in @handler * @irqflags: Interrupt type flags * @devname: An ascii name for the claiming device, dev_name(dev) if NULL * @dev_id: A cookie passed back to the handler function @@ -222,9 +220,8 @@ devm_irq_alloc_generic_chip(struct device *dev, const char *name, int num_ct, irq_flow_handler_t handler) { struct irq_chip_generic *gc; - unsigned long sz = sizeof(*gc) + num_ct * sizeof(struct irq_chip_type); - gc = devm_kzalloc(dev, sz, GFP_KERNEL); + gc = devm_kzalloc(dev, struct_size(gc, chip_types, num_ct), GFP_KERNEL); if (gc) irq_init_generic_chip(gc, name, num_ct, irq_base, reg_base, handler); diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 38554bc35375..a4ace611f47f 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -149,7 +149,7 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags res = action->handler(irq, action->dev_id); trace_irq_handler_exit(irq, action, res); - if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pF enabled interrupts\n", + if (WARN_ONCE(!irqs_disabled(),"irq %u handler %pS enabled interrupts\n", irq, action->handler)) local_irq_disable(); @@ -166,7 +166,7 @@ irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags __irq_wake_thread(desc, action); - /* Fall through to add to randomness */ + /* Fall through - to add to randomness */ case IRQ_HANDLED: *flags |= action->flags; break; diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index ca6afa267070..3a948f41ab00 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -49,6 +49,7 @@ enum { * IRQS_WAITING - irq is waiting * IRQS_PENDING - irq is pending and replayed later * IRQS_SUSPENDED - irq is suspended + * IRQS_NMI - irq line is used to deliver NMIs */ enum { IRQS_AUTODETECT = 0x00000001, @@ -60,6 +61,7 @@ enum { IRQS_PENDING = 0x00000200, IRQS_SUSPENDED = 0x00000800, IRQS_TIMINGS = 0x00001000, + IRQS_NMI = 0x00002000, }; #include "debug.h" @@ -80,6 +82,7 @@ extern int irq_activate_and_startup(struct irq_desc *desc, bool resend); extern int irq_startup(struct irq_desc *desc, bool resend, bool force); extern void irq_shutdown(struct irq_desc *desc); +extern void irq_shutdown_and_deactivate(struct irq_desc *desc); extern void irq_enable(struct irq_desc *desc); extern void irq_disable(struct irq_desc *desc); extern void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu); @@ -94,6 +97,10 @@ static inline void irq_mark_irq(unsigned int irq) { } extern void irq_mark_irq(unsigned int irq); #endif +extern int __irq_get_irqchip_state(struct irq_data *data, + enum irqchip_irq_state which, + bool *state); + extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags); @@ -242,12 +249,18 @@ static inline void irq_state_set_masked(struct irq_desc *desc) #undef __irqd_to_state -static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc) +static inline void __kstat_incr_irqs_this_cpu(struct irq_desc *desc) { __this_cpu_inc(*desc->kstat_irqs); __this_cpu_inc(kstat.irqs_sum); } +static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc) +{ + __kstat_incr_irqs_this_cpu(desc); + desc->tot_count++; +} + static inline int irq_desc_get_node(struct irq_desc *desc) { return irq_common_data_get_node(&desc->irq_common_data); diff --git a/kernel/irq/irq_sim.c b/kernel/irq/irq_sim.c index 98a20e1594ce..b992f88c5613 100644 --- a/kernel/irq/irq_sim.c +++ b/kernel/irq/irq_sim.c @@ -25,10 +25,22 @@ static void irq_sim_irqunmask(struct irq_data *data) irq_ctx->enabled = true; } +static int irq_sim_set_type(struct irq_data *data, unsigned int type) +{ + /* We only support rising and falling edge trigger types. */ + if (type & ~IRQ_TYPE_EDGE_BOTH) + return -EINVAL; + + irqd_set_trigger_type(data, type); + + return 0; +} + static struct irq_chip irq_sim_irqchip = { .name = "irq_sim", .irq_mask = irq_sim_irqmask, .irq_unmask = irq_sim_irqunmask, + .irq_set_type = irq_sim_set_type, }; static void irq_sim_handle_irq(struct irq_work *work) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index ef8ad36cadcf..c52b737ab8e3 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -119,6 +119,7 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node, desc->depth = 1; desc->irq_count = 0; desc->irqs_unhandled = 0; + desc->tot_count = 0; desc->name = NULL; desc->owner = owner; for_each_possible_cpu(cpu) @@ -274,11 +275,12 @@ static struct attribute *irq_attrs[] = { &actions_attr.attr, NULL }; +ATTRIBUTE_GROUPS(irq); static struct kobj_type irq_kobj_type = { .release = irq_kobj_release, .sysfs_ops = &kobj_sysfs_ops, - .default_attrs = irq_attrs, + .default_groups = irq_groups, }; static void irq_sysfs_add(int irq, struct irq_desc *desc) @@ -557,6 +559,7 @@ int __init early_irq_init(void) alloc_masks(&desc[i], node); raw_spin_lock_init(&desc[i].lock); lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); + mutex_init(&desc[i].request_mutex); desc_set_defaults(i, &desc[i], node, NULL, NULL); } return arch_early_irq_init(); @@ -669,6 +672,41 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq, set_irq_regs(old_regs); return ret; } + +#ifdef CONFIG_IRQ_DOMAIN +/** + * handle_domain_nmi - Invoke the handler for a HW irq belonging to a domain + * @domain: The domain where to perform the lookup + * @hwirq: The HW irq number to convert to a logical one + * @regs: Register file coming from the low-level handling code + * + * Returns: 0 on success, or -EINVAL if conversion has failed + */ +int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq, + struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + unsigned int irq; + int ret = 0; + + nmi_enter(); + + irq = irq_find_mapping(domain, hwirq); + + /* + * ack_bad_irq is not NMI-safe, just report + * an invalid interrupt. + */ + if (likely(irq)) + generic_handle_irq(irq); + else + ret = -EINVAL; + + nmi_exit(); + set_irq_regs(old_regs); + return ret; +} +#endif #endif /* Dynamic interrupt handling */ @@ -919,11 +957,15 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) unsigned int kstat_irqs(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); - int cpu; unsigned int sum = 0; + int cpu; if (!desc || !desc->kstat_irqs) return 0; + if (!irq_settings_is_per_cpu_devid(desc) && + !irq_settings_is_per_cpu(desc)) + return desc->tot_count; + for_each_possible_cpu(cpu) sum += *per_cpu_ptr(desc->kstat_irqs, cpu); return sum; diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 8b0be4bd6565..a453e229f99c 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -458,6 +458,20 @@ void irq_set_default_host(struct irq_domain *domain) } EXPORT_SYMBOL_GPL(irq_set_default_host); +/** + * irq_get_default_host() - Retrieve the "default" irq domain + * + * Returns: the default domain, if any. + * + * Modern code should never use this. This should only be used on + * systems that cannot implement a firmware->fwnode mapping (which + * both DT and ACPI provide). + */ +struct irq_domain *irq_get_default_host(void) +{ + return irq_default_domain; +} + static void irq_domain_clear_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { @@ -729,16 +743,17 @@ static int irq_domain_translate(struct irq_domain *d, return 0; } -static void of_phandle_args_to_fwspec(struct of_phandle_args *irq_data, +static void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, + unsigned int count, struct irq_fwspec *fwspec) { int i; - fwspec->fwnode = irq_data->np ? &irq_data->np->fwnode : NULL; - fwspec->param_count = irq_data->args_count; + fwspec->fwnode = np ? &np->fwnode : NULL; + fwspec->param_count = count; - for (i = 0; i < irq_data->args_count; i++) - fwspec->param[i] = irq_data->args[i]; + for (i = 0; i < count; i++) + fwspec->param[i] = args[i]; } unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) @@ -836,7 +851,9 @@ unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data) { struct irq_fwspec fwspec; - of_phandle_args_to_fwspec(irq_data, &fwspec); + of_phandle_args_to_fwspec(irq_data->np, irq_data->args, + irq_data->args_count, &fwspec); + return irq_create_fwspec_mapping(&fwspec); } EXPORT_SYMBOL_GPL(irq_create_of_mapping); @@ -928,11 +945,10 @@ int irq_domain_xlate_twocell(struct irq_domain *d, struct device_node *ctrlr, const u32 *intspec, unsigned int intsize, irq_hw_number_t *out_hwirq, unsigned int *out_type) { - if (WARN_ON(intsize < 2)) - return -EINVAL; - *out_hwirq = intspec[0]; - *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK; - return 0; + struct irq_fwspec fwspec; + + of_phandle_args_to_fwspec(ctrlr, intspec, intsize, &fwspec); + return irq_domain_translate_twocell(d, &fwspec, out_hwirq, out_type); } EXPORT_SYMBOL_GPL(irq_domain_xlate_twocell); @@ -968,6 +984,27 @@ const struct irq_domain_ops irq_domain_simple_ops = { }; EXPORT_SYMBOL_GPL(irq_domain_simple_ops); +/** + * irq_domain_translate_twocell() - Generic translate for direct two cell + * bindings + * + * Device Tree IRQ specifier translation function which works with two cell + * bindings where the cell values map directly to the hwirq number + * and linux irq flags. + */ +int irq_domain_translate_twocell(struct irq_domain *d, + struct irq_fwspec *fwspec, + unsigned long *out_hwirq, + unsigned int *out_type) +{ + if (WARN_ON(fwspec->param_count < 2)) + return -EINVAL; + *out_hwirq = fwspec->param[0]; + *out_type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK; + return 0; +} +EXPORT_SYMBOL_GPL(irq_domain_translate_twocell); + int irq_domain_alloc_descs(int virq, unsigned int cnt, irq_hw_number_t hwirq, int node, const struct irq_affinity_desc *affinity) { @@ -1260,7 +1297,7 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, /** * __irq_domain_alloc_irqs - Allocate IRQs from domain * @domain: domain to allocate from - * @irq_base: allocate specified IRQ nubmer if irq_base >= 0 + * @irq_base: allocate specified IRQ number if irq_base >= 0 * @nr_irqs: number of IRQs to allocate * @node: NUMA node id for memory allocation * @arg: domain specific argument @@ -1749,8 +1786,6 @@ void __init irq_domain_debugfs_init(struct dentry *root) struct irq_domain *d; domain_dir = debugfs_create_dir("domains", root); - if (!domain_dir) - return; debugfs_create_file("default", 0444, domain_dir, NULL, &irq_domain_debug_fops); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 84b54a17b95d..e8f7f179bf77 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -13,6 +13,7 @@ #include <linux/module.h> #include <linux/random.h> #include <linux/interrupt.h> +#include <linux/irqdomain.h> #include <linux/slab.h> #include <linux/sched.h> #include <linux/sched/rt.h> @@ -34,8 +35,9 @@ static int __init setup_forced_irqthreads(char *arg) early_param("threadirqs", setup_forced_irqthreads); #endif -static void __synchronize_hardirq(struct irq_desc *desc) +static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip) { + struct irq_data *irqd = irq_desc_get_irq_data(desc); bool inprogress; do { @@ -51,6 +53,20 @@ static void __synchronize_hardirq(struct irq_desc *desc) /* Ok, that indicated we're done: double-check carefully. */ raw_spin_lock_irqsave(&desc->lock, flags); inprogress = irqd_irq_inprogress(&desc->irq_data); + + /* + * If requested and supported, check at the chip whether it + * is in flight at the hardware level, i.e. already pending + * in a CPU and waiting for service and acknowledge. + */ + if (!inprogress && sync_chip) { + /* + * Ignore the return code. inprogress is only updated + * when the chip supports it. + */ + __irq_get_irqchip_state(irqd, IRQCHIP_STATE_ACTIVE, + &inprogress); + } raw_spin_unlock_irqrestore(&desc->lock, flags); /* Oops, that failed? */ @@ -73,13 +89,18 @@ static void __synchronize_hardirq(struct irq_desc *desc) * Returns: false if a threaded handler is active. * * This function may be called - with care - from IRQ context. + * + * It does not check whether there is an interrupt in flight at the + * hardware level, but not serviced yet, as this might deadlock when + * called with interrupts disabled and the target CPU of the interrupt + * is the current CPU. */ bool synchronize_hardirq(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); if (desc) { - __synchronize_hardirq(desc); + __synchronize_hardirq(desc, false); return !atomic_read(&desc->threads_active); } @@ -95,14 +116,19 @@ EXPORT_SYMBOL(synchronize_hardirq); * to complete before returning. If you use this function while * holding a resource the IRQ handler may need you will deadlock. * - * This function may be called - with care - from IRQ context. + * Can only be called from preemptible code as it might sleep when + * an interrupt thread is associated to @irq. + * + * It optionally makes sure (when the irq chip supports that method) + * that the interrupt is not pending in any CPU and waiting for + * service. */ void synchronize_irq(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); if (desc) { - __synchronize_hardirq(desc); + __synchronize_hardirq(desc, true); /* * We made sure that no hardirq handler is * running. Now verify that no threaded handlers are @@ -196,6 +222,7 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, case IRQ_SET_MASK_OK: case IRQ_SET_MASK_OK_DONE: cpumask_copy(desc->irq_common_data.affinity, mask); + /* fall through */ case IRQ_SET_MASK_OK_NOCOPY: irq_validate_effective_affinity(data); irq_set_thread_affinity(desc); @@ -341,7 +368,7 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) /* The release function is promised process context */ might_sleep(); - if (!desc) + if (!desc || desc->istate & IRQS_NMI) return -EINVAL; /* Complete initialisation of *notify */ @@ -356,8 +383,10 @@ irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify) desc->affinity_notify = notify; raw_spin_unlock_irqrestore(&desc->lock, flags); - if (old_notify) + if (old_notify) { + cancel_work_sync(&old_notify->work); kref_put(&old_notify->kref, old_notify->release); + } return 0; } @@ -553,6 +582,21 @@ bool disable_hardirq(unsigned int irq) } EXPORT_SYMBOL_GPL(disable_hardirq); +/** + * disable_nmi_nosync - disable an nmi without waiting + * @irq: Interrupt to disable + * + * Disable the selected interrupt line. Disables and enables are + * nested. + * The interrupt to disable must have been requested through request_nmi. + * Unlike disable_nmi(), this function does not ensure existing + * instances of the IRQ handler have completed before returning. + */ +void disable_nmi_nosync(unsigned int irq) +{ + disable_irq_nosync(irq); +} + void __enable_irq(struct irq_desc *desc) { switch (desc->depth) { @@ -609,6 +653,20 @@ out: } EXPORT_SYMBOL(enable_irq); +/** + * enable_nmi - enable handling of an nmi + * @irq: Interrupt to enable + * + * The interrupt to enable must have been requested through request_nmi. + * Undoes the effect of one call to disable_nmi(). If this + * matches the last disable, processing of interrupts on this + * IRQ line is re-enabled. + */ +void enable_nmi(unsigned int irq) +{ + enable_irq(irq); +} + static int set_irq_wake_real(unsigned int irq, unsigned int on) { struct irq_desc *desc = irq_to_desc(irq); @@ -644,6 +702,12 @@ int irq_set_irq_wake(unsigned int irq, unsigned int on) if (!desc) return -EINVAL; + /* Don't use NMIs as wake up interrupts please */ + if (desc->istate & IRQS_NMI) { + ret = -EINVAL; + goto out_unlock; + } + /* wakeup-capable irqs can be shared between drivers that * don't need to have the same sleep mode behaviors. */ @@ -666,6 +730,8 @@ int irq_set_irq_wake(unsigned int irq, unsigned int on) irqd_clear(&desc->irq_data, IRQD_WAKEUP_STATE); } } + +out_unlock: irq_put_desc_busunlock(desc, flags); return ret; } @@ -726,6 +792,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned long flags) case IRQ_SET_MASK_OK_DONE: irqd_clear(&desc->irq_data, IRQD_TRIGGER_MASK); irqd_set(&desc->irq_data, flags); + /* fall through */ case IRQ_SET_MASK_OK_NOCOPY: flags = irqd_get_trigger_type(&desc->irq_data); @@ -740,7 +807,7 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned long flags) ret = 0; break; default: - pr_err("Setting trigger mode %lu for irq %u failed (%pF)\n", + pr_err("Setting trigger mode %lu for irq %u failed (%pS)\n", flags, irq_desc_get_irq(desc), chip->irq_set_type); } if (unmask) @@ -1128,6 +1195,39 @@ static void irq_release_resources(struct irq_desc *desc) c->irq_release_resources(d); } +static bool irq_supports_nmi(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + /* Only IRQs directly managed by the root irqchip can be set as NMI */ + if (d->parent_data) + return false; +#endif + /* Don't support NMIs for chips behind a slow bus */ + if (d->chip->irq_bus_lock || d->chip->irq_bus_sync_unlock) + return false; + + return d->chip->flags & IRQCHIP_SUPPORTS_NMI; +} + +static int irq_nmi_setup(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + struct irq_chip *c = d->chip; + + return c->irq_nmi_setup ? c->irq_nmi_setup(d) : -EINVAL; +} + +static void irq_nmi_teardown(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + struct irq_chip *c = d->chip; + + if (c->irq_nmi_teardown) + c->irq_nmi_teardown(d); +} + static int setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary) { @@ -1302,9 +1402,17 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) * fields must have IRQF_SHARED set and the bits which * set the trigger type must match. Also all must * agree on ONESHOT. + * Interrupt lines used for NMIs cannot be shared. */ unsigned int oldtype; + if (desc->istate & IRQS_NMI) { + pr_err("Invalid attempt to share NMI for %s (irq %d) on irqchip %s.\n", + new->name, irq, desc->irq_data.chip->name); + ret = -EINVAL; + goto out_unlock; + } + /* * If nobody did set the configuration before, inherit * the one provided by the requester. @@ -1617,6 +1725,7 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) /* If this was the last handler, shut down the IRQ line: */ if (!desc->action) { irq_settings_clr_disable_unlazy(desc); + /* Only shutdown. Deactivate after synchronize_hardirq() */ irq_shutdown(desc); } @@ -1645,8 +1754,12 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) unregister_handler_proc(irq, action); - /* Make sure it's not being used on another CPU: */ - synchronize_hardirq(irq); + /* + * Make sure it's not being used on another CPU and if the chip + * supports it also make sure that there is no (not yet serviced) + * interrupt in flight at the hardware level. + */ + __synchronize_hardirq(desc, true); #ifdef CONFIG_DEBUG_SHIRQ /* @@ -1686,6 +1799,14 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) * require it to deallocate resources over the slow bus. */ chip_bus_lock(desc); + /* + * There is no interrupt on the fly anymore. Deactivate it + * completely. + */ + raw_spin_lock_irqsave(&desc->lock, flags); + irq_domain_deactivate_irq(&desc->irq_data); + raw_spin_unlock_irqrestore(&desc->lock, flags); + irq_release_resources(desc); chip_bus_sync_unlock(desc); irq_remove_timings(desc); @@ -1756,6 +1877,59 @@ const void *free_irq(unsigned int irq, void *dev_id) } EXPORT_SYMBOL(free_irq); +/* This function must be called with desc->lock held */ +static const void *__cleanup_nmi(unsigned int irq, struct irq_desc *desc) +{ + const char *devname = NULL; + + desc->istate &= ~IRQS_NMI; + + if (!WARN_ON(desc->action == NULL)) { + irq_pm_remove_action(desc, desc->action); + devname = desc->action->name; + unregister_handler_proc(irq, desc->action); + + kfree(desc->action); + desc->action = NULL; + } + + irq_settings_clr_disable_unlazy(desc); + irq_shutdown_and_deactivate(desc); + + irq_release_resources(desc); + + irq_chip_pm_put(&desc->irq_data); + module_put(desc->owner); + + return devname; +} + +const void *free_nmi(unsigned int irq, void *dev_id) +{ + struct irq_desc *desc = irq_to_desc(irq); + unsigned long flags; + const void *devname; + + if (!desc || WARN_ON(!(desc->istate & IRQS_NMI))) + return NULL; + + if (WARN_ON(irq_settings_is_per_cpu_devid(desc))) + return NULL; + + /* NMI still enabled */ + if (WARN_ON(desc->depth == 0)) + disable_nmi_nosync(irq); + + raw_spin_lock_irqsave(&desc->lock, flags); + + irq_nmi_teardown(desc); + devname = __cleanup_nmi(irq, desc); + + raw_spin_unlock_irqrestore(&desc->lock, flags); + + return devname; +} + /** * request_threaded_irq - allocate an interrupt line * @irq: Interrupt line to allocate @@ -1925,6 +2099,101 @@ int request_any_context_irq(unsigned int irq, irq_handler_t handler, } EXPORT_SYMBOL_GPL(request_any_context_irq); +/** + * request_nmi - allocate an interrupt line for NMI delivery + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. + * Threaded handler for threaded interrupts. + * @irqflags: Interrupt type flags + * @name: An ascii name for the claiming device + * @dev_id: A cookie passed back to the handler function + * + * This call allocates interrupt resources and enables the + * interrupt line and IRQ handling. It sets up the IRQ line + * to be handled as an NMI. + * + * An interrupt line delivering NMIs cannot be shared and IRQ handling + * cannot be threaded. + * + * Interrupt lines requested for NMI delivering must produce per cpu + * interrupts and have auto enabling setting disabled. + * + * Dev_id must be globally unique. Normally the address of the + * device data structure is used as the cookie. Since the handler + * receives this value it makes sense to use it. + * + * If the interrupt line cannot be used to deliver NMIs, function + * will fail and return a negative value. + */ +int request_nmi(unsigned int irq, irq_handler_t handler, + unsigned long irqflags, const char *name, void *dev_id) +{ + struct irqaction *action; + struct irq_desc *desc; + unsigned long flags; + int retval; + + if (irq == IRQ_NOTCONNECTED) + return -ENOTCONN; + + /* NMI cannot be shared, used for Polling */ + if (irqflags & (IRQF_SHARED | IRQF_COND_SUSPEND | IRQF_IRQPOLL)) + return -EINVAL; + + if (!(irqflags & IRQF_PERCPU)) + return -EINVAL; + + if (!handler) + return -EINVAL; + + desc = irq_to_desc(irq); + + if (!desc || irq_settings_can_autoenable(desc) || + !irq_settings_can_request(desc) || + WARN_ON(irq_settings_is_per_cpu_devid(desc)) || + !irq_supports_nmi(desc)) + return -EINVAL; + + action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!action) + return -ENOMEM; + + action->handler = handler; + action->flags = irqflags | IRQF_NO_THREAD | IRQF_NOBALANCING; + action->name = name; + action->dev_id = dev_id; + + retval = irq_chip_pm_get(&desc->irq_data); + if (retval < 0) + goto err_out; + + retval = __setup_irq(irq, desc, action); + if (retval) + goto err_irq_setup; + + raw_spin_lock_irqsave(&desc->lock, flags); + + /* Setup NMI state */ + desc->istate |= IRQS_NMI; + retval = irq_nmi_setup(desc); + if (retval) { + __cleanup_nmi(irq, desc); + raw_spin_unlock_irqrestore(&desc->lock, flags); + return -EINVAL; + } + + raw_spin_unlock_irqrestore(&desc->lock, flags); + + return 0; + +err_irq_setup: + irq_chip_pm_put(&desc->irq_data); +err_out: + kfree(action); + + return retval; +} + void enable_percpu_irq(unsigned int irq, unsigned int type) { unsigned int cpu = smp_processor_id(); @@ -1959,6 +2228,11 @@ out: } EXPORT_SYMBOL_GPL(enable_percpu_irq); +void enable_percpu_nmi(unsigned int irq, unsigned int type) +{ + enable_percpu_irq(irq, type); +} + /** * irq_percpu_is_enabled - Check whether the per cpu irq is enabled * @irq: Linux irq number to check for @@ -1998,6 +2272,11 @@ void disable_percpu_irq(unsigned int irq) } EXPORT_SYMBOL_GPL(disable_percpu_irq); +void disable_percpu_nmi(unsigned int irq) +{ + disable_percpu_irq(irq); +} + /* * Internal function to unregister a percpu irqaction. */ @@ -2029,6 +2308,8 @@ static struct irqaction *__free_percpu_irq(unsigned int irq, void __percpu *dev_ /* Found it - now remove it from the list of entries: */ desc->action = NULL; + desc->istate &= ~IRQS_NMI; + raw_spin_unlock_irqrestore(&desc->lock, flags); unregister_handler_proc(irq, action); @@ -2082,6 +2363,19 @@ void free_percpu_irq(unsigned int irq, void __percpu *dev_id) } EXPORT_SYMBOL_GPL(free_percpu_irq); +void free_percpu_nmi(unsigned int irq, void __percpu *dev_id) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc || !irq_settings_is_per_cpu_devid(desc)) + return; + + if (WARN_ON(!(desc->istate & IRQS_NMI))) + return; + + kfree(__free_percpu_irq(irq, dev_id)); +} + /** * setup_percpu_irq - setup a per-cpu interrupt * @irq: Interrupt line to setup @@ -2172,6 +2466,180 @@ int __request_percpu_irq(unsigned int irq, irq_handler_t handler, EXPORT_SYMBOL_GPL(__request_percpu_irq); /** + * request_percpu_nmi - allocate a percpu interrupt line for NMI delivery + * @irq: Interrupt line to allocate + * @handler: Function to be called when the IRQ occurs. + * @name: An ascii name for the claiming device + * @dev_id: A percpu cookie passed back to the handler function + * + * This call allocates interrupt resources for a per CPU NMI. Per CPU NMIs + * have to be setup on each CPU by calling prepare_percpu_nmi() before + * being enabled on the same CPU by using enable_percpu_nmi(). + * + * Dev_id must be globally unique. It is a per-cpu variable, and + * the handler gets called with the interrupted CPU's instance of + * that variable. + * + * Interrupt lines requested for NMI delivering should have auto enabling + * setting disabled. + * + * If the interrupt line cannot be used to deliver NMIs, function + * will fail returning a negative value. + */ +int request_percpu_nmi(unsigned int irq, irq_handler_t handler, + const char *name, void __percpu *dev_id) +{ + struct irqaction *action; + struct irq_desc *desc; + unsigned long flags; + int retval; + + if (!handler) + return -EINVAL; + + desc = irq_to_desc(irq); + + if (!desc || !irq_settings_can_request(desc) || + !irq_settings_is_per_cpu_devid(desc) || + irq_settings_can_autoenable(desc) || + !irq_supports_nmi(desc)) + return -EINVAL; + + /* The line cannot already be NMI */ + if (desc->istate & IRQS_NMI) + return -EINVAL; + + action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!action) + return -ENOMEM; + + action->handler = handler; + action->flags = IRQF_PERCPU | IRQF_NO_SUSPEND | IRQF_NO_THREAD + | IRQF_NOBALANCING; + action->name = name; + action->percpu_dev_id = dev_id; + + retval = irq_chip_pm_get(&desc->irq_data); + if (retval < 0) + goto err_out; + + retval = __setup_irq(irq, desc, action); + if (retval) + goto err_irq_setup; + + raw_spin_lock_irqsave(&desc->lock, flags); + desc->istate |= IRQS_NMI; + raw_spin_unlock_irqrestore(&desc->lock, flags); + + return 0; + +err_irq_setup: + irq_chip_pm_put(&desc->irq_data); +err_out: + kfree(action); + + return retval; +} + +/** + * prepare_percpu_nmi - performs CPU local setup for NMI delivery + * @irq: Interrupt line to prepare for NMI delivery + * + * This call prepares an interrupt line to deliver NMI on the current CPU, + * before that interrupt line gets enabled with enable_percpu_nmi(). + * + * As a CPU local operation, this should be called from non-preemptible + * context. + * + * If the interrupt line cannot be used to deliver NMIs, function + * will fail returning a negative value. + */ +int prepare_percpu_nmi(unsigned int irq) +{ + unsigned long flags; + struct irq_desc *desc; + int ret = 0; + + WARN_ON(preemptible()); + + desc = irq_get_desc_lock(irq, &flags, + IRQ_GET_DESC_CHECK_PERCPU); + if (!desc) + return -EINVAL; + + if (WARN(!(desc->istate & IRQS_NMI), + KERN_ERR "prepare_percpu_nmi called for a non-NMI interrupt: irq %u\n", + irq)) { + ret = -EINVAL; + goto out; + } + + ret = irq_nmi_setup(desc); + if (ret) { + pr_err("Failed to setup NMI delivery: irq %u\n", irq); + goto out; + } + +out: + irq_put_desc_unlock(desc, flags); + return ret; +} + +/** + * teardown_percpu_nmi - undoes NMI setup of IRQ line + * @irq: Interrupt line from which CPU local NMI configuration should be + * removed + * + * This call undoes the setup done by prepare_percpu_nmi(). + * + * IRQ line should not be enabled for the current CPU. + * + * As a CPU local operation, this should be called from non-preemptible + * context. + */ +void teardown_percpu_nmi(unsigned int irq) +{ + unsigned long flags; + struct irq_desc *desc; + + WARN_ON(preemptible()); + + desc = irq_get_desc_lock(irq, &flags, + IRQ_GET_DESC_CHECK_PERCPU); + if (!desc) + return; + + if (WARN_ON(!(desc->istate & IRQS_NMI))) + goto out; + + irq_nmi_teardown(desc); +out: + irq_put_desc_unlock(desc, flags); +} + +int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which, + bool *state) +{ + struct irq_chip *chip; + int err = -EINVAL; + + do { + chip = irq_data_get_irq_chip(data); + if (chip->irq_get_irqchip_state) + break; +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + data = data->parent_data; +#else + data = NULL; +#endif + } while (data); + + if (data) + err = chip->irq_get_irqchip_state(data, which, state); + return err; +} + +/** * irq_get_irqchip_state - returns the irqchip state of a interrupt. * @irq: Interrupt line that is forwarded to a VM * @which: One of IRQCHIP_STATE_* the caller wants to know about @@ -2189,7 +2657,6 @@ int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which, { struct irq_desc *desc; struct irq_data *data; - struct irq_chip *chip; unsigned long flags; int err = -EINVAL; @@ -2199,19 +2666,7 @@ int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which, data = irq_desc_get_irq_data(desc); - do { - chip = irq_data_get_irq_chip(data); - if (chip->irq_get_irqchip_state) - break; -#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY - data = data->parent_data; -#else - data = NULL; -#endif - } while (data); - - if (data) - err = chip->irq_get_irqchip_state(data, which, state); + err = __irq_get_irqchip_state(data, which, state); irq_put_desc_busunlock(desc, flags); return err; diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index 6d2fa6914b30..2ed97a7c9b2a 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -212,9 +212,9 @@ static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret) */ raw_spin_lock_irqsave(&desc->lock, flags); for_each_action_of_desc(desc, action) { - printk(KERN_ERR "[<%p>] %pf", action->handler, action->handler); + printk(KERN_ERR "[<%p>] %ps", action->handler, action->handler); if (action->thread_fn) - printk(KERN_CONT " threaded [<%p>] %pf", + printk(KERN_CONT " threaded [<%p>] %ps", action->thread_fn, action->thread_fn); printk(KERN_CONT "\n"); } diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c index 1e4cb63a5c82..90c735da15d0 100644 --- a/kernel/irq/timings.c +++ b/kernel/irq/timings.c @@ -9,6 +9,7 @@ #include <linux/idr.h> #include <linux/irq.h> #include <linux/math64.h> +#include <linux/log2.h> #include <trace/events/irq.h> @@ -18,16 +19,6 @@ DEFINE_STATIC_KEY_FALSE(irq_timing_enabled); DEFINE_PER_CPU(struct irq_timings, irq_timings); -struct irqt_stat { - u64 next_evt; - u64 last_ts; - u64 variance; - u32 avg; - u32 nr_samples; - int anomalies; - int valid; -}; - static DEFINE_IDR(irqt_stats); void irq_timings_enable(void) @@ -40,75 +31,360 @@ void irq_timings_disable(void) static_branch_disable(&irq_timing_enabled); } -/** - * irqs_update - update the irq timing statistics with a new timestamp +/* + * The main goal of this algorithm is to predict the next interrupt + * occurrence on the current CPU. + * + * Currently, the interrupt timings are stored in a circular array + * buffer every time there is an interrupt, as a tuple: the interrupt + * number and the associated timestamp when the event occurred <irq, + * timestamp>. + * + * For every interrupt occurring in a short period of time, we can + * measure the elapsed time between the occurrences for the same + * interrupt and we end up with a suite of intervals. The experience + * showed the interrupts are often coming following a periodic + * pattern. + * + * The objective of the algorithm is to find out this periodic pattern + * in a fastest way and use its period to predict the next irq event. + * + * When the next interrupt event is requested, we are in the situation + * where the interrupts are disabled and the circular buffer + * containing the timings is filled with the events which happened + * after the previous next-interrupt-event request. + * + * At this point, we read the circular buffer and we fill the irq + * related statistics structure. After this step, the circular array + * containing the timings is empty because all the values are + * dispatched in their corresponding buffers. + * + * Now for each interrupt, we can predict the next event by using the + * suffix array, log interval and exponential moving average + * + * 1. Suffix array + * + * Suffix array is an array of all the suffixes of a string. It is + * widely used as a data structure for compression, text search, ... + * For instance for the word 'banana', the suffixes will be: 'banana' + * 'anana' 'nana' 'ana' 'na' 'a' + * + * Usually, the suffix array is sorted but for our purpose it is + * not necessary and won't provide any improvement in the context of + * the solved problem where we clearly define the boundaries of the + * search by a max period and min period. + * + * The suffix array will build a suite of intervals of different + * length and will look for the repetition of each suite. If the suite + * is repeating then we have the period because it is the length of + * the suite whatever its position in the buffer. + * + * 2. Log interval + * + * We saw the irq timings allow to compute the interval of the + * occurrences for a specific interrupt. We can reasonibly assume the + * longer is the interval, the higher is the error for the next event + * and we can consider storing those interval values into an array + * where each slot in the array correspond to an interval at the power + * of 2 of the index. For example, index 12 will contain values + * between 2^11 and 2^12. + * + * At the end we have an array of values where at each index defines a + * [2^index - 1, 2 ^ index] interval values allowing to store a large + * number of values inside a small array. + * + * For example, if we have the value 1123, then we store it at + * ilog2(1123) = 10 index value. + * + * Storing those value at the specific index is done by computing an + * exponential moving average for this specific slot. For instance, + * for values 1800, 1123, 1453, ... fall under the same slot (10) and + * the exponential moving average is computed every time a new value + * is stored at this slot. + * + * 3. Exponential Moving Average + * + * The EMA is largely used to track a signal for stocks or as a low + * pass filter. The magic of the formula, is it is very simple and the + * reactivity of the average can be tuned with the factors called + * alpha. + * + * The higher the alphas are, the faster the average respond to the + * signal change. In our case, if a slot in the array is a big + * interval, we can have numbers with a big difference between + * them. The impact of those differences in the average computation + * can be tuned by changing the alpha value. + * + * + * -- The algorithm -- + * + * We saw the different processing above, now let's see how they are + * used together. + * + * For each interrupt: + * For each interval: + * Compute the index = ilog2(interval) + * Compute a new_ema(buffer[index], interval) + * Store the index in a circular buffer + * + * Compute the suffix array of the indexes + * + * For each suffix: + * If the suffix is reverse-found 3 times + * Return suffix + * + * Return Not found + * + * However we can not have endless suffix array to be build, it won't + * make sense and it will add an extra overhead, so we can restrict + * this to a maximum suffix length of 5 and a minimum suffix length of + * 2. The experience showed 5 is the majority of the maximum pattern + * period found for different devices. + * + * The result is a pattern finding less than 1us for an interrupt. * - * @irqs: an irqt_stat struct pointer - * @ts: the new timestamp + * Example based on real values: * - * The statistics are computed online, in other words, the code is - * designed to compute the statistics on a stream of values rather - * than doing multiple passes on the values to compute the average, - * then the variance. The integer division introduces a loss of - * precision but with an acceptable error margin regarding the results - * we would have with the double floating precision: we are dealing - * with nanosec, so big numbers, consequently the mantisse is - * negligeable, especially when converting the time in usec - * afterwards. + * Example 1 : MMC write/read interrupt interval: * - * The computation happens at idle time. When the CPU is not idle, the - * interrupts' timestamps are stored in the circular buffer, when the - * CPU goes idle and this routine is called, all the buffer's values - * are injected in the statistical model continuying to extend the - * statistics from the previous busy-idle cycle. + * 223947, 1240, 1384, 1386, 1386, + * 217416, 1236, 1384, 1386, 1387, + * 214719, 1241, 1386, 1387, 1384, + * 213696, 1234, 1384, 1386, 1388, + * 219904, 1240, 1385, 1389, 1385, + * 212240, 1240, 1386, 1386, 1386, + * 214415, 1236, 1384, 1386, 1387, + * 214276, 1234, 1384, 1388, ? * - * The observations showed a device will trigger a burst of periodic - * interrupts followed by one or two peaks of longer time, for - * instance when a SD card device flushes its cache, then the periodic - * intervals occur again. A one second inactivity period resets the - * stats, that gives us the certitude the statistical values won't - * exceed 1x10^9, thus the computation won't overflow. + * For each element, apply ilog2(value) * - * Basically, the purpose of the algorithm is to watch the periodic - * interrupts and eliminate the peaks. + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, ? * - * An interrupt is considered periodically stable if the interval of - * its occurences follow the normal distribution, thus the values - * comply with: + * Max period of 5, we take the last (max_period * 3) 15 elements as + * we can be confident if the pattern repeats itself three times it is + * a repeating pattern. * - * avg - 3 x stddev < value < avg + 3 x stddev + * 8, + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, 8, + * 15, 8, 8, 8, ? * - * Which can be simplified to: + * Suffixes are: * - * -3 x stddev < value - avg < 3 x stddev + * 1) 8, 15, 8, 8, 8 <- max period + * 2) 8, 15, 8, 8 + * 3) 8, 15, 8 + * 4) 8, 15 <- min period * - * abs(value - avg) < 3 x stddev + * From there we search the repeating pattern for each suffix. * - * In order to save a costly square root computation, we use the - * variance. For the record, stddev = sqrt(variance). The equation - * above becomes: + * buffer: 8, 15, 8, 8, 8, 8, 15, 8, 8, 8, 8, 15, 8, 8, 8 + * | | | | | | | | | | | | | | | + * 8, 15, 8, 8, 8 | | | | | | | | | | + * 8, 15, 8, 8, 8 | | | | | + * 8, 15, 8, 8, 8 * - * abs(value - avg) < 3 x sqrt(variance) + * When moving the suffix, we found exactly 3 matches. * - * And finally we square it: + * The first suffix with period 5 is repeating. * - * (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2 + * The next event is (3 * max_period) % suffix_period * - * (value - avg) x (value - avg) < 9 x variance + * In this example, the result 0, so the next event is suffix[0] => 8 * - * Statistically speaking, any values out of this interval is - * considered as an anomaly and is discarded. However, a normal - * distribution appears when the number of samples is 30 (it is the - * rule of thumb in statistics, cf. "30 samples" on Internet). When - * there are three consecutive anomalies, the statistics are resetted. + * However, 8 is the index in the array of exponential moving average + * which was calculated on the fly when storing the values, so the + * interval is ema[8] = 1366 * + * + * Example 2: + * + * 4, 3, 5, 100, + * 3, 3, 5, 117, + * 4, 4, 5, 112, + * 4, 3, 4, 110, + * 3, 5, 3, 117, + * 4, 4, 5, 112, + * 4, 3, 4, 110, + * 3, 4, 5, 112, + * 4, 3, 4, 110 + * + * ilog2 + * + * 0, 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4 + * + * Max period 5: + * 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4, + * 0, 0, 0, 4 + * + * Suffixes: + * + * 1) 0, 0, 4, 0, 0 + * 2) 0, 0, 4, 0 + * 3) 0, 0, 4 + * 4) 0, 0 + * + * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4 + * | | | | | | X + * 0, 0, 4, 0, 0, | X + * 0, 0 + * + * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4 + * | | | | | | | | | | | | | | | + * 0, 0, 4, 0, | | | | | | | | | | | + * 0, 0, 4, 0, | | | | | | | + * 0, 0, 4, 0, | | | + * 0 0 4 + * + * Pattern is found 3 times, the remaining is 1 which results from + * (max_period * 3) % suffix_period. This value is the index in the + * suffix arrays. The suffix array for a period 4 has the value 4 + * at index 1. + */ +#define EMA_ALPHA_VAL 64 +#define EMA_ALPHA_SHIFT 7 + +#define PREDICTION_PERIOD_MIN 2 +#define PREDICTION_PERIOD_MAX 5 +#define PREDICTION_FACTOR 4 +#define PREDICTION_MAX 10 /* 2 ^ PREDICTION_MAX useconds */ +#define PREDICTION_BUFFER_SIZE 16 /* slots for EMAs, hardly more than 16 */ + +struct irqt_stat { + u64 last_ts; + u64 ema_time[PREDICTION_BUFFER_SIZE]; + int timings[IRQ_TIMINGS_SIZE]; + int circ_timings[IRQ_TIMINGS_SIZE]; + int count; +}; + +/* + * Exponential moving average computation */ -static void irqs_update(struct irqt_stat *irqs, u64 ts) +static u64 irq_timings_ema_new(u64 value, u64 ema_old) +{ + s64 diff; + + if (unlikely(!ema_old)) + return value; + + diff = (value - ema_old) * EMA_ALPHA_VAL; + /* + * We can use a s64 type variable to be added with the u64 + * ema_old variable as this one will never have its topmost + * bit set, it will be always smaller than 2^63 nanosec + * interrupt interval (292 years). + */ + return ema_old + (diff >> EMA_ALPHA_SHIFT); +} + +static int irq_timings_next_event_index(int *buffer, size_t len, int period_max) +{ + int i; + + /* + * The buffer contains the suite of intervals, in a ilog2 + * basis, we are looking for a repetition. We point the + * beginning of the search three times the length of the + * period beginning at the end of the buffer. We do that for + * each suffix. + */ + for (i = period_max; i >= PREDICTION_PERIOD_MIN ; i--) { + + int *begin = &buffer[len - (i * 3)]; + int *ptr = begin; + + /* + * We look if the suite with period 'i' repeat + * itself. If it is truncated at the end, as it + * repeats we can use the period to find out the next + * element. + */ + while (!memcmp(ptr, begin, i * sizeof(*ptr))) { + ptr += i; + if (ptr >= &buffer[len]) + return begin[((i * 3) % i)]; + } + } + + return -1; +} + +static u64 __irq_timings_next_event(struct irqt_stat *irqs, int irq, u64 now) +{ + int index, i, period_max, count, start, min = INT_MAX; + + if ((now - irqs->last_ts) >= NSEC_PER_SEC) { + irqs->count = irqs->last_ts = 0; + return U64_MAX; + } + + /* + * As we want to find three times the repetition, we need a + * number of intervals greater or equal to three times the + * maximum period, otherwise we truncate the max period. + */ + period_max = irqs->count > (3 * PREDICTION_PERIOD_MAX) ? + PREDICTION_PERIOD_MAX : irqs->count / 3; + + /* + * If we don't have enough irq timings for this prediction, + * just bail out. + */ + if (period_max <= PREDICTION_PERIOD_MIN) + return U64_MAX; + + /* + * 'count' will depends if the circular buffer wrapped or not + */ + count = irqs->count < IRQ_TIMINGS_SIZE ? + irqs->count : IRQ_TIMINGS_SIZE; + + start = irqs->count < IRQ_TIMINGS_SIZE ? + 0 : (irqs->count & IRQ_TIMINGS_MASK); + + /* + * Copy the content of the circular buffer into another buffer + * in order to linearize the buffer instead of dealing with + * wrapping indexes and shifted array which will be prone to + * error and extremelly difficult to debug. + */ + for (i = 0; i < count; i++) { + int index = (start + i) & IRQ_TIMINGS_MASK; + + irqs->timings[i] = irqs->circ_timings[index]; + min = min_t(int, irqs->timings[i], min); + } + + index = irq_timings_next_event_index(irqs->timings, count, period_max); + if (index < 0) + return irqs->last_ts + irqs->ema_time[min]; + + return irqs->last_ts + irqs->ema_time[index]; +} + +static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts) { u64 old_ts = irqs->last_ts; - u64 variance = 0; u64 interval; - s64 diff; + int index; /* * The timestamps are absolute time values, we need to compute @@ -135,87 +411,28 @@ static void irqs_update(struct irqt_stat *irqs, u64 ts) * want as we need another timestamp to compute an interval. */ if (interval >= NSEC_PER_SEC) { - memset(irqs, 0, sizeof(*irqs)); - irqs->last_ts = ts; + irqs->count = 0; return; } /* - * Pre-compute the delta with the average as the result is - * used several times in this function. - */ - diff = interval - irqs->avg; - - /* - * Increment the number of samples. - */ - irqs->nr_samples++; - - /* - * Online variance divided by the number of elements if there - * is more than one sample. Normally the formula is division - * by nr_samples - 1 but we assume the number of element will be - * more than 32 and dividing by 32 instead of 31 is enough - * precise. - */ - if (likely(irqs->nr_samples > 1)) - variance = irqs->variance >> IRQ_TIMINGS_SHIFT; - - /* - * The rule of thumb in statistics for the normal distribution - * is having at least 30 samples in order to have the model to - * apply. Values outside the interval are considered as an - * anomaly. - */ - if ((irqs->nr_samples >= 30) && ((diff * diff) > (9 * variance))) { - /* - * After three consecutive anomalies, we reset the - * stats as it is no longer stable enough. - */ - if (irqs->anomalies++ >= 3) { - memset(irqs, 0, sizeof(*irqs)); - irqs->last_ts = ts; - return; - } - } else { - /* - * The anomalies must be consecutives, so at this - * point, we reset the anomalies counter. - */ - irqs->anomalies = 0; - } - - /* - * The interrupt is considered stable enough to try to predict - * the next event on it. + * Get the index in the ema table for this interrupt. The + * PREDICTION_FACTOR increase the interval size for the array + * of exponential average. */ - irqs->valid = 1; + index = likely(interval) ? + ilog2((interval >> 10) / PREDICTION_FACTOR) : 0; /* - * Online average algorithm: - * - * new_average = average + ((value - average) / count) - * - * The variance computation depends on the new average - * to be computed here first. - * + * Store the index as an element of the pattern in another + * circular array. */ - irqs->avg = irqs->avg + (diff >> IRQ_TIMINGS_SHIFT); + irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index; - /* - * Online variance algorithm: - * - * new_variance = variance + (value - average) x (value - new_average) - * - * Warning: irqs->avg is updated with the line above, hence - * 'interval - irqs->avg' is no longer equal to 'diff' - */ - irqs->variance = irqs->variance + (diff * (interval - irqs->avg)); + irqs->ema_time[index] = irq_timings_ema_new(interval, + irqs->ema_time[index]); - /* - * Update the next event - */ - irqs->next_evt = ts + irqs->avg; + irqs->count++; } /** @@ -259,6 +476,9 @@ u64 irq_timings_next_event(u64 now) */ lockdep_assert_irqs_disabled(); + if (!irqts->count) + return next_evt; + /* * Number of elements in the circular buffer: If it happens it * was flushed before, then the number of elements could be @@ -269,21 +489,19 @@ u64 irq_timings_next_event(u64 now) * type but with the cost of extra computation in the * interrupt handler hot path. We choose efficiency. * - * Inject measured irq/timestamp to the statistical model - * while decrementing the counter because we consume the data - * from our circular buffer. + * Inject measured irq/timestamp to the pattern prediction + * model while decrementing the counter because we consume the + * data from our circular buffer. */ - for (i = irqts->count & IRQ_TIMINGS_MASK, - irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count); - irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) { - irq = irq_timing_decode(irqts->values[i], &ts); + i = (irqts->count & IRQ_TIMINGS_MASK) - 1; + irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count); + for (; irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) { + irq = irq_timing_decode(irqts->values[i], &ts); s = idr_find(&irqt_stats, irq); - if (s) { - irqs = this_cpu_ptr(s); - irqs_update(irqs, ts); - } + if (s) + irq_timings_store(irq, this_cpu_ptr(s), ts); } /* @@ -294,26 +512,12 @@ u64 irq_timings_next_event(u64 now) irqs = this_cpu_ptr(s); - if (!irqs->valid) - continue; + ts = __irq_timings_next_event(irqs, i, now); + if (ts <= now) + return now; - if (irqs->next_evt <= now) { - irq = i; - next_evt = now; - - /* - * This interrupt mustn't use in the future - * until new events occur and update the - * statistics. - */ - irqs->valid = 0; - break; - } - - if (irqs->next_evt < next_evt) { - irq = i; - next_evt = irqs->next_evt; - } + if (ts < next_evt) + next_evt = ts; } return next_evt; |