diff options
Diffstat (limited to 'drivers/xen/events/events_base.c')
-rw-r--r-- | drivers/xen/events/events_base.c | 620 |
1 files changed, 539 insertions, 81 deletions
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 8d49b91d92cd..d138027034fd 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -32,6 +32,10 @@ #include <linux/slab.h> #include <linux/irqnr.h> #include <linux/pci.h> +#include <linux/spinlock.h> +#include <linux/cpuhotplug.h> +#include <linux/atomic.h> +#include <linux/ktime.h> #ifdef CONFIG_X86 #include <asm/desc.h> @@ -61,6 +65,15 @@ #include "events_internal.h" +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "xen." + +static uint __read_mostly event_loop_timeout = 2; +module_param(event_loop_timeout, uint, 0644); + +static uint __read_mostly event_eoi_delay = 10; +module_param(event_eoi_delay, uint, 0644); + const struct evtchn_ops *evtchn_ops; /* @@ -69,6 +82,25 @@ const struct evtchn_ops *evtchn_ops; */ static DEFINE_MUTEX(irq_mapping_update_lock); +/* + * Lock protecting event handling loop against removing event channels. + * Adding of event channels is no issue as the associated IRQ becomes active + * only after everything is setup (before request_[threaded_]irq() the handler + * can't be entered for an event, as the event channel will be unmasked only + * then). + */ +static DEFINE_RWLOCK(evtchn_rwlock); + +/* + * Lock hierarchy: + * + * irq_mapping_update_lock + * evtchn_rwlock + * IRQ-desc lock + * percpu eoi_list_lock + * irq_info->lock + */ + static LIST_HEAD(xen_irq_list_head); /* IRQ <-> VIRQ mapping. */ @@ -90,18 +122,23 @@ static bool (*pirq_needs_eoi)(unsigned irq); /* Xen will never allocate port zero for any purpose. */ #define VALID_EVTCHN(chn) ((chn) != 0) +static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY]; + static struct irq_chip xen_dynamic_chip; +static struct irq_chip xen_lateeoi_chip; static struct irq_chip xen_percpu_chip; static struct irq_chip xen_pirq_chip; static void enable_dynirq(struct irq_data *data); static void disable_dynirq(struct irq_data *data); -static void clear_evtchn_to_irq_row(unsigned row) +static DEFINE_PER_CPU(unsigned int, irq_epoch); + +static void clear_evtchn_to_irq_row(int *evtchn_row) { unsigned col; for (col = 0; col < EVTCHN_PER_ROW; col++) - evtchn_to_irq[row][col] = -1; + WRITE_ONCE(evtchn_row[col], -1); } static void clear_evtchn_to_irq_all(void) @@ -111,7 +148,7 @@ static void clear_evtchn_to_irq_all(void) for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) { if (evtchn_to_irq[row] == NULL) continue; - clear_evtchn_to_irq_row(row); + clear_evtchn_to_irq_row(evtchn_to_irq[row]); } } @@ -119,6 +156,7 @@ static int set_evtchn_to_irq(unsigned evtchn, unsigned irq) { unsigned row; unsigned col; + int *evtchn_row; if (evtchn >= xen_evtchn_max_channels()) return -EINVAL; @@ -131,14 +169,21 @@ static int set_evtchn_to_irq(unsigned evtchn, unsigned irq) if (irq == -1) return 0; - evtchn_to_irq[row] = (int *)get_zeroed_page(GFP_KERNEL); - if (evtchn_to_irq[row] == NULL) + evtchn_row = (int *) __get_free_pages(GFP_KERNEL, 0); + if (evtchn_row == NULL) return -ENOMEM; - clear_evtchn_to_irq_row(row); + clear_evtchn_to_irq_row(evtchn_row); + + /* + * We've prepared an empty row for the mapping. If a different + * thread was faster inserting it, we can drop ours. + */ + if (cmpxchg(&evtchn_to_irq[row], NULL, evtchn_row) != NULL) + free_page((unsigned long) evtchn_row); } - evtchn_to_irq[row][col] = irq; + WRITE_ONCE(evtchn_to_irq[row][col], irq); return 0; } @@ -148,13 +193,24 @@ int get_evtchn_to_irq(unsigned evtchn) return -1; if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL) return -1; - return evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]; + return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]); } /* Get info for IRQ */ struct irq_info *info_for_irq(unsigned irq) { - return irq_get_handler_data(irq); + if (irq < nr_legacy_irqs()) + return legacy_info_ptrs[irq]; + else + return irq_get_chip_data(irq); +} + +static void set_info_for_irq(unsigned int irq, struct irq_info *info) +{ + if (irq < nr_legacy_irqs()) + legacy_info_ptrs[irq] = info; + else + irq_set_chip_data(irq, info); } /* Constructors for packed IRQ information. */ @@ -172,6 +228,8 @@ static int xen_irq_info_common_setup(struct irq_info *info, info->irq = irq; info->evtchn = evtchn; info->cpu = cpu; + info->mask_reason = EVT_MASK_REASON_EXPLICIT; + raw_spin_lock_init(&info->lock); ret = set_evtchn_to_irq(evtchn, irq); if (ret < 0) @@ -238,6 +296,7 @@ static int xen_irq_info_pirq_setup(unsigned irq, static void xen_irq_info_cleanup(struct irq_info *info) { set_evtchn_to_irq(info->evtchn, -1); + xen_evtchn_port_remove(info->evtchn, info->cpu); info->evtchn = 0; } @@ -246,10 +305,14 @@ static void xen_irq_info_cleanup(struct irq_info *info) */ unsigned int evtchn_from_irq(unsigned irq) { - if (unlikely(WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq))) + const struct irq_info *info = NULL; + + if (likely(irq < nr_irqs)) + info = info_for_irq(irq); + if (!info) return 0; - return info_for_irq(irq)->evtchn; + return info->evtchn; } unsigned irq_from_evtchn(unsigned int evtchn) @@ -314,6 +377,34 @@ unsigned int cpu_from_evtchn(unsigned int evtchn) return ret; } +static void do_mask(struct irq_info *info, u8 reason) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&info->lock, flags); + + if (!info->mask_reason) + mask_evtchn(info->evtchn); + + info->mask_reason |= reason; + + raw_spin_unlock_irqrestore(&info->lock, flags); +} + +static void do_unmask(struct irq_info *info, u8 reason) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&info->lock, flags); + + info->mask_reason &= ~reason; + + if (!info->mask_reason) + unmask_evtchn(info->evtchn); + + raw_spin_unlock_irqrestore(&info->lock, flags); +} + #ifdef CONFIG_X86 static bool pirq_check_eoi_map(unsigned irq) { @@ -360,9 +451,160 @@ void notify_remote_via_irq(int irq) } EXPORT_SYMBOL_GPL(notify_remote_via_irq); +struct lateeoi_work { + struct delayed_work delayed; + spinlock_t eoi_list_lock; + struct list_head eoi_list; +}; + +static DEFINE_PER_CPU(struct lateeoi_work, lateeoi); + +static void lateeoi_list_del(struct irq_info *info) +{ + struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu); + unsigned long flags; + + spin_lock_irqsave(&eoi->eoi_list_lock, flags); + list_del_init(&info->eoi_list); + spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); +} + +static void lateeoi_list_add(struct irq_info *info) +{ + struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu); + struct irq_info *elem; + u64 now = get_jiffies_64(); + unsigned long delay; + unsigned long flags; + + if (now < info->eoi_time) + delay = info->eoi_time - now; + else + delay = 1; + + spin_lock_irqsave(&eoi->eoi_list_lock, flags); + + if (list_empty(&eoi->eoi_list)) { + list_add(&info->eoi_list, &eoi->eoi_list); + mod_delayed_work_on(info->eoi_cpu, system_wq, + &eoi->delayed, delay); + } else { + list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) { + if (elem->eoi_time <= info->eoi_time) + break; + } + list_add(&info->eoi_list, &elem->eoi_list); + } + + spin_unlock_irqrestore(&eoi->eoi_list_lock, flags); +} + +static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious) +{ + evtchn_port_t evtchn; + unsigned int cpu; + unsigned int delay = 0; + + evtchn = info->evtchn; + if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list)) + return; + + if (spurious) { + if ((1 << info->spurious_cnt) < (HZ << 2)) + info->spurious_cnt++; + if (info->spurious_cnt > 1) { + delay = 1 << (info->spurious_cnt - 2); + if (delay > HZ) + delay = HZ; + if (!info->eoi_time) + info->eoi_cpu = smp_processor_id(); + info->eoi_time = get_jiffies_64() + delay; + } + } else { + info->spurious_cnt = 0; + } + + cpu = info->eoi_cpu; + if (info->eoi_time && + (info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) { + lateeoi_list_add(info); + return; + } + + info->eoi_time = 0; + + /* is_active hasn't been reset yet, do it now. */ + smp_store_release(&info->is_active, 0); + do_unmask(info, EVT_MASK_REASON_EOI_PENDING); +} + +static void xen_irq_lateeoi_worker(struct work_struct *work) +{ + struct lateeoi_work *eoi; + struct irq_info *info; + u64 now = get_jiffies_64(); + unsigned long flags; + + eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed); + + read_lock_irqsave(&evtchn_rwlock, flags); + + while (true) { + spin_lock(&eoi->eoi_list_lock); + + info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info, + eoi_list); + + if (info == NULL || now < info->eoi_time) { + spin_unlock(&eoi->eoi_list_lock); + break; + } + + list_del_init(&info->eoi_list); + + spin_unlock(&eoi->eoi_list_lock); + + info->eoi_time = 0; + + xen_irq_lateeoi_locked(info, false); + } + + if (info) + mod_delayed_work_on(info->eoi_cpu, system_wq, + &eoi->delayed, info->eoi_time - now); + + read_unlock_irqrestore(&evtchn_rwlock, flags); +} + +static void xen_cpu_init_eoi(unsigned int cpu) +{ + struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu); + + INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker); + spin_lock_init(&eoi->eoi_list_lock); + INIT_LIST_HEAD(&eoi->eoi_list); +} + +void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags) +{ + struct irq_info *info; + unsigned long flags; + + read_lock_irqsave(&evtchn_rwlock, flags); + + info = info_for_irq(irq); + + if (info) + xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS); + + read_unlock_irqrestore(&evtchn_rwlock, flags); +} +EXPORT_SYMBOL_GPL(xen_irq_lateeoi); + static void xen_irq_init(unsigned irq) { struct irq_info *info; + #ifdef CONFIG_SMP /* By default all event channels notify CPU#0. */ cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0)); @@ -375,8 +617,9 @@ static void xen_irq_init(unsigned irq) info->type = IRQT_UNBOUND; info->refcnt = -1; - irq_set_handler_data(irq, info); + set_info_for_irq(irq, info); + INIT_LIST_HEAD(&info->eoi_list); list_add_tail(&info->list, &xen_irq_list_head); } @@ -424,17 +667,25 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi) static void xen_free_irq(unsigned irq) { - struct irq_info *info = irq_get_handler_data(irq); + struct irq_info *info = info_for_irq(irq); + unsigned long flags; if (WARN_ON(!info)) return; + write_lock_irqsave(&evtchn_rwlock, flags); + + if (!list_empty(&info->eoi_list)) + lateeoi_list_del(info); + list_del(&info->list); - irq_set_handler_data(irq, NULL); + set_info_for_irq(irq, NULL); WARN_ON(info->refcnt > 0); + write_unlock_irqrestore(&evtchn_rwlock, flags); + kfree(info); /* Legacy IRQ descriptors are managed by the arch. */ @@ -453,6 +704,12 @@ static void xen_evtchn_close(unsigned int port) BUG(); } +static void event_handler_exit(struct irq_info *info) +{ + smp_store_release(&info->is_active, 0); + clear_evtchn(info->evtchn); +} + static void pirq_query_unmask(int irq) { struct physdev_irq_status_query irq_status; @@ -471,7 +728,8 @@ static void pirq_query_unmask(int irq) static void eoi_pirq(struct irq_data *data) { - int evtchn = evtchn_from_irq(data->irq); + struct irq_info *info = info_for_irq(data->irq); + int evtchn = info ? info->evtchn : 0; struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) }; int rc = 0; @@ -480,16 +738,15 @@ static void eoi_pirq(struct irq_data *data) if (unlikely(irqd_is_setaffinity_pending(data)) && likely(!irqd_irq_disabled(data))) { - int masked = test_and_set_mask(evtchn); + do_mask(info, EVT_MASK_REASON_TEMPORARY); - clear_evtchn(evtchn); + event_handler_exit(info); irq_move_masked_irq(data); - if (!masked) - unmask_evtchn(evtchn); + do_unmask(info, EVT_MASK_REASON_TEMPORARY); } else - clear_evtchn(evtchn); + event_handler_exit(info); if (pirq_needs_eoi(data->irq)) { rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); @@ -540,7 +797,8 @@ static unsigned int __startup_pirq(unsigned int irq) goto err; out: - unmask_evtchn(evtchn); + do_unmask(info, EVT_MASK_REASON_EXPLICIT); + eoi_pirq(irq_get_irq_data(irq)); return 0; @@ -567,7 +825,7 @@ static void shutdown_pirq(struct irq_data *data) if (!VALID_EVTCHN(evtchn)) return; - mask_evtchn(evtchn); + do_mask(info, EVT_MASK_REASON_EXPLICIT); xen_evtchn_close(evtchn); xen_irq_info_cleanup(info); } @@ -601,7 +859,7 @@ EXPORT_SYMBOL_GPL(xen_irq_from_gsi); static void __unbind_from_irq(unsigned int irq) { int evtchn = evtchn_from_irq(irq); - struct irq_info *info = irq_get_handler_data(irq); + struct irq_info *info = info_for_irq(irq); if (info->refcnt > 0) { info->refcnt--; @@ -826,7 +1084,7 @@ int xen_pirq_from_irq(unsigned irq) } EXPORT_SYMBOL_GPL(xen_pirq_from_irq); -int bind_evtchn_to_irq(unsigned int evtchn) +static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip) { int irq; int ret; @@ -843,7 +1101,7 @@ int bind_evtchn_to_irq(unsigned int evtchn) if (irq < 0) goto out; - irq_set_chip_and_handler_name(irq, &xen_dynamic_chip, + irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "event"); ret = xen_irq_info_evtchn_setup(irq, evtchn); @@ -864,8 +1122,19 @@ out: return irq; } + +int bind_evtchn_to_irq(evtchn_port_t evtchn) +{ + return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip); +} EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); +int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn) +{ + return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip); +} +EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi); + static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) { struct evtchn_bind_ipi bind_ipi; @@ -907,8 +1176,9 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) return irq; } -int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, - unsigned int remote_port) +static int bind_interdomain_evtchn_to_irq_chip(unsigned int remote_domain, + evtchn_port_t remote_port, + struct irq_chip *chip) { struct evtchn_bind_interdomain bind_interdomain; int err; @@ -919,10 +1189,26 @@ int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, &bind_interdomain); - return err ? : bind_evtchn_to_irq(bind_interdomain.local_port); + return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port, + chip); +} + +int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, + evtchn_port_t remote_port) +{ + return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port, + &xen_dynamic_chip); } EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq); +int bind_interdomain_evtchn_to_irq_lateeoi(unsigned int remote_domain, + evtchn_port_t remote_port) +{ + return bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port, + &xen_lateeoi_chip); +} +EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi); + static int find_virq(unsigned int virq, unsigned int cpu) { struct evtchn_status status; @@ -1018,14 +1304,15 @@ static void unbind_from_irq(unsigned int irq) mutex_unlock(&irq_mapping_update_lock); } -int bind_evtchn_to_irqhandler(unsigned int evtchn, - irq_handler_t handler, - unsigned long irqflags, - const char *devname, void *dev_id) +static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, void *dev_id, + struct irq_chip *chip) { int irq, retval; - irq = bind_evtchn_to_irq(evtchn); + irq = bind_evtchn_to_irq_chip(evtchn, chip); if (irq < 0) return irq; retval = request_irq(irq, handler, irqflags, devname, dev_id); @@ -1036,18 +1323,38 @@ int bind_evtchn_to_irqhandler(unsigned int evtchn, return irq; } + +int bind_evtchn_to_irqhandler(evtchn_port_t evtchn, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, void *dev_id) +{ + return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags, + devname, dev_id, + &xen_dynamic_chip); +} EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); -int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, - unsigned int remote_port, - irq_handler_t handler, - unsigned long irqflags, - const char *devname, - void *dev_id) +int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, void *dev_id) +{ + return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags, + devname, dev_id, + &xen_lateeoi_chip); +} +EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi); + +static int bind_interdomain_evtchn_to_irqhandler_chip( + unsigned int remote_domain, evtchn_port_t remote_port, + irq_handler_t handler, unsigned long irqflags, + const char *devname, void *dev_id, struct irq_chip *chip) { int irq, retval; - irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port); + irq = bind_interdomain_evtchn_to_irq_chip(remote_domain, remote_port, + chip); if (irq < 0) return irq; @@ -1059,8 +1366,33 @@ int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, return irq; } + +int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, + evtchn_port_t remote_port, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id) +{ + return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain, + remote_port, handler, irqflags, devname, + dev_id, &xen_dynamic_chip); +} EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler); +int bind_interdomain_evtchn_to_irqhandler_lateeoi(unsigned int remote_domain, + evtchn_port_t remote_port, + irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id) +{ + return bind_interdomain_evtchn_to_irqhandler_chip(remote_domain, + remote_port, handler, irqflags, devname, + dev_id, &xen_lateeoi_chip); +} +EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi); + int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, irq_handler_t handler, unsigned long irqflags, const char *devname, void *dev_id) @@ -1105,7 +1437,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi, void unbind_from_irqhandler(unsigned int irq, void *dev_id) { - struct irq_info *info = irq_get_handler_data(irq); + struct irq_info *info = info_for_irq(irq); if (WARN_ON(!info)) return; @@ -1139,7 +1471,7 @@ int evtchn_make_refcounted(unsigned int evtchn) if (irq == -1) return -ENOENT; - info = irq_get_handler_data(irq); + info = info_for_irq(irq); if (!info) return -ENOENT; @@ -1167,13 +1499,13 @@ int evtchn_get(unsigned int evtchn) if (irq == -1) goto done; - info = irq_get_handler_data(irq); + info = info_for_irq(irq); if (!info) goto done; err = -EINVAL; - if (info->refcnt <= 0) + if (info->refcnt <= 0 || info->refcnt == SHRT_MAX) goto done; info->refcnt++; @@ -1212,6 +1544,56 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) notify_remote_via_irq(irq); } +struct evtchn_loop_ctrl { + ktime_t timeout; + unsigned count; + bool defer_eoi; +}; + +void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) +{ + int irq; + struct irq_info *info; + + irq = get_evtchn_to_irq(port); + if (irq == -1) + return; + + /* + * Check for timeout every 256 events. + * We are setting the timeout value only after the first 256 + * events in order to not hurt the common case of few loop + * iterations. The 256 is basically an arbitrary value. + * + * In case we are hitting the timeout we need to defer all further + * EOIs in order to ensure to leave the event handling loop rather + * sooner than later. + */ + if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) { + ktime_t kt = ktime_get(); + + if (!ctrl->timeout) { + kt = ktime_add_ms(kt, + jiffies_to_msecs(event_loop_timeout)); + ctrl->timeout = kt; + } else if (kt > ctrl->timeout) { + ctrl->defer_eoi = true; + } + } + + info = info_for_irq(irq); + if (xchg_acquire(&info->is_active, 1)) + return; + + if (ctrl->defer_eoi) { + info->eoi_cpu = smp_processor_id(); + info->irq_epoch = __this_cpu_read(irq_epoch); + info->eoi_time = get_jiffies_64() + event_eoi_delay; + } + + generic_handle_irq(irq); +} + static DEFINE_PER_CPU(unsigned, xed_nesting_count); static void __xen_evtchn_do_upcall(void) @@ -1219,6 +1601,9 @@ static void __xen_evtchn_do_upcall(void) struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); int cpu = get_cpu(); unsigned count; + struct evtchn_loop_ctrl ctrl = { 0 }; + + read_lock(&evtchn_rwlock); do { vcpu_info->evtchn_upcall_pending = 0; @@ -1226,7 +1611,7 @@ static void __xen_evtchn_do_upcall(void) if (__this_cpu_inc_return(xed_nesting_count) - 1) goto out; - xen_evtchn_handle_events(cpu); + xen_evtchn_handle_events(cpu, &ctrl); BUG_ON(!irqs_disabled()); @@ -1235,6 +1620,14 @@ static void __xen_evtchn_do_upcall(void) } while (count != 1 || vcpu_info->evtchn_upcall_pending); out: + read_unlock(&evtchn_rwlock); + + /* + * Increment irq_epoch only now to defer EOIs only for + * xen_irq_lateeoi() invocations occurring from inside the loop + * above. + */ + __this_cpu_inc(irq_epoch); put_cpu(); } @@ -1293,10 +1686,10 @@ void rebind_evtchn_irq(int evtchn, int irq) } /* Rebind an evtchn so that it gets delivered to a specific cpu */ -static int xen_rebind_evtchn_to_cpu(int evtchn, unsigned int tcpu) +static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu) { struct evtchn_bind_vcpu bind_vcpu; - int masked; + evtchn_port_t evtchn = info ? info->evtchn : 0; if (!VALID_EVTCHN(evtchn)) return -1; @@ -1312,7 +1705,7 @@ static int xen_rebind_evtchn_to_cpu(int evtchn, unsigned int tcpu) * Mask the event while changing the VCPU binding to prevent * it being delivered on an unexpected VCPU. */ - masked = test_and_set_mask(evtchn); + do_mask(info, EVT_MASK_REASON_TEMPORARY); /* * If this fails, it usually just indicates that we're dealing with a @@ -1322,8 +1715,7 @@ static int xen_rebind_evtchn_to_cpu(int evtchn, unsigned int tcpu) if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) bind_evtchn_to_cpu(evtchn, tcpu); - if (!masked) - unmask_evtchn(evtchn); + do_unmask(info, EVT_MASK_REASON_TEMPORARY); return 0; } @@ -1332,7 +1724,7 @@ static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest, bool force) { unsigned tcpu = cpumask_first_and(dest, cpu_online_mask); - int ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu); + int ret = xen_rebind_evtchn_to_cpu(info_for_irq(data->irq), tcpu); if (!ret) irq_data_update_effective_affinity(data, cpumask_of(tcpu)); @@ -1351,39 +1743,41 @@ EXPORT_SYMBOL_GPL(xen_set_affinity_evtchn); static void enable_dynirq(struct irq_data *data) { - int evtchn = evtchn_from_irq(data->irq); + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; if (VALID_EVTCHN(evtchn)) - unmask_evtchn(evtchn); + do_unmask(info, EVT_MASK_REASON_EXPLICIT); } static void disable_dynirq(struct irq_data *data) { - int evtchn = evtchn_from_irq(data->irq); + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; if (VALID_EVTCHN(evtchn)) - mask_evtchn(evtchn); + do_mask(info, EVT_MASK_REASON_EXPLICIT); } static void ack_dynirq(struct irq_data *data) { - int evtchn = evtchn_from_irq(data->irq); + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; if (!VALID_EVTCHN(evtchn)) return; if (unlikely(irqd_is_setaffinity_pending(data)) && likely(!irqd_irq_disabled(data))) { - int masked = test_and_set_mask(evtchn); + do_mask(info, EVT_MASK_REASON_TEMPORARY); - clear_evtchn(evtchn); + event_handler_exit(info); irq_move_masked_irq(data); - if (!masked) - unmask_evtchn(evtchn); + do_unmask(info, EVT_MASK_REASON_TEMPORARY); } else - clear_evtchn(evtchn); + event_handler_exit(info); } static void mask_ack_dynirq(struct irq_data *data) @@ -1392,18 +1786,51 @@ static void mask_ack_dynirq(struct irq_data *data) ack_dynirq(data); } +static void lateeoi_ack_dynirq(struct irq_data *data) +{ + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; + + if (!VALID_EVTCHN(evtchn)) + return; + + do_mask(info, EVT_MASK_REASON_EOI_PENDING); + + if (unlikely(irqd_is_setaffinity_pending(data)) && + likely(!irqd_irq_disabled(data))) { + do_mask(info, EVT_MASK_REASON_TEMPORARY); + + clear_evtchn(evtchn); + + irq_move_masked_irq(data); + + do_unmask(info, EVT_MASK_REASON_TEMPORARY); + } else + clear_evtchn(evtchn); +} + +static void lateeoi_mask_ack_dynirq(struct irq_data *data) +{ + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; + + if (VALID_EVTCHN(evtchn)) { + do_mask(info, EVT_MASK_REASON_EXPLICIT); + ack_dynirq(data); + } +} + static int retrigger_dynirq(struct irq_data *data) { - unsigned int evtchn = evtchn_from_irq(data->irq); - int masked; + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; if (!VALID_EVTCHN(evtchn)) return 0; - masked = test_and_set_mask(evtchn); + do_mask(info, EVT_MASK_REASON_TEMPORARY); set_evtchn(evtchn); - if (!masked) - unmask_evtchn(evtchn); + do_unmask(info, EVT_MASK_REASON_TEMPORARY); return 1; } @@ -1498,10 +1925,11 @@ static void restore_cpu_ipis(unsigned int cpu) /* Clear an irq's pending state, in preparation for polling on it */ void xen_clear_irq_pending(int irq) { - int evtchn = evtchn_from_irq(irq); + struct irq_info *info = info_for_irq(irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; if (VALID_EVTCHN(evtchn)) - clear_evtchn(evtchn); + event_handler_exit(info); } EXPORT_SYMBOL(xen_clear_irq_pending); void xen_set_irq_pending(int irq) @@ -1601,6 +2029,21 @@ static struct irq_chip xen_dynamic_chip __read_mostly = { .irq_retrigger = retrigger_dynirq, }; +static struct irq_chip xen_lateeoi_chip __read_mostly = { + /* The chip name needs to contain "xen-dyn" for irqbalance to work. */ + .name = "xen-dyn-lateeoi", + + .irq_disable = disable_dynirq, + .irq_mask = disable_dynirq, + .irq_unmask = enable_dynirq, + + .irq_ack = lateeoi_ack_dynirq, + .irq_mask_ack = lateeoi_mask_ack_dynirq, + + .irq_set_affinity = set_affinity_irq, + .irq_retrigger = retrigger_dynirq, +}; + static struct irq_chip xen_pirq_chip __read_mostly = { .name = "xen-pirq", @@ -1631,16 +2074,6 @@ static struct irq_chip xen_percpu_chip __read_mostly = { .irq_ack = ack_dynirq, }; -int xen_set_callback_via(uint64_t via) -{ - struct xen_hvm_param a; - a.domid = DOMID_SELF; - a.index = HVM_PARAM_CALLBACK_IRQ; - a.value = via; - return HYPERVISOR_hvm_op(HVMOP_set_param, &a); -} -EXPORT_SYMBOL_GPL(xen_set_callback_via); - #ifdef CONFIG_XEN_PVHVM /* Vector callbacks are better than PCI interrupts to receive event * channel notifications because we can receive vector callbacks on any @@ -1667,12 +2100,31 @@ void xen_callback_vector(void) void xen_callback_vector(void) {} #endif -#undef MODULE_PARAM_PREFIX -#define MODULE_PARAM_PREFIX "xen." - static bool fifo_events = true; module_param(fifo_events, bool, 0); +static int xen_evtchn_cpu_prepare(unsigned int cpu) +{ + int ret = 0; + + xen_cpu_init_eoi(cpu); + + if (evtchn_ops->percpu_init) + ret = evtchn_ops->percpu_init(cpu); + + return ret; +} + +static int xen_evtchn_cpu_dead(unsigned int cpu) +{ + int ret = 0; + + if (evtchn_ops->percpu_deinit) + ret = evtchn_ops->percpu_deinit(cpu); + + return ret; +} + void __init xen_init_IRQ(void) { int ret = -EINVAL; @@ -1683,6 +2135,12 @@ void __init xen_init_IRQ(void) if (ret < 0) xen_evtchn_2l_init(); + xen_cpu_init_eoi(smp_processor_id()); + + cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE, + "xen/evtchn:prepare", + xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead); + evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()), sizeof(*evtchn_to_irq), GFP_KERNEL); BUG_ON(!evtchn_to_irq); |