diff options
Diffstat (limited to 'drivers')
62 files changed, 1631 insertions, 192 deletions
diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index fded776236e2..bda523219d50 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -116,7 +116,7 @@ ACPI_GLOBAL(u8, acpi_gbl_global_lock_pending); * interrupt level */ ACPI_GLOBAL(acpi_spinlock, acpi_gbl_gpe_lock); /* For GPE data structs and registers */ -ACPI_GLOBAL(acpi_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */ +ACPI_GLOBAL(acpi_raw_spinlock, acpi_gbl_hardware_lock); /* For ACPI H/W except GPE registers */ ACPI_GLOBAL(acpi_spinlock, acpi_gbl_reference_count_lock); /* Mutex for _OSI support */ diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c index 3b7fb99362b6..696bf8e62afb 100644 --- a/drivers/acpi/acpica/hwregs.c +++ b/drivers/acpi/acpica/hwregs.c @@ -363,14 +363,14 @@ acpi_status acpi_hw_clear_acpi_status(void) ACPI_BITMASK_ALL_FIXED_STATUS, ACPI_FORMAT_UINT64(acpi_gbl_xpm1a_status.address))); - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); /* Clear the fixed events in PM1 A/B */ status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS, ACPI_BITMASK_ALL_FIXED_STATUS); - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); if (ACPI_FAILURE(status)) { goto exit; diff --git a/drivers/acpi/acpica/hwxface.c b/drivers/acpi/acpica/hwxface.c index 98c26ff39409..6e236f2ea791 100644 --- a/drivers/acpi/acpica/hwxface.c +++ b/drivers/acpi/acpica/hwxface.c @@ -373,7 +373,7 @@ acpi_status acpi_write_bit_register(u32 register_id, u32 value) return_ACPI_STATUS(AE_BAD_PARAMETER); } - lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); + raw_spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); /* * At this point, we know that the parent register is one of the @@ -434,7 +434,7 @@ acpi_status acpi_write_bit_register(u32 register_id, u32 value) unlock_and_exit: - acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); + raw_spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); return_ACPI_STATUS(status); } diff --git a/drivers/acpi/acpica/utmutex.c b/drivers/acpi/acpica/utmutex.c index 15073375bd00..357e7ca5a587 100644 --- a/drivers/acpi/acpica/utmutex.c +++ b/drivers/acpi/acpica/utmutex.c @@ -88,7 +88,7 @@ acpi_status acpi_ut_mutex_initialize(void) return_ACPI_STATUS (status); } - status = acpi_os_create_lock (&acpi_gbl_hardware_lock); + status = acpi_os_create_raw_lock (&acpi_gbl_hardware_lock); if (ACPI_FAILURE (status)) { return_ACPI_STATUS (status); } @@ -145,7 +145,7 @@ void acpi_ut_mutex_terminate(void) /* Delete the spinlocks */ acpi_os_delete_lock(acpi_gbl_gpe_lock); - acpi_os_delete_lock(acpi_gbl_hardware_lock); + acpi_os_delete_raw_lock(acpi_gbl_hardware_lock); acpi_os_delete_lock(acpi_gbl_reference_count_lock); /* Delete the reader/writer lock */ diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 051b6158d1b7..7ad293bef6ed 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -678,9 +678,9 @@ unsigned int ata_sff_data_xfer_noirq(struct ata_device *dev, unsigned char *buf, unsigned long flags; unsigned int consumed; - local_irq_save(flags); + local_irq_save_nort(flags); consumed = ata_sff_data_xfer32(dev, buf, buflen, rw); - local_irq_restore(flags); + local_irq_restore_nort(flags); return consumed; } @@ -719,7 +719,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) unsigned long flags; /* FIXME: use a bounce buffer */ - local_irq_save(flags); + local_irq_save_nort(flags); buf = kmap_atomic(page); /* do the actual data transfer */ @@ -727,7 +727,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) do_write); kunmap_atomic(buf); - local_irq_restore(flags); + local_irq_restore_nort(flags); } else { buf = page_address(page); ap->ops->sff_data_xfer(qc->dev, buf + offset, qc->sect_size, @@ -864,7 +864,7 @@ next_sg: unsigned long flags; /* FIXME: use bounce buffer */ - local_irq_save(flags); + local_irq_save_nort(flags); buf = kmap_atomic(page); /* do the actual data transfer */ @@ -872,7 +872,7 @@ next_sg: count, rw); kunmap_atomic(buf); - local_irq_restore(flags); + local_irq_restore_nort(flags); } else { buf = page_address(page); consumed = ap->ops->sff_data_xfer(dev, buf + offset, diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 4b5cd3a7b2b6..fa8329ad79fd 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -118,12 +118,19 @@ ssize_t zcomp_available_show(const char *comp, char *buf) struct zcomp_strm *zcomp_stream_get(struct zcomp *comp) { - return *get_cpu_ptr(comp->stream); + struct zcomp_strm *zstrm; + + zstrm = *this_cpu_ptr(comp->stream); + spin_lock(&zstrm->zcomp_lock); + return zstrm; } void zcomp_stream_put(struct zcomp *comp) { - put_cpu_ptr(comp->stream); + struct zcomp_strm *zstrm; + + zstrm = *this_cpu_ptr(comp->stream); + spin_unlock(&zstrm->zcomp_lock); } int zcomp_compress(struct zcomp_strm *zstrm, @@ -174,6 +181,7 @@ static int __zcomp_cpu_notifier(struct zcomp *comp, pr_err("Can't allocate a compression stream\n"); return NOTIFY_BAD; } + spin_lock_init(&zstrm->zcomp_lock); *per_cpu_ptr(comp->stream, cpu) = zstrm; break; case CPU_DEAD: diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index 478cac2ed465..f7a6efdc3285 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -14,6 +14,7 @@ struct zcomp_strm { /* compression/decompression buffer */ void *buffer; struct crypto_comp *tfm; + spinlock_t zcomp_lock; }; /* dynamic per-device compression frontend */ diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index f874e2a529f4..42642fcc4f3e 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -528,6 +528,8 @@ static struct zram_meta *zram_meta_alloc(char *pool_name, u64 disksize) goto out_error; } + zram_meta_init_table_locks(meta, disksize); + return meta; out_error: @@ -575,28 +577,28 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) struct zram_meta *meta = zram->meta; unsigned long handle; unsigned int size; + struct zcomp_strm *zstrm; - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); + zram_lock_table(&meta->table[index]); handle = meta->table[index].handle; size = zram_get_obj_size(meta, index); if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); + zram_unlock_table(&meta->table[index]); clear_page(mem); return 0; } + zstrm = zcomp_stream_get(zram->comp); cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO); if (size == PAGE_SIZE) { memcpy(mem, cmem, PAGE_SIZE); } else { - struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp); - ret = zcomp_decompress(zstrm, cmem, size, mem); - zcomp_stream_put(zram->comp); } zs_unmap_object(meta->mem_pool, handle); - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); + zcomp_stream_put(zram->comp); + zram_unlock_table(&meta->table[index]); /* Should NEVER happen. Return bio error if it does. */ if (unlikely(ret)) { @@ -616,14 +618,14 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, struct zram_meta *meta = zram->meta; page = bvec->bv_page; - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); + zram_lock_table(&meta->table[index]); if (unlikely(!meta->table[index].handle) || zram_test_flag(meta, index, ZRAM_ZERO)) { - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); + zram_unlock_table(&meta->table[index]); handle_zero_page(bvec); return 0; } - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); + zram_unlock_table(&meta->table[index]); if (is_partial_io(bvec)) /* Use a temporary buffer to decompress the page */ @@ -700,10 +702,10 @@ compress_again: if (user_mem) kunmap_atomic(user_mem); /* Free memory associated with this sector now. */ - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); + zram_lock_table(&meta->table[index]); zram_free_page(zram, index); zram_set_flag(meta, index, ZRAM_ZERO); - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); + zram_unlock_table(&meta->table[index]); atomic64_inc(&zram->stats.zero_pages); ret = 0; @@ -794,12 +796,12 @@ compress_again: * Free memory associated with this sector * before overwriting unused sectors. */ - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); + zram_lock_table(&meta->table[index]); zram_free_page(zram, index); meta->table[index].handle = handle; zram_set_obj_size(meta, index, clen); - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); + zram_unlock_table(&meta->table[index]); /* Update stats */ atomic64_add(clen, &zram->stats.compr_data_size); @@ -842,9 +844,9 @@ static void zram_bio_discard(struct zram *zram, u32 index, } while (n >= PAGE_SIZE) { - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); + zram_lock_table(&meta->table[index]); zram_free_page(zram, index); - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); + zram_unlock_table(&meta->table[index]); atomic64_inc(&zram->stats.notify_free); index++; n -= PAGE_SIZE; @@ -973,9 +975,9 @@ static void zram_slot_free_notify(struct block_device *bdev, zram = bdev->bd_disk->private_data; meta = zram->meta; - bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); + zram_lock_table(&meta->table[index]); zram_free_page(zram, index); - bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); + zram_unlock_table(&meta->table[index]); atomic64_inc(&zram->stats.notify_free); } diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 74fcf10da374..fd4020c99b9e 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -73,6 +73,9 @@ enum zram_pageflags { struct zram_table_entry { unsigned long handle; unsigned long value; +#ifdef CONFIG_PREEMPT_RT_BASE + spinlock_t lock; +#endif }; struct zram_stats { @@ -120,4 +123,42 @@ struct zram { */ bool claim; /* Protected by bdev->bd_mutex */ }; + +#ifndef CONFIG_PREEMPT_RT_BASE +static inline void zram_lock_table(struct zram_table_entry *table) +{ + bit_spin_lock(ZRAM_ACCESS, &table->value); +} + +static inline void zram_unlock_table(struct zram_table_entry *table) +{ + bit_spin_unlock(ZRAM_ACCESS, &table->value); +} + +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize) { } +#else /* CONFIG_PREEMPT_RT_BASE */ +static inline void zram_lock_table(struct zram_table_entry *table) +{ + spin_lock(&table->lock); + __set_bit(ZRAM_ACCESS, &table->value); +} + +static inline void zram_unlock_table(struct zram_table_entry *table) +{ + __clear_bit(ZRAM_ACCESS, &table->value); + spin_unlock(&table->lock); +} + +static inline void zram_meta_init_table_locks(struct zram_meta *meta, u64 disksize) +{ + size_t num_pages = disksize >> PAGE_SHIFT; + size_t index; + + for (index = 0; index < num_pages; index++) { + spinlock_t *lock = &meta->table[index].lock; + spin_lock_init(lock); + } +} +#endif /* CONFIG_PREEMPT_RT_BASE */ + #endif diff --git a/drivers/char/random.c b/drivers/char/random.c index 19d659d2ea3c..1c2f900da395 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1028,8 +1028,6 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) } sample; long delta, delta2, delta3; - preempt_disable(); - sample.jiffies = jiffies; sample.cycles = random_get_entropy(); sample.num = num; @@ -1070,7 +1068,6 @@ static void add_timer_randomness(struct timer_rand_state *state, unsigned num) */ credit_entropy_bits(r, min_t(int, fls(delta>>1), 11)); } - preempt_enable(); } void add_input_randomness(unsigned int type, unsigned int code, @@ -1123,28 +1120,27 @@ static __u32 get_reg(struct fast_pool *f, struct pt_regs *regs) return *(ptr + f->reg_idx++); } -void add_interrupt_randomness(int irq, int irq_flags) +void add_interrupt_randomness(int irq, int irq_flags, __u64 ip) { struct entropy_store *r; struct fast_pool *fast_pool = this_cpu_ptr(&irq_randomness); - struct pt_regs *regs = get_irq_regs(); unsigned long now = jiffies; cycles_t cycles = random_get_entropy(); __u32 c_high, j_high; - __u64 ip; unsigned long seed; int credit = 0; if (cycles == 0) - cycles = get_reg(fast_pool, regs); + cycles = get_reg(fast_pool, NULL); c_high = (sizeof(cycles) > 4) ? cycles >> 32 : 0; j_high = (sizeof(now) > 4) ? now >> 32 : 0; fast_pool->pool[0] ^= cycles ^ j_high ^ irq; fast_pool->pool[1] ^= now ^ c_high; - ip = regs ? instruction_pointer(regs) : _RET_IP_; + if (!ip) + ip = _RET_IP_; fast_pool->pool[2] ^= ip; fast_pool->pool[3] ^= (sizeof(ip) > 4) ? ip >> 32 : - get_reg(fast_pool, regs); + get_reg(fast_pool, NULL); fast_mix(fast_pool); add_interrupt_bench(cycles); diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c index 4da2af9694a2..5b6f57f500b8 100644 --- a/drivers/clocksource/tcb_clksrc.c +++ b/drivers/clocksource/tcb_clksrc.c @@ -23,8 +23,7 @@ * this 32 bit free-running counter. the second channel is not used. * * - The third channel may be used to provide a 16-bit clockevent - * source, used in either periodic or oneshot mode. This runs - * at 32 KiHZ, and can handle delays of up to two seconds. + * source, used in either periodic or oneshot mode. * * A boot clocksource and clockevent source are also currently needed, * unless the relevant platforms (ARM/AT91, AVR32/AT32) are changed so @@ -74,6 +73,8 @@ static struct clocksource clksrc = { struct tc_clkevt_device { struct clock_event_device clkevt; struct clk *clk; + bool clk_enabled; + u32 freq; void __iomem *regs; }; @@ -82,15 +83,26 @@ static struct tc_clkevt_device *to_tc_clkevt(struct clock_event_device *clkevt) return container_of(clkevt, struct tc_clkevt_device, clkevt); } -/* For now, we always use the 32K clock ... this optimizes for NO_HZ, - * because using one of the divided clocks would usually mean the - * tick rate can never be less than several dozen Hz (vs 0.5 Hz). - * - * A divided clock could be good for high resolution timers, since - * 30.5 usec resolution can seem "low". - */ static u32 timer_clock; +static void tc_clk_disable(struct clock_event_device *d) +{ + struct tc_clkevt_device *tcd = to_tc_clkevt(d); + + clk_disable(tcd->clk); + tcd->clk_enabled = false; +} + +static void tc_clk_enable(struct clock_event_device *d) +{ + struct tc_clkevt_device *tcd = to_tc_clkevt(d); + + if (tcd->clk_enabled) + return; + clk_enable(tcd->clk); + tcd->clk_enabled = true; +} + static int tc_shutdown(struct clock_event_device *d) { struct tc_clkevt_device *tcd = to_tc_clkevt(d); @@ -98,8 +110,14 @@ static int tc_shutdown(struct clock_event_device *d) __raw_writel(0xff, regs + ATMEL_TC_REG(2, IDR)); __raw_writel(ATMEL_TC_CLKDIS, regs + ATMEL_TC_REG(2, CCR)); + return 0; +} + +static int tc_shutdown_clk_off(struct clock_event_device *d) +{ + tc_shutdown(d); if (!clockevent_state_detached(d)) - clk_disable(tcd->clk); + tc_clk_disable(d); return 0; } @@ -112,9 +130,9 @@ static int tc_set_oneshot(struct clock_event_device *d) if (clockevent_state_oneshot(d) || clockevent_state_periodic(d)) tc_shutdown(d); - clk_enable(tcd->clk); + tc_clk_enable(d); - /* slow clock, count up to RC, then irq and stop */ + /* count up to RC, then irq and stop */ __raw_writel(timer_clock | ATMEL_TC_CPCSTOP | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO, regs + ATMEL_TC_REG(2, CMR)); __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER)); @@ -134,12 +152,12 @@ static int tc_set_periodic(struct clock_event_device *d) /* By not making the gentime core emulate periodic mode on top * of oneshot, we get lower overhead and improved accuracy. */ - clk_enable(tcd->clk); + tc_clk_enable(d); - /* slow clock, count up to RC, then irq and restart */ + /* count up to RC, then irq and restart */ __raw_writel(timer_clock | ATMEL_TC_WAVE | ATMEL_TC_WAVESEL_UP_AUTO, regs + ATMEL_TC_REG(2, CMR)); - __raw_writel((32768 + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC)); + __raw_writel((tcd->freq + HZ / 2) / HZ, tcaddr + ATMEL_TC_REG(2, RC)); /* Enable clock and interrupts on RC compare */ __raw_writel(ATMEL_TC_CPCS, regs + ATMEL_TC_REG(2, IER)); @@ -166,9 +184,13 @@ static struct tc_clkevt_device clkevt = { .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, /* Should be lower than at91rm9200's system timer */ +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK .rating = 125, +#else + .rating = 200, +#endif .set_next_event = tc_next_event, - .set_state_shutdown = tc_shutdown, + .set_state_shutdown = tc_shutdown_clk_off, .set_state_periodic = tc_set_periodic, .set_state_oneshot = tc_set_oneshot, }, @@ -188,8 +210,9 @@ static irqreturn_t ch2_irq(int irq, void *handle) return IRQ_NONE; } -static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) +static int __init setup_clkevents(struct atmel_tc *tc, int divisor_idx) { + unsigned divisor = atmel_tc_divisors[divisor_idx]; int ret; struct clk *t2_clk = tc->clk[2]; int irq = tc->irq[2]; @@ -210,7 +233,11 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) clkevt.regs = tc->regs; clkevt.clk = t2_clk; - timer_clock = clk32k_divisor_idx; + timer_clock = divisor_idx; + if (!divisor) + clkevt.freq = 32768; + else + clkevt.freq = clk_get_rate(t2_clk) / divisor; clkevt.clkevt.cpumask = cpumask_of(0); @@ -221,7 +248,7 @@ static int __init setup_clkevents(struct atmel_tc *tc, int clk32k_divisor_idx) return ret; } - clockevents_config_and_register(&clkevt.clkevt, 32768, 1, 0xffff); + clockevents_config_and_register(&clkevt.clkevt, clkevt.freq, 1, 0xffff); return ret; } @@ -358,7 +385,11 @@ static int __init tcb_clksrc_init(void) goto err_disable_t1; /* channel 2: periodic and oneshot timer support */ +#ifdef CONFIG_ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK ret = setup_clkevents(tc, clk32k_divisor_idx); +#else + ret = setup_clkevents(tc, best_divisor_idx); +#endif if (ret) goto err_unregister_clksrc; diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c index 7f0f5b26d8c5..1553f19e73e7 100644 --- a/drivers/clocksource/timer-atmel-pit.c +++ b/drivers/clocksource/timer-atmel-pit.c @@ -46,6 +46,7 @@ struct pit_data { u32 cycle; u32 cnt; unsigned int irq; + bool irq_requested; struct clk *mck; }; @@ -96,15 +97,29 @@ static int pit_clkevt_shutdown(struct clock_event_device *dev) /* disable irq, leaving the clocksource active */ pit_write(data->base, AT91_PIT_MR, (data->cycle - 1) | AT91_PIT_PITEN); + if (data->irq_requested) { + free_irq(data->irq, data); + data->irq_requested = false; + } return 0; } +static irqreturn_t at91sam926x_pit_interrupt(int irq, void *dev_id); /* * Clockevent device: interrupts every 1/HZ (== pit_cycles * MCK/16) */ static int pit_clkevt_set_periodic(struct clock_event_device *dev) { struct pit_data *data = clkevt_to_pit_data(dev); + int ret; + + ret = request_irq(data->irq, at91sam926x_pit_interrupt, + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, + "at91_tick", data); + if (ret) + panic(pr_fmt("Unable to setup IRQ\n")); + + data->irq_requested = true; /* update clocksource counter */ data->cnt += data->cycle * PIT_PICNT(pit_read(data->base, AT91_PIT_PIVR)); @@ -211,15 +226,6 @@ static int __init at91sam926x_pit_common_init(struct pit_data *data) return ret; } - /* Set up irq handler */ - ret = request_irq(data->irq, at91sam926x_pit_interrupt, - IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, - "at91_tick", data); - if (ret) { - pr_err("Unable to setup IRQ\n"); - return ret; - } - /* Set up and register clockevents */ data->clkevt.name = "pit"; data->clkevt.features = CLOCK_EVT_FEAT_PERIODIC; diff --git a/drivers/clocksource/timer-atmel-st.c b/drivers/clocksource/timer-atmel-st.c index e90ab5b63a90..9e124087c55f 100644 --- a/drivers/clocksource/timer-atmel-st.c +++ b/drivers/clocksource/timer-atmel-st.c @@ -115,18 +115,29 @@ static void clkdev32k_disable_and_flush_irq(void) last_crtr = read_CRTR(); } +static int atmel_st_irq; + static int clkevt32k_shutdown(struct clock_event_device *evt) { clkdev32k_disable_and_flush_irq(); irqmask = 0; regmap_write(regmap_st, AT91_ST_IER, irqmask); + free_irq(atmel_st_irq, regmap_st); return 0; } static int clkevt32k_set_oneshot(struct clock_event_device *dev) { + int ret; + clkdev32k_disable_and_flush_irq(); + ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt, + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, + "at91_tick", regmap_st); + if (ret) + panic(pr_fmt("Unable to setup IRQ\n")); + /* * ALM for oneshot irqs, set by next_event() * before 32 seconds have passed. @@ -139,8 +150,16 @@ static int clkevt32k_set_oneshot(struct clock_event_device *dev) static int clkevt32k_set_periodic(struct clock_event_device *dev) { + int ret; + clkdev32k_disable_and_flush_irq(); + ret = request_irq(atmel_st_irq, at91rm9200_timer_interrupt, + IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, + "at91_tick", regmap_st); + if (ret) + panic(pr_fmt("Unable to setup IRQ\n")); + /* PIT for periodic irqs; fixed rate of 1/HZ */ irqmask = AT91_ST_PITS; regmap_write(regmap_st, AT91_ST_PIMR, timer_latch); @@ -198,7 +217,7 @@ static int __init atmel_st_timer_init(struct device_node *node) { struct clk *sclk; unsigned int sclk_rate, val; - int irq, ret; + int ret; regmap_st = syscon_node_to_regmap(node); if (IS_ERR(regmap_st)) { @@ -212,21 +231,12 @@ static int __init atmel_st_timer_init(struct device_node *node) regmap_read(regmap_st, AT91_ST_SR, &val); /* Get the interrupts property */ - irq = irq_of_parse_and_map(node, 0); - if (!irq) { + atmel_st_irq = irq_of_parse_and_map(node, 0); + if (!atmel_st_irq) { pr_err("Unable to get IRQ from DT\n"); return -EINVAL; } - /* Make IRQs happen for the system timer */ - ret = request_irq(irq, at91rm9200_timer_interrupt, - IRQF_SHARED | IRQF_TIMER | IRQF_IRQPOLL, - "at91_tick", regmap_st); - if (ret) { - pr_err("Unable to setup IRQ\n"); - return ret; - } - sclk = of_clk_get(node, 0); if (IS_ERR(sclk)) { pr_err("Unable to get slow clock\n"); diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index a782ce87715c..19d265948526 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -32,6 +32,7 @@ #include <linux/pid_namespace.h> #include <linux/cn_proc.h> +#include <linux/locallock.h> /* * Size of a cn_msg followed by a proc_event structure. Since the @@ -54,10 +55,11 @@ static struct cb_id cn_proc_event_id = { CN_IDX_PROC, CN_VAL_PROC }; /* proc_event_counts is used as the sequence number of the netlink message */ static DEFINE_PER_CPU(__u32, proc_event_counts) = { 0 }; +static DEFINE_LOCAL_IRQ_LOCK(send_msg_lock); static inline void send_msg(struct cn_msg *msg) { - preempt_disable(); + local_lock(send_msg_lock); msg->seq = __this_cpu_inc_return(proc_event_counts) - 1; ((struct proc_event *)msg->data)->cpu = smp_processor_id(); @@ -70,7 +72,7 @@ static inline void send_msg(struct cn_msg *msg) */ cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_NOWAIT); - preempt_enable(); + local_unlock(send_msg_lock); } void proc_fork_connector(struct task_struct *task) diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86 index adbd1de1cea5..1fac5074f2cf 100644 --- a/drivers/cpufreq/Kconfig.x86 +++ b/drivers/cpufreq/Kconfig.x86 @@ -124,7 +124,7 @@ config X86_POWERNOW_K7_ACPI config X86_POWERNOW_K8 tristate "AMD Opteron/Athlon64 PowerNow!" - depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ + depends on ACPI && ACPI_PROCESSOR && X86_ACPI_CPUFREQ && !PREEMPT_RT_BASE help This adds the CPUFreq driver for K8/early Opteron/Athlon64 processors. Support for K10 and newer processors is now in acpi-cpufreq. diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 20caeeacba4d..d66914acd544 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1302,7 +1302,9 @@ i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, if (ret) return ret; +#ifndef CONFIG_PREEMPT_RT_BASE trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); +#endif i915_gem_execbuffer_move_to_active(vmas, params->request); diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 6f10b421487b..dd3a9a6ace11 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -40,7 +40,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) if (!mutex_is_locked(mutex)) return false; -#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER) +#if (defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)) && !defined(CONFIG_PREEMPT_RT_BASE) return mutex->owner == task; #else /* Since UP may be pre-empted, we cannot assume that we own the lock */ diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 234b29b80a19..6265aa1f2aeb 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -812,6 +812,7 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ + preempt_disable_rt(); /* Get optional system timestamp before query. */ if (stime) @@ -863,6 +864,7 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, *etime = ktime_get(); /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ + preempt_enable_rt(); spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index fcdadc43a560..f9b8ae155b40 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11649,7 +11649,7 @@ void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_flip_work *work; - WARN_ON(!in_interrupt()); + WARN_ON_NONRT(!in_interrupt()); if (crtc == NULL) return; diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 4178849631ad..0eb939c92544 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -38,6 +38,7 @@ #include "intel_drv.h" #include <drm/i915_drm.h> #include "i915_drv.h" +#include <linux/locallock.h> static bool format_is_yuv(uint32_t format) @@ -64,6 +65,8 @@ int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, 1000 * adjusted_mode->crtc_htotal); } +static DEFINE_LOCAL_IRQ_LOCK(pipe_update_lock); + /** * intel_pipe_update_start() - start update of a set of display registers * @crtc: the crtc of which the registers are going to be updated @@ -94,7 +97,7 @@ void intel_pipe_update_start(struct intel_crtc *crtc) min = vblank_start - intel_usecs_to_scanlines(adjusted_mode, 100); max = vblank_start - 1; - local_irq_disable(); + local_lock_irq(pipe_update_lock); if (min <= 0 || max <= 0) return; @@ -124,11 +127,11 @@ void intel_pipe_update_start(struct intel_crtc *crtc) break; } - local_irq_enable(); + local_unlock_irq(pipe_update_lock); timeout = schedule_timeout(timeout); - local_irq_disable(); + local_lock_irq(pipe_update_lock); } finish_wait(wq, &wait); @@ -180,7 +183,7 @@ void intel_pipe_update_end(struct intel_crtc *crtc, struct intel_flip_work *work crtc->base.state->event = NULL; } - local_irq_enable(); + local_unlock_irq(pipe_update_lock); if (crtc->debug.start_vbl_count && crtc->debug.start_vbl_count != end_vbl_count) { diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index 283d2841ba58..d01f6ed1977e 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -23,7 +23,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) if (!mutex_is_locked(mutex)) return false; -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES) +#if (defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)) && !defined(CONFIG_PREEMPT_RT_BASE) return mutex->owner == task; #else /* Since UP may be pre-empted, we cannot assume that we own the lock */ diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index c3206fb8f4cf..6e2423186e2a 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -1869,6 +1869,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, struct radeon_device *rdev = dev->dev_private; /* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */ + preempt_disable_rt(); /* Get optional system timestamp before query. */ if (stime) @@ -1961,6 +1962,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, *etime = ktime_get(); /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ + preempt_enable_rt(); /* Decode into vertical and horizontal scanout position. */ *vpos = position & 0x1fff; diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index e82f7e1c217c..b57d917b6ab7 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -761,6 +761,8 @@ static void vmbus_isr(void) void *page_addr; struct hv_message *msg; union hv_synic_event_flags *event; + struct pt_regs *regs = get_irq_regs(); + u64 ip = regs ? instruction_pointer(regs) : 0; bool handled = false; page_addr = hv_context.synic_event_page[cpu]; @@ -808,7 +810,7 @@ static void vmbus_isr(void) tasklet_schedule(hv_context.msg_dpc[cpu]); } - add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0); + add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0, ip); } diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c index 36f76e28a0bf..394f142f90c7 100644 --- a/drivers/ide/alim15x3.c +++ b/drivers/ide/alim15x3.c @@ -234,7 +234,7 @@ static int init_chipset_ali15x3(struct pci_dev *dev) isa_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL); - local_irq_save(flags); + local_irq_save_nort(flags); if (m5229_revision < 0xC2) { /* @@ -325,7 +325,7 @@ out: } pci_dev_put(north); pci_dev_put(isa_dev); - local_irq_restore(flags); + local_irq_restore_nort(flags); return 0; } diff --git a/drivers/ide/hpt366.c b/drivers/ide/hpt366.c index 0ceae5cbd89a..c212e85d7f3e 100644 --- a/drivers/ide/hpt366.c +++ b/drivers/ide/hpt366.c @@ -1236,7 +1236,7 @@ static int init_dma_hpt366(ide_hwif_t *hwif, dma_old = inb(base + 2); - local_irq_save(flags); + local_irq_save_nort(flags); dma_new = dma_old; pci_read_config_byte(dev, hwif->channel ? 0x4b : 0x43, &masterdma); @@ -1247,7 +1247,7 @@ static int init_dma_hpt366(ide_hwif_t *hwif, if (dma_new != dma_old) outb(dma_new, base + 2); - local_irq_restore(flags); + local_irq_restore_nort(flags); printk(KERN_INFO " %s: BM-DMA at 0x%04lx-0x%04lx\n", hwif->name, base, base + 7); diff --git a/drivers/ide/ide-io-std.c b/drivers/ide/ide-io-std.c index 19763977568c..4169433faab5 100644 --- a/drivers/ide/ide-io-std.c +++ b/drivers/ide/ide-io-std.c @@ -175,7 +175,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, unsigned long uninitialized_var(flags); if ((io_32bit & 2) && !mmio) { - local_irq_save(flags); + local_irq_save_nort(flags); ata_vlb_sync(io_ports->nsect_addr); } @@ -186,7 +186,7 @@ void ide_input_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, insl(data_addr, buf, words); if ((io_32bit & 2) && !mmio) - local_irq_restore(flags); + local_irq_restore_nort(flags); if (((len + 1) & 3) < 2) return; @@ -219,7 +219,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, unsigned long uninitialized_var(flags); if ((io_32bit & 2) && !mmio) { - local_irq_save(flags); + local_irq_save_nort(flags); ata_vlb_sync(io_ports->nsect_addr); } @@ -230,7 +230,7 @@ void ide_output_data(ide_drive_t *drive, struct ide_cmd *cmd, void *buf, outsl(data_addr, buf, words); if ((io_32bit & 2) && !mmio) - local_irq_restore(flags); + local_irq_restore_nort(flags); if (((len + 1) & 3) < 2) return; diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 669ea1e45795..e12e43e62245 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -659,7 +659,7 @@ void ide_timer_expiry (unsigned long data) /* disable_irq_nosync ?? */ disable_irq(hwif->irq); /* local CPU only, as if we were handling an interrupt */ - local_irq_disable(); + local_irq_disable_nort(); if (hwif->polling) { startstop = handler(drive); } else if (drive_is_ready(drive)) { diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index 376f2dc410c5..f014dd1b73dc 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -129,12 +129,12 @@ int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, if ((stat & ATA_BUSY) == 0) break; - local_irq_restore(flags); + local_irq_restore_nort(flags); *rstat = stat; return -EBUSY; } } - local_irq_restore(flags); + local_irq_restore_nort(flags); } /* * Allow status to settle, then read it again. diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 0b63facd1d87..4ceba37afc0c 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -196,10 +196,10 @@ static void do_identify(ide_drive_t *drive, u8 cmd, u16 *id) int bswap = 1; /* local CPU only; some systems need this */ - local_irq_save(flags); + local_irq_save_nort(flags); /* read 512 bytes of id info */ hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE); - local_irq_restore(flags); + local_irq_restore_nort(flags); drive->dev_flags |= IDE_DFLAG_ID_READ; #ifdef DEBUG diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index a716693417a3..be0568c722d6 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -250,7 +250,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd, page_is_high = PageHighMem(page); if (page_is_high) - local_irq_save(flags); + local_irq_save_nort(flags); buf = kmap_atomic(page) + offset; @@ -271,7 +271,7 @@ void ide_pio_bytes(ide_drive_t *drive, struct ide_cmd *cmd, kunmap_atomic(buf); if (page_is_high) - local_irq_restore(flags); + local_irq_restore_nort(flags); len -= nr_bytes; } @@ -414,7 +414,7 @@ static ide_startstop_t pre_task_out_intr(ide_drive_t *drive, } if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0) - local_irq_disable(); + local_irq_disable_nort(); ide_set_handler(drive, &task_pio_intr, WAIT_WORSTCASE); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index fddff403d5d2..cca1bb4fbfe3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -902,7 +902,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) ipoib_dbg_mcast(priv, "restarting multicast task\n"); - local_irq_save(flags); + local_irq_save_nort(flags); netif_addr_lock(dev); spin_lock(&priv->lock); @@ -984,7 +984,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) spin_unlock(&priv->lock); netif_addr_unlock(dev); - local_irq_restore(flags); + local_irq_restore_nort(flags); /* * make sure the in-flight joins have finished before we attempt diff --git a/drivers/input/gameport/gameport.c b/drivers/input/gameport/gameport.c index 4a2a9e370be7..e970d9afd179 100644 --- a/drivers/input/gameport/gameport.c +++ b/drivers/input/gameport/gameport.c @@ -91,13 +91,13 @@ static int gameport_measure_speed(struct gameport *gameport) tx = ~0; for (i = 0; i < 50; i++) { - local_irq_save(flags); + local_irq_save_nort(flags); t1 = ktime_get_ns(); for (t = 0; t < 50; t++) gameport_read(gameport); t2 = ktime_get_ns(); t3 = ktime_get_ns(); - local_irq_restore(flags); + local_irq_restore_nort(flags); udelay(i * 10); t = (t2 - t1) - (t3 - t2); if (t < tx) @@ -124,12 +124,12 @@ static int old_gameport_measure_speed(struct gameport *gameport) tx = 1 << 30; for(i = 0; i < 50; i++) { - local_irq_save(flags); + local_irq_save_nort(flags); GET_TIME(t1); for (t = 0; t < 50; t++) gameport_read(gameport); GET_TIME(t2); GET_TIME(t3); - local_irq_restore(flags); + local_irq_restore_nort(flags); udelay(i * 10); if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t; } @@ -148,11 +148,11 @@ static int old_gameport_measure_speed(struct gameport *gameport) tx = 1 << 30; for(i = 0; i < 50; i++) { - local_irq_save(flags); + local_irq_save_nort(flags); t1 = rdtsc(); for (t = 0; t < 50; t++) gameport_read(gameport); t2 = rdtsc(); - local_irq_restore(flags); + local_irq_restore_nort(flags); udelay(i * 10); if (t2 - t1 < tx) tx = t2 - t1; } diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 62e5e64e3919..ca127b8c8fea 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1854,10 +1854,10 @@ static int __attach_device(struct iommu_dev_data *dev_data, int ret; /* - * Must be called with IRQs disabled. Warn here to detect early - * when its not. + * Must be called with IRQs disabled on a non RT kernel. Warn here to + * detect early when its not. */ - WARN_ON(!irqs_disabled()); + WARN_ON_NONRT(!irqs_disabled()); /* lock domain */ spin_lock(&domain->lock); @@ -2025,10 +2025,10 @@ static void __detach_device(struct iommu_dev_data *dev_data) struct protection_domain *domain; /* - * Must be called with IRQs disabled. Warn here to detect early - * when its not. + * Must be called with IRQs disabled on a non RT kernel. Warn here to + * detect early when its not. */ - WARN_ON(!irqs_disabled()); + WARN_ON_NONRT(!irqs_disabled()); if (WARN_ON(!dev_data->domain)) return; diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index f88b2bbc3b56..ad432b5ebf07 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -479,7 +479,7 @@ struct deferred_flush_data { struct deferred_flush_table *tables; }; -DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush); +static DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush); /* bitmap for indexing intel_iommus */ static int g_num_of_iommus; @@ -3669,10 +3669,8 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn, struct intel_iommu *iommu; struct deferred_flush_entry *entry; struct deferred_flush_data *flush_data; - unsigned int cpuid; - cpuid = get_cpu(); - flush_data = per_cpu_ptr(&deferred_flush, cpuid); + flush_data = raw_cpu_ptr(&deferred_flush); /* Flush all CPUs' entries to avoid deferring too much. If * this becomes a bottleneck, can just flush us, and rely on @@ -3705,8 +3703,6 @@ static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn, } flush_data->size++; spin_unlock_irqrestore(&flush_data->lock, flags); - - put_cpu(); } static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index e23001bfcfee..359d5d169ec0 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -22,6 +22,7 @@ #include <linux/slab.h> #include <linux/smp.h> #include <linux/bitops.h> +#include <linux/cpu.h> static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn, @@ -420,10 +421,8 @@ retry: /* Try replenishing IOVAs by flushing rcache. */ flushed_rcache = true; - preempt_disable(); for_each_online_cpu(cpu) free_cpu_cached_iovas(cpu, iovad); - preempt_enable(); goto retry; } @@ -751,7 +750,7 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, bool can_insert = false; unsigned long flags; - cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); + cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches); spin_lock_irqsave(&cpu_rcache->lock, flags); if (!iova_magazine_full(cpu_rcache->loaded)) { @@ -781,7 +780,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, iova_magazine_push(cpu_rcache->loaded, iova_pfn); spin_unlock_irqrestore(&cpu_rcache->lock, flags); - put_cpu_ptr(rcache->cpu_rcaches); if (mag_to_free) { iova_magazine_free_pfns(mag_to_free, iovad); @@ -815,7 +813,7 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, bool has_pfn = false; unsigned long flags; - cpu_rcache = get_cpu_ptr(rcache->cpu_rcaches); + cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches); spin_lock_irqsave(&cpu_rcache->lock, flags); if (!iova_magazine_empty(cpu_rcache->loaded)) { @@ -837,7 +835,6 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn); spin_unlock_irqrestore(&cpu_rcache->lock, flags); - put_cpu_ptr(rcache->cpu_rcaches); return iova_pfn; } diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig index 3f9ddb9fafa7..09da5b6b44a1 100644 --- a/drivers/leds/trigger/Kconfig +++ b/drivers/leds/trigger/Kconfig @@ -69,7 +69,7 @@ config LEDS_TRIGGER_BACKLIGHT config LEDS_TRIGGER_CPU bool "LED CPU Trigger" - depends on LEDS_TRIGGERS + depends on LEDS_TRIGGERS && !PREEMPT_RT_BASE help This allows LEDs to be controlled by active CPUs. This shows the active CPUs across an array of LEDs so you can see which diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig index 4d200883c505..98b64ed5cb81 100644 --- a/drivers/md/bcache/Kconfig +++ b/drivers/md/bcache/Kconfig @@ -1,6 +1,7 @@ config BCACHE tristate "Block device as cache" + depends on !PREEMPT_RT_FULL ---help--- Allows a block device to be used as cache for other devices; uses a btree for indexing and the layout is optimized for SSDs. diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index f8e03da6858d..175d184b40b3 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -821,7 +821,7 @@ static void dm_old_request_fn(struct request_queue *q) /* Establish tio->ti before queuing work (map_tio_request) */ tio->ti = ti; queue_kthread_work(&md->kworker, &tio->work); - BUG_ON(!irqs_disabled()); + BUG_ON_NONRT(!irqs_disabled()); } } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index a87549be8e53..c9aa37234e46 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1928,8 +1928,9 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) struct raid5_percpu *percpu; unsigned long cpu; - cpu = get_cpu(); + cpu = get_cpu_light(); percpu = per_cpu_ptr(conf->percpu, cpu); + spin_lock(&percpu->lock); if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { ops_run_biofill(sh); overlap_clear++; @@ -1985,7 +1986,8 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) if (test_and_clear_bit(R5_Overlap, &dev->flags)) wake_up(&sh->raid_conf->wait_for_overlap); } - put_cpu(); + spin_unlock(&percpu->lock); + put_cpu_light(); } static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, @@ -6438,6 +6440,7 @@ static int raid5_alloc_percpu(struct r5conf *conf) __func__, cpu); break; } + spin_lock_init(&per_cpu_ptr(conf->percpu, cpu)->lock); } put_online_cpus(); diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 517d4b68a1be..efe91887ecd7 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -504,6 +504,7 @@ struct r5conf { int recovery_disabled; /* per cpu variables */ struct raid5_percpu { + spinlock_t lock; /* Protection for -RT */ struct page *spare_page; /* Used when checking P/Q in raid6 */ struct flex_array *scribble; /* space for constructing buffer * lists and performing address diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index d00252828966..9faab404faac 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -54,6 +54,7 @@ config AD525X_DPOT_SPI config ATMEL_TCLIB bool "Atmel AT32/AT91 Timer/Counter Library" depends on (AVR32 || ARCH_AT91) + default y if PREEMPT_RT_FULL help Select this if you want a library to allocate the Timer/Counter blocks found on many Atmel processors. This facilitates using @@ -69,8 +70,7 @@ config ATMEL_TCB_CLKSRC are combined to make a single 32-bit timer. When GENERIC_CLOCKEVENTS is defined, the third timer channel - may be used as a clock event device supporting oneshot mode - (delays of up to two seconds) based on the 32 KiHz clock. + may be used as a clock event device supporting oneshot mode. config ATMEL_TCB_CLKSRC_BLOCK int @@ -84,6 +84,15 @@ config ATMEL_TCB_CLKSRC_BLOCK TC can be used for other purposes, such as PWM generation and interval timing. +config ATMEL_TCB_CLKSRC_USE_SLOW_CLOCK + bool "TC Block use 32 KiHz clock" + depends on ATMEL_TCB_CLKSRC + default y if !PREEMPT_RT_FULL + help + Select this to use 32 KiHz base clock rate as TC block clock + source for clock events. + + config DUMMY_IRQ tristate "Dummy IRQ handler" default n @@ -114,6 +123,35 @@ config IBM_ASM for information on the specific driver level and support statement for your IBM server. +config HWLAT_DETECTOR + tristate "Testing module to detect hardware-induced latencies" + depends on DEBUG_FS + depends on RING_BUFFER + default m + ---help--- + A simple hardware latency detector. Use this module to detect + large latencies introduced by the behavior of the underlying + system firmware external to Linux. We do this using periodic + use of stop_machine to grab all available CPUs and measure + for unexplainable gaps in the CPU timestamp counter(s). By + default, the module is not enabled until the "enable" file + within the "hwlat_detector" debugfs directory is toggled. + + This module is often used to detect SMI (System Management + Interrupts) on x86 systems, though is not x86 specific. To + this end, we default to using a sample window of 1 second, + during which we will sample for 0.5 seconds. If an SMI or + similar event occurs during that time, it is recorded + into an 8K samples global ring buffer until retreived. + + WARNING: This software should never be enabled (it can be built + but should not be turned on after it is loaded) in a production + environment where high latencies are a concern since the + sampling mechanism actually introduces latencies for + regular tasks while the CPU(s) are being held. + + If unsure, say N + config PHANTOM tristate "Sensable PHANToM (PCI)" depends on PCI diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index fb32516ddfe2..8643df9af3c4 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -38,6 +38,7 @@ obj-$(CONFIG_C2PORT) += c2port/ obj-$(CONFIG_HMC6352) += hmc6352.o obj-y += eeprom/ obj-y += cb710/ +obj-$(CONFIG_HWLAT_DETECTOR) += hwlat_detector.o obj-$(CONFIG_SPEAR13XX_PCIE_GADGET) += spear13xx_pcie_gadget.o obj-$(CONFIG_VMWARE_BALLOON) += vmw_balloon.o obj-$(CONFIG_ARM_CHARLCD) += arm-charlcd.o diff --git a/drivers/misc/hwlat_detector.c b/drivers/misc/hwlat_detector.c new file mode 100644 index 000000000000..52f5ad5fd9c0 --- /dev/null +++ b/drivers/misc/hwlat_detector.c @@ -0,0 +1,1240 @@ +/* + * hwlat_detector.c - A simple Hardware Latency detector. + * + * Use this module to detect large system latencies induced by the behavior of + * certain underlying system hardware or firmware, independent of Linux itself. + * The code was developed originally to detect the presence of SMIs on Intel + * and AMD systems, although there is no dependency upon x86 herein. + * + * The classical example usage of this module is in detecting the presence of + * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a + * somewhat special form of hardware interrupt spawned from earlier CPU debug + * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge + * LPC (or other device) to generate a special interrupt under certain + * circumstances, for example, upon expiration of a special SMI timer device, + * due to certain external thermal readings, on certain I/O address accesses, + * and other situations. An SMI hits a special CPU pin, triggers a special + * SMI mode (complete with special memory map), and the OS is unaware. + * + * Although certain hardware-inducing latencies are necessary (for example, + * a modern system often requires an SMI handler for correct thermal control + * and remote management) they can wreak havoc upon any OS-level performance + * guarantees toward low-latency, especially when the OS is not even made + * aware of the presence of these interrupts. For this reason, we need a + * somewhat brute force mechanism to detect these interrupts. In this case, + * we do it by hogging all of the CPU(s) for configurable timer intervals, + * sampling the built-in CPU timer, looking for discontiguous readings. + * + * WARNING: This implementation necessarily introduces latencies. Therefore, + * you should NEVER use this module in a production environment + * requiring any kind of low-latency performance guarantee(s). + * + * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com> + * + * Includes useful feedback from Clark Williams <clark@redhat.com> + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/ring_buffer.h> +#include <linux/time.h> +#include <linux/hrtimer.h> +#include <linux/kthread.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/uaccess.h> +#include <linux/version.h> +#include <linux/delay.h> +#include <linux/slab.h> +#include <linux/trace_clock.h> + +#define BUF_SIZE_DEFAULT 262144UL /* 8K*(sizeof(entry)) */ +#define BUF_FLAGS (RB_FL_OVERWRITE) /* no block on full */ +#define U64STR_SIZE 22 /* 20 digits max */ + +#define VERSION "1.0.0" +#define BANNER "hwlat_detector: " +#define DRVNAME "hwlat_detector" +#define DEFAULT_SAMPLE_WINDOW 1000000 /* 1s */ +#define DEFAULT_SAMPLE_WIDTH 500000 /* 0.5s */ +#define DEFAULT_LAT_THRESHOLD 10 /* 10us */ + +/* Module metadata */ + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jon Masters <jcm@redhat.com>"); +MODULE_DESCRIPTION("A simple hardware latency detector"); +MODULE_VERSION(VERSION); + +/* Module parameters */ + +static int debug; +static int enabled; +static int threshold; + +module_param(debug, int, 0); /* enable debug */ +module_param(enabled, int, 0); /* enable detector */ +module_param(threshold, int, 0); /* latency threshold */ + +/* Buffering and sampling */ + +static struct ring_buffer *ring_buffer; /* sample buffer */ +static DEFINE_MUTEX(ring_buffer_mutex); /* lock changes */ +static unsigned long buf_size = BUF_SIZE_DEFAULT; +static struct task_struct *kthread; /* sampling thread */ + +/* DebugFS filesystem entries */ + +static struct dentry *debug_dir; /* debugfs directory */ +static struct dentry *debug_max; /* maximum TSC delta */ +static struct dentry *debug_count; /* total detect count */ +static struct dentry *debug_sample_width; /* sample width us */ +static struct dentry *debug_sample_window; /* sample window us */ +static struct dentry *debug_sample; /* raw samples us */ +static struct dentry *debug_threshold; /* threshold us */ +static struct dentry *debug_enable; /* enable/disable */ + +/* Individual samples and global state */ + +struct sample; /* latency sample */ +struct data; /* Global state */ + +/* Sampling functions */ +static int __buffer_add_sample(struct sample *sample); +static struct sample *buffer_get_sample(struct sample *sample); + +/* Threading and state */ +static int kthread_fn(void *unused); +static int start_kthread(void); +static int stop_kthread(void); +static void __reset_stats(void); +static int init_stats(void); + +/* Debugfs interface */ +static ssize_t simple_data_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos, const u64 *entry); +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos, u64 *entry); +static int debug_sample_fopen(struct inode *inode, struct file *filp); +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos); +static int debug_sample_release(struct inode *inode, struct file *filp); +static int debug_enable_fopen(struct inode *inode, struct file *filp); +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos); +static ssize_t debug_enable_fwrite(struct file *file, + const char __user *user_buffer, + size_t user_size, loff_t *offset); + +/* Initialization functions */ +static int init_debugfs(void); +static void free_debugfs(void); +static int detector_init(void); +static void detector_exit(void); + +/* Individual latency samples are stored here when detected and packed into + * the ring_buffer circular buffer, where they are overwritten when + * more than buf_size/sizeof(sample) samples are received. */ +struct sample { + u64 seqnum; /* unique sequence */ + u64 duration; /* ktime delta */ + u64 outer_duration; /* ktime delta (outer loop) */ + struct timespec timestamp; /* wall time */ + unsigned long lost; +}; + +/* keep the global state somewhere. */ +static struct data { + + struct mutex lock; /* protect changes */ + + u64 count; /* total since reset */ + u64 max_sample; /* max hardware latency */ + u64 threshold; /* sample threshold level */ + + u64 sample_window; /* total sampling window (on+off) */ + u64 sample_width; /* active sampling portion of window */ + + atomic_t sample_open; /* whether the sample file is open */ + + wait_queue_head_t wq; /* waitqeue for new sample values */ + +} data; + +/** + * __buffer_add_sample - add a new latency sample recording to the ring buffer + * @sample: The new latency sample value + * + * This receives a new latency sample and records it in a global ring buffer. + * No additional locking is used in this case. + */ +static int __buffer_add_sample(struct sample *sample) +{ + return ring_buffer_write(ring_buffer, + sizeof(struct sample), sample); +} + +/** + * buffer_get_sample - remove a hardware latency sample from the ring buffer + * @sample: Pre-allocated storage for the sample + * + * This retrieves a hardware latency sample from the global circular buffer + */ +static struct sample *buffer_get_sample(struct sample *sample) +{ + struct ring_buffer_event *e = NULL; + struct sample *s = NULL; + unsigned int cpu = 0; + + if (!sample) + return NULL; + + mutex_lock(&ring_buffer_mutex); + for_each_online_cpu(cpu) { + e = ring_buffer_consume(ring_buffer, cpu, NULL, &sample->lost); + if (e) + break; + } + + if (e) { + s = ring_buffer_event_data(e); + memcpy(sample, s, sizeof(struct sample)); + } else + sample = NULL; + mutex_unlock(&ring_buffer_mutex); + + return sample; +} + +#ifndef CONFIG_TRACING +#define time_type ktime_t +#define time_get() ktime_get() +#define time_to_us(x) ktime_to_us(x) +#define time_sub(a, b) ktime_sub(a, b) +#define init_time(a, b) (a).tv64 = b +#define time_u64(a) ((a).tv64) +#else +#define time_type u64 +#define time_get() trace_clock_local() +#define time_to_us(x) div_u64(x, 1000) +#define time_sub(a, b) ((a) - (b)) +#define init_time(a, b) (a = b) +#define time_u64(a) a +#endif +/** + * get_sample - sample the CPU TSC and look for likely hardware latencies + * + * Used to repeatedly capture the CPU TSC (or similar), looking for potential + * hardware-induced latency. Called with interrupts disabled and with + * data.lock held. + */ +static int get_sample(void) +{ + time_type start, t1, t2, last_t2; + s64 diff, total = 0; + u64 sample = 0; + u64 outer_sample = 0; + int ret = -1; + + init_time(last_t2, 0); + start = time_get(); /* start timestamp */ + + do { + + t1 = time_get(); /* we'll look for a discontinuity */ + t2 = time_get(); + + if (time_u64(last_t2)) { + /* Check the delta from outer loop (t2 to next t1) */ + diff = time_to_us(time_sub(t1, last_t2)); + /* This shouldn't happen */ + if (diff < 0) { + pr_err(BANNER "time running backwards\n"); + goto out; + } + if (diff > outer_sample) + outer_sample = diff; + } + last_t2 = t2; + + total = time_to_us(time_sub(t2, start)); /* sample width */ + + /* This checks the inner loop (t1 to t2) */ + diff = time_to_us(time_sub(t2, t1)); /* current diff */ + + /* This shouldn't happen */ + if (diff < 0) { + pr_err(BANNER "time running backwards\n"); + goto out; + } + + if (diff > sample) + sample = diff; /* only want highest value */ + + } while (total <= data.sample_width); + + ret = 0; + + /* If we exceed the threshold value, we have found a hardware latency */ + if (sample > data.threshold || outer_sample > data.threshold) { + struct sample s; + + ret = 1; + + data.count++; + s.seqnum = data.count; + s.duration = sample; + s.outer_duration = outer_sample; + s.timestamp = CURRENT_TIME; + __buffer_add_sample(&s); + + /* Keep a running maximum ever recorded hardware latency */ + if (sample > data.max_sample) + data.max_sample = sample; + } + +out: + return ret; +} + +/* + * kthread_fn - The CPU time sampling/hardware latency detection kernel thread + * @unused: A required part of the kthread API. + * + * Used to periodically sample the CPU TSC via a call to get_sample. We + * disable interrupts, which does (intentionally) introduce latency since we + * need to ensure nothing else might be running (and thus pre-empting). + * Obviously this should never be used in production environments. + * + * Currently this runs on which ever CPU it was scheduled on, but most + * real-worald hardware latency situations occur across several CPUs, + * but we might later generalize this if we find there are any actualy + * systems with alternate SMI delivery or other hardware latencies. + */ +static int kthread_fn(void *unused) +{ + int ret; + u64 interval; + + while (!kthread_should_stop()) { + + mutex_lock(&data.lock); + + local_irq_disable(); + ret = get_sample(); + local_irq_enable(); + + if (ret > 0) + wake_up(&data.wq); /* wake up reader(s) */ + + interval = data.sample_window - data.sample_width; + do_div(interval, USEC_PER_MSEC); /* modifies interval value */ + + mutex_unlock(&data.lock); + + if (msleep_interruptible(interval)) + break; + } + + return 0; +} + +/** + * start_kthread - Kick off the hardware latency sampling/detector kthread + * + * This starts a kernel thread that will sit and sample the CPU timestamp + * counter (TSC or similar) and look for potential hardware latencies. + */ +static int start_kthread(void) +{ + kthread = kthread_run(kthread_fn, NULL, + DRVNAME); + if (IS_ERR(kthread)) { + pr_err(BANNER "could not start sampling thread\n"); + enabled = 0; + return -ENOMEM; + } + + return 0; +} + +/** + * stop_kthread - Inform the hardware latency samping/detector kthread to stop + * + * This kicks the running hardware latency sampling/detector kernel thread and + * tells it to stop sampling now. Use this on unload and at system shutdown. + */ +static int stop_kthread(void) +{ + int ret; + + ret = kthread_stop(kthread); + + return ret; +} + +/** + * __reset_stats - Reset statistics for the hardware latency detector + * + * We use data to store various statistics and global state. We call this + * function in order to reset those when "enable" is toggled on or off, and + * also at initialization. Should be called with data.lock held. + */ +static void __reset_stats(void) +{ + data.count = 0; + data.max_sample = 0; + ring_buffer_reset(ring_buffer); /* flush out old sample entries */ +} + +/** + * init_stats - Setup global state statistics for the hardware latency detector + * + * We use data to store various statistics and global state. We also use + * a global ring buffer (ring_buffer) to keep raw samples of detected hardware + * induced system latencies. This function initializes these structures and + * allocates the global ring buffer also. + */ +static int init_stats(void) +{ + int ret = -ENOMEM; + + mutex_init(&data.lock); + init_waitqueue_head(&data.wq); + atomic_set(&data.sample_open, 0); + + ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS); + + if (WARN(!ring_buffer, KERN_ERR BANNER + "failed to allocate ring buffer!\n")) + goto out; + + __reset_stats(); + data.threshold = threshold ?: DEFAULT_LAT_THRESHOLD; /* threshold us */ + data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */ + data.sample_width = DEFAULT_SAMPLE_WIDTH; /* width us */ + + ret = 0; + +out: + return ret; + +} + +/* + * simple_data_read - Wrapper read function for global state debugfs entries + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * @entry: The entry to read from + * + * This function provides a generic read implementation for the global state + * "data" structure debugfs filesystem entries. It would be nice to use + * simple_attr_read directly, but we need to make sure that the data.lock + * is held during the actual read. + */ +static ssize_t simple_data_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos, const u64 *entry) +{ + char buf[U64STR_SIZE]; + u64 val = 0; + int len = 0; + + memset(buf, 0, sizeof(buf)); + + if (!entry) + return -EFAULT; + + mutex_lock(&data.lock); + val = *entry; + mutex_unlock(&data.lock); + + len = snprintf(buf, sizeof(buf), "%llu\n", (unsigned long long)val); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); + +} + +/* + * simple_data_write - Wrapper write function for global state debugfs entries + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to write value from + * @cnt: The maximum number of bytes to write + * @ppos: The current "file" position + * @entry: The entry to write to + * + * This function provides a generic write implementation for the global state + * "data" structure debugfs filesystem entries. It would be nice to use + * simple_attr_write directly, but we need to make sure that the data.lock + * is held during the actual write. + */ +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos, u64 *entry) +{ + char buf[U64STR_SIZE]; + int csize = min(cnt, sizeof(buf)); + u64 val = 0; + int err = 0; + + memset(buf, '\0', sizeof(buf)); + if (copy_from_user(buf, ubuf, csize)) + return -EFAULT; + + buf[U64STR_SIZE-1] = '\0'; /* just in case */ + err = kstrtoull(buf, 10, &val); + if (err) + return -EINVAL; + + mutex_lock(&data.lock); + *entry = val; + mutex_unlock(&data.lock); + + return csize; +} + +/** + * debug_count_fopen - Open function for "count" debugfs entry + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "count" debugfs + * interface to the hardware latency detector. + */ +static int debug_count_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_count_fread - Read function for "count" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "count" debugfs + * interface to the hardware latency detector. Can be used to read the + * number of latency readings exceeding the configured threshold since + * the detector was last reset (e.g. by writing a zero into "count"). + */ +static ssize_t debug_count_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_data_read(filp, ubuf, cnt, ppos, &data.count); +} + +/** + * debug_count_fwrite - Write function for "count" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "count" debugfs + * interface to the hardware latency detector. Can be used to write a + * desired value, especially to zero the total count. + */ +static ssize_t debug_count_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + return simple_data_write(filp, ubuf, cnt, ppos, &data.count); +} + +/** + * debug_enable_fopen - Dummy open function for "enable" debugfs interface + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "enable" debugfs + * interface to the hardware latency detector. + */ +static int debug_enable_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_enable_fread - Read function for "enable" debugfs interface + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "enable" debugfs + * interface to the hardware latency detector. Can be used to determine + * whether the detector is currently enabled ("0\n" or "1\n" returned). + */ +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[4]; + + if ((cnt < sizeof(buf)) || (*ppos)) + return 0; + + buf[0] = enabled ? '1' : '0'; + buf[1] = '\n'; + buf[2] = '\0'; + if (copy_to_user(ubuf, buf, strlen(buf))) + return -EFAULT; + return *ppos = strlen(buf); +} + +/** + * debug_enable_fwrite - Write function for "enable" debugfs interface + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "enable" debugfs + * interface to the hardware latency detector. Can be used to enable or + * disable the detector, which will have the side-effect of possibly + * also resetting the global stats and kicking off the measuring + * kthread (on an enable) or the converse (upon a disable). + */ +static ssize_t debug_enable_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + char buf[4]; + int csize = min(cnt, sizeof(buf)); + long val = 0; + int err = 0; + + memset(buf, '\0', sizeof(buf)); + if (copy_from_user(buf, ubuf, csize)) + return -EFAULT; + + buf[sizeof(buf)-1] = '\0'; /* just in case */ + err = kstrtoul(buf, 10, &val); + if (err) + return -EINVAL; + + if (val) { + if (enabled) + goto unlock; + enabled = 1; + __reset_stats(); + if (start_kthread()) + return -EFAULT; + } else { + if (!enabled) + goto unlock; + enabled = 0; + err = stop_kthread(); + if (err) { + pr_err(BANNER "cannot stop kthread\n"); + return -EFAULT; + } + wake_up(&data.wq); /* reader(s) should return */ + } +unlock: + return csize; +} + +/** + * debug_max_fopen - Open function for "max" debugfs entry + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "max" debugfs + * interface to the hardware latency detector. + */ +static int debug_max_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_max_fread - Read function for "max" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "max" debugfs + * interface to the hardware latency detector. Can be used to determine + * the maximum latency value observed since it was last reset. + */ +static ssize_t debug_max_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_data_read(filp, ubuf, cnt, ppos, &data.max_sample); +} + +/** + * debug_max_fwrite - Write function for "max" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "max" debugfs + * interface to the hardware latency detector. Can be used to reset the + * maximum or set it to some other desired value - if, then, subsequent + * measurements exceed this value, the maximum will be updated. + */ +static ssize_t debug_max_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + return simple_data_write(filp, ubuf, cnt, ppos, &data.max_sample); +} + + +/** + * debug_sample_fopen - An open function for "sample" debugfs interface + * @inode: The in-kernel inode representation of this debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function handles opening the "sample" file within the hardware + * latency detector debugfs directory interface. This file is used to read + * raw samples from the global ring_buffer and allows the user to see a + * running latency history. Can be opened blocking or non-blocking, + * affecting whether it behaves as a buffer read pipe, or does not. + * Implements simple locking to prevent multiple simultaneous use. + */ +static int debug_sample_fopen(struct inode *inode, struct file *filp) +{ + if (!atomic_add_unless(&data.sample_open, 1, 1)) + return -EBUSY; + else + return 0; +} + +/** + * debug_sample_fread - A read function for "sample" debugfs interface + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that will contain the samples read + * @cnt: The maximum bytes to read from the debugfs "file" + * @ppos: The current position in the debugfs "file" + * + * This function handles reading from the "sample" file within the hardware + * latency detector debugfs directory interface. This file is used to read + * raw samples from the global ring_buffer and allows the user to see a + * running latency history. By default this will block pending a new + * value written into the sample buffer, unless there are already a + * number of value(s) waiting in the buffer, or the sample file was + * previously opened in a non-blocking mode of operation. + */ +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + int len = 0; + char buf[64]; + struct sample *sample = NULL; + + if (!enabled) + return 0; + + sample = kzalloc(sizeof(struct sample), GFP_KERNEL); + if (!sample) + return -ENOMEM; + + while (!buffer_get_sample(sample)) { + + DEFINE_WAIT(wait); + + if (filp->f_flags & O_NONBLOCK) { + len = -EAGAIN; + goto out; + } + + prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE); + schedule(); + finish_wait(&data.wq, &wait); + + if (signal_pending(current)) { + len = -EINTR; + goto out; + } + + if (!enabled) { /* enable was toggled */ + len = 0; + goto out; + } + } + + len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\t%llu\n", + sample->timestamp.tv_sec, + sample->timestamp.tv_nsec, + sample->duration, + sample->outer_duration); + + + /* handling partial reads is more trouble than it's worth */ + if (len > cnt) + goto out; + + if (copy_to_user(ubuf, buf, len)) + len = -EFAULT; + +out: + kfree(sample); + return len; +} + +/** + * debug_sample_release - Release function for "sample" debugfs interface + * @inode: The in-kernel inode represenation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function completes the close of the debugfs interface "sample" file. + * Frees the sample_open "lock" so that other users may open the interface. + */ +static int debug_sample_release(struct inode *inode, struct file *filp) +{ + atomic_dec(&data.sample_open); + + return 0; +} + +/** + * debug_threshold_fopen - Open function for "threshold" debugfs entry + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "threshold" debugfs + * interface to the hardware latency detector. + */ +static int debug_threshold_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_threshold_fread - Read function for "threshold" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "threshold" debugfs + * interface to the hardware latency detector. It can be used to determine + * the current threshold level at which a latency will be recorded in the + * global ring buffer, typically on the order of 10us. + */ +static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_data_read(filp, ubuf, cnt, ppos, &data.threshold); +} + +/** + * debug_threshold_fwrite - Write function for "threshold" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "threshold" debugfs + * interface to the hardware latency detector. It can be used to configure + * the threshold level at which any subsequently detected latencies will + * be recorded into the global ring buffer. + */ +static ssize_t debug_threshold_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + int ret; + + ret = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold); + + if (enabled) + wake_up_process(kthread); + + return ret; +} + +/** + * debug_width_fopen - Open function for "width" debugfs entry + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "width" debugfs + * interface to the hardware latency detector. + */ +static int debug_width_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_width_fread - Read function for "width" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "width" debugfs + * interface to the hardware latency detector. It can be used to determine + * for how many us of the total window us we will actively sample for any + * hardware-induced latecy periods. Obviously, it is not possible to + * sample constantly and have the system respond to a sample reader, or, + * worse, without having the system appear to have gone out to lunch. + */ +static ssize_t debug_width_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_width); +} + +/** + * debug_width_fwrite - Write function for "width" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "width" debugfs + * interface to the hardware latency detector. It can be used to configure + * for how many us of the total window us we will actively sample for any + * hardware-induced latency periods. Obviously, it is not possible to + * sample constantly and have the system respond to a sample reader, or, + * worse, without having the system appear to have gone out to lunch. It + * is enforced that width is less that the total window size. + */ +static ssize_t debug_width_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + char buf[U64STR_SIZE]; + int csize = min(cnt, sizeof(buf)); + u64 val = 0; + int err = 0; + + memset(buf, '\0', sizeof(buf)); + if (copy_from_user(buf, ubuf, csize)) + return -EFAULT; + + buf[U64STR_SIZE-1] = '\0'; /* just in case */ + err = kstrtoull(buf, 10, &val); + if (err) + return -EINVAL; + + mutex_lock(&data.lock); + if (val < data.sample_window) + data.sample_width = val; + else { + mutex_unlock(&data.lock); + return -EINVAL; + } + mutex_unlock(&data.lock); + + if (enabled) + wake_up_process(kthread); + + return csize; +} + +/** + * debug_window_fopen - Open function for "window" debugfs entry + * @inode: The in-kernel inode representation of the debugfs "file" + * @filp: The active open file structure for the debugfs "file" + * + * This function provides an open implementation for the "window" debugfs + * interface to the hardware latency detector. The window is the total time + * in us that will be considered one sample period. Conceptually, windows + * occur back-to-back and contain a sample width period during which + * actual sampling occurs. + */ +static int debug_window_fopen(struct inode *inode, struct file *filp) +{ + return 0; +} + +/** + * debug_window_fread - Read function for "window" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The userspace provided buffer to read value into + * @cnt: The maximum number of bytes to read + * @ppos: The current "file" position + * + * This function provides a read implementation for the "window" debugfs + * interface to the hardware latency detector. The window is the total time + * in us that will be considered one sample period. Conceptually, windows + * occur back-to-back and contain a sample width period during which + * actual sampling occurs. Can be used to read the total window size. + */ +static ssize_t debug_window_fread(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_window); +} + +/** + * debug_window_fwrite - Write function for "window" debugfs entry + * @filp: The active open file structure for the debugfs "file" + * @ubuf: The user buffer that contains the value to write + * @cnt: The maximum number of bytes to write to "file" + * @ppos: The current position in the debugfs "file" + * + * This function provides a write implementation for the "window" debufds + * interface to the hardware latency detetector. The window is the total time + * in us that will be considered one sample period. Conceptually, windows + * occur back-to-back and contain a sample width period during which + * actual sampling occurs. Can be used to write a new total window size. It + * is enfoced that any value written must be greater than the sample width + * size, or an error results. + */ +static ssize_t debug_window_fwrite(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + char buf[U64STR_SIZE]; + int csize = min(cnt, sizeof(buf)); + u64 val = 0; + int err = 0; + + memset(buf, '\0', sizeof(buf)); + if (copy_from_user(buf, ubuf, csize)) + return -EFAULT; + + buf[U64STR_SIZE-1] = '\0'; /* just in case */ + err = kstrtoull(buf, 10, &val); + if (err) + return -EINVAL; + + mutex_lock(&data.lock); + if (data.sample_width < val) + data.sample_window = val; + else { + mutex_unlock(&data.lock); + return -EINVAL; + } + mutex_unlock(&data.lock); + + return csize; +} + +/* + * Function pointers for the "count" debugfs file operations + */ +static const struct file_operations count_fops = { + .open = debug_count_fopen, + .read = debug_count_fread, + .write = debug_count_fwrite, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "enable" debugfs file operations + */ +static const struct file_operations enable_fops = { + .open = debug_enable_fopen, + .read = debug_enable_fread, + .write = debug_enable_fwrite, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "max" debugfs file operations + */ +static const struct file_operations max_fops = { + .open = debug_max_fopen, + .read = debug_max_fread, + .write = debug_max_fwrite, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "sample" debugfs file operations + */ +static const struct file_operations sample_fops = { + .open = debug_sample_fopen, + .read = debug_sample_fread, + .release = debug_sample_release, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "threshold" debugfs file operations + */ +static const struct file_operations threshold_fops = { + .open = debug_threshold_fopen, + .read = debug_threshold_fread, + .write = debug_threshold_fwrite, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "width" debugfs file operations + */ +static const struct file_operations width_fops = { + .open = debug_width_fopen, + .read = debug_width_fread, + .write = debug_width_fwrite, + .owner = THIS_MODULE, +}; + +/* + * Function pointers for the "window" debugfs file operations + */ +static const struct file_operations window_fops = { + .open = debug_window_fopen, + .read = debug_window_fread, + .write = debug_window_fwrite, + .owner = THIS_MODULE, +}; + +/** + * init_debugfs - A function to initialize the debugfs interface files + * + * This function creates entries in debugfs for "hwlat_detector", including + * files to read values from the detector, current samples, and the + * maximum sample that has been captured since the hardware latency + * dectector was started. + */ +static int init_debugfs(void) +{ + int ret = -ENOMEM; + + debug_dir = debugfs_create_dir(DRVNAME, NULL); + if (!debug_dir) + goto err_debug_dir; + + debug_sample = debugfs_create_file("sample", 0444, + debug_dir, NULL, + &sample_fops); + if (!debug_sample) + goto err_sample; + + debug_count = debugfs_create_file("count", 0444, + debug_dir, NULL, + &count_fops); + if (!debug_count) + goto err_count; + + debug_max = debugfs_create_file("max", 0444, + debug_dir, NULL, + &max_fops); + if (!debug_max) + goto err_max; + + debug_sample_window = debugfs_create_file("window", 0644, + debug_dir, NULL, + &window_fops); + if (!debug_sample_window) + goto err_window; + + debug_sample_width = debugfs_create_file("width", 0644, + debug_dir, NULL, + &width_fops); + if (!debug_sample_width) + goto err_width; + + debug_threshold = debugfs_create_file("threshold", 0644, + debug_dir, NULL, + &threshold_fops); + if (!debug_threshold) + goto err_threshold; + + debug_enable = debugfs_create_file("enable", 0644, + debug_dir, &enabled, + &enable_fops); + if (!debug_enable) + goto err_enable; + + else { + ret = 0; + goto out; + } + +err_enable: + debugfs_remove(debug_threshold); +err_threshold: + debugfs_remove(debug_sample_width); +err_width: + debugfs_remove(debug_sample_window); +err_window: + debugfs_remove(debug_max); +err_max: + debugfs_remove(debug_count); +err_count: + debugfs_remove(debug_sample); +err_sample: + debugfs_remove(debug_dir); +err_debug_dir: +out: + return ret; +} + +/** + * free_debugfs - A function to cleanup the debugfs file interface + */ +static void free_debugfs(void) +{ + /* could also use a debugfs_remove_recursive */ + debugfs_remove(debug_enable); + debugfs_remove(debug_threshold); + debugfs_remove(debug_sample_width); + debugfs_remove(debug_sample_window); + debugfs_remove(debug_max); + debugfs_remove(debug_count); + debugfs_remove(debug_sample); + debugfs_remove(debug_dir); +} + +/** + * detector_init - Standard module initialization code + */ +static int detector_init(void) +{ + int ret = -ENOMEM; + + pr_info(BANNER "version %s\n", VERSION); + + ret = init_stats(); + if (ret) + goto out; + + ret = init_debugfs(); + if (ret) + goto err_stats; + + if (enabled) + ret = start_kthread(); + + goto out; + +err_stats: + ring_buffer_free(ring_buffer); +out: + return ret; + +} + +/** + * detector_exit - Standard module cleanup code + */ +static void detector_exit(void) +{ + int err; + + if (enabled) { + enabled = 0; + err = stop_kthread(); + if (err) + pr_err(BANNER "cannot stop kthread\n"); + } + + free_debugfs(); + ring_buffer_free(ring_buffer); /* free up the ring buffer */ + +} + +module_init(detector_init); +module_exit(detector_exit); diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index df990bb8c873..1a162709a85e 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -1147,15 +1147,12 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id) struct sg_mapping_iter *sg_miter = &host->sg_miter; struct variant_data *variant = host->variant; void __iomem *base = host->base; - unsigned long flags; u32 status; status = readl(base + MMCISTATUS); dev_dbg(mmc_dev(host->mmc), "irq1 (pio) %08x\n", status); - local_irq_save(flags); - do { unsigned int remain, len; char *buffer; @@ -1195,8 +1192,6 @@ static irqreturn_t mmci_pio_irq(int irq, void *dev_id) sg_miter_stop(sg_miter); - local_irq_restore(flags); - /* * If we have less than the fifo 'half-full' threshold to transfer, * trigger a PIO interrupt as soon as any data is available. diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 25c55ab05c7d..5a1d117a8744 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -842,9 +842,9 @@ static void poll_vortex(struct net_device *dev) { struct vortex_private *vp = netdev_priv(dev); unsigned long flags; - local_irq_save(flags); + local_irq_save_nort(flags); (vp->full_bus_master_rx ? boomerang_interrupt:vortex_interrupt)(dev->irq,dev); - local_irq_restore(flags); + local_irq_restore_nort(flags); } #endif @@ -1910,12 +1910,12 @@ static void vortex_tx_timeout(struct net_device *dev) * Block interrupts because vortex_interrupt does a bare spin_lock() */ unsigned long flags; - local_irq_save(flags); + local_irq_save_nort(flags); if (vp->full_bus_master_tx) boomerang_interrupt(dev->irq, dev); else vortex_interrupt(dev->irq, dev); - local_irq_restore(flags); + local_irq_restore_nort(flags); } } diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index da4c2d8a4173..1420dfb56bac 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -2233,7 +2233,7 @@ static void rtl8139_poll_controller(struct net_device *dev) struct rtl8139_private *tp = netdev_priv(dev); const int irq = tp->pci_dev->irq; - disable_irq(irq); + disable_irq_nosync(irq); rtl8139_interrupt(irq, dev); enable_irq(irq); } diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c index 56f109bc8394..02afc796bc71 100644 --- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c +++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c @@ -697,7 +697,7 @@ static void ezusb_req_ctx_wait(struct ezusb_priv *upriv, while (!ctx->done.done && msecs--) udelay(1000); } else { - wait_event_interruptible(ctx->done.wait, + swait_event_interruptible(ctx->done.wait, ctx->done.done); } break; diff --git a/drivers/pci/access.c b/drivers/pci/access.c index d11cdbb8fba3..223bbb9acb03 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -672,7 +672,7 @@ void pci_cfg_access_unlock(struct pci_dev *dev) WARN_ON(!dev->block_cfg_access); dev->block_cfg_access = 0; - wake_up_all(&pci_cfg_wait); + wake_up_all_locked(&pci_cfg_wait); raw_spin_unlock_irqrestore(&pci_lock, flags); } EXPORT_SYMBOL_GPL(pci_cfg_access_unlock); diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index 9bd41a35a78a..8e2d436c2e3f 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -1455,11 +1455,11 @@ err2: static int fcoe_alloc_paged_crc_eof(struct sk_buff *skb, int tlen) { struct fcoe_percpu_s *fps; - int rc; + int rc, cpu = get_cpu_light(); - fps = &get_cpu_var(fcoe_percpu); + fps = &per_cpu(fcoe_percpu, cpu); rc = fcoe_get_paged_crc_eof(skb, tlen, fps); - put_cpu_var(fcoe_percpu); + put_cpu_light(); return rc; } @@ -1646,11 +1646,11 @@ static inline int fcoe_filter_frames(struct fc_lport *lport, return 0; } - stats = per_cpu_ptr(lport->stats, get_cpu()); + stats = per_cpu_ptr(lport->stats, get_cpu_light()); stats->InvalidCRCCount++; if (stats->InvalidCRCCount < 5) printk(KERN_WARNING "fcoe: dropping frame with CRC error\n"); - put_cpu(); + put_cpu_light(); return -EINVAL; } @@ -1693,7 +1693,7 @@ static void fcoe_recv_frame(struct sk_buff *skb) */ hp = (struct fcoe_hdr *) skb_network_header(skb); - stats = per_cpu_ptr(lport->stats, get_cpu()); + stats = per_cpu_ptr(lport->stats, get_cpu_light()); if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) { if (stats->ErrorFrames < 5) printk(KERN_WARNING "fcoe: FCoE version " @@ -1725,13 +1725,13 @@ static void fcoe_recv_frame(struct sk_buff *skb) goto drop; if (!fcoe_filter_frames(lport, fp)) { - put_cpu(); + put_cpu_light(); fc_exch_recv(lport, fp); return; } drop: stats->ErrorFrames++; - put_cpu(); + put_cpu_light(); kfree_skb(skb); } diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index dcf36537a767..1a1f2e46452c 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -834,7 +834,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip) INIT_LIST_HEAD(&del_list); - stats = per_cpu_ptr(fip->lp->stats, get_cpu()); + stats = per_cpu_ptr(fip->lp->stats, get_cpu_light()); list_for_each_entry_safe(fcf, next, &fip->fcfs, list) { deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2; @@ -870,7 +870,7 @@ static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip) sel_time = fcf->time; } } - put_cpu(); + put_cpu_light(); list_for_each_entry_safe(fcf, next, &del_list, list) { /* Removes fcf from current list */ diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index e72673b0a8fb..da598a6caa22 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -814,10 +814,10 @@ static struct fc_exch *fc_exch_em_alloc(struct fc_lport *lport, } memset(ep, 0, sizeof(*ep)); - cpu = get_cpu(); + cpu = get_cpu_light(); pool = per_cpu_ptr(mp->pool, cpu); spin_lock_bh(&pool->lock); - put_cpu(); + put_cpu_light(); /* peek cache of free slot */ if (pool->left != FC_XID_UNKNOWN) { diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 87f5e694dbed..23c0a50fb6aa 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -190,7 +190,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) /* TODO: audit callers to ensure they are ready for qc_issue to * unconditionally re-enable interrupts */ - local_irq_save(flags); + local_irq_save_nort(flags); spin_unlock(ap->lock); /* If the device fell off, no sense in issuing commands */ @@ -252,7 +252,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) out: spin_lock(ap->lock); - local_irq_restore(flags); + local_irq_restore_nort(flags); return ret; } diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h index edc48f3b8230..ee5c6f9dfb6f 100644 --- a/drivers/scsi/qla2xxx/qla_inline.h +++ b/drivers/scsi/qla2xxx/qla_inline.h @@ -59,12 +59,12 @@ qla2x00_poll(struct rsp_que *rsp) { unsigned long flags; struct qla_hw_data *ha = rsp->hw; - local_irq_save(flags); + local_irq_save_nort(flags); if (IS_P3P_TYPE(ha)) qla82xx_poll(0, rsp); else ha->isp_ops->intr_handler(0, rsp); - local_irq_restore(flags); + local_irq_restore_nort(flags); } static inline uint8_t * diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 987f1c729e9c..18391e07d70f 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -3125,7 +3125,11 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp) * kref_put(). */ kref_get(&qentry->irq_notify.kref); +#ifdef CONFIG_PREEMPT_RT_BASE + swork_queue(&qentry->irq_notify.swork); +#else schedule_work(&qentry->irq_notify.work); +#endif } /* diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c index 97f0a2bd93ed..a4f45aaa9ad4 100644 --- a/drivers/thermal/x86_pkg_temp_thermal.c +++ b/drivers/thermal/x86_pkg_temp_thermal.c @@ -29,6 +29,7 @@ #include <linux/pm.h> #include <linux/thermal.h> #include <linux/debugfs.h> +#include <linux/swork.h> #include <asm/cpu_device_id.h> #include <asm/mce.h> @@ -352,7 +353,7 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work) } } -static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) +static void platform_thermal_notify_work(struct swork_event *event) { unsigned long flags; int cpu = smp_processor_id(); @@ -369,7 +370,7 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) pkg_work_scheduled[phy_id]) { disable_pkg_thres_interrupt(); spin_unlock_irqrestore(&pkg_work_lock, flags); - return -EINVAL; + return; } pkg_work_scheduled[phy_id] = 1; spin_unlock_irqrestore(&pkg_work_lock, flags); @@ -378,9 +379,48 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) schedule_delayed_work_on(cpu, &per_cpu(pkg_temp_thermal_threshold_work, cpu), msecs_to_jiffies(notify_delay_ms)); +} + +#ifdef CONFIG_PREEMPT_RT_FULL +static struct swork_event notify_work; + +static int thermal_notify_work_init(void) +{ + int err; + + err = swork_get(); + if (err) + return err; + + INIT_SWORK(¬ify_work, platform_thermal_notify_work); return 0; } +static void thermal_notify_work_cleanup(void) +{ + swork_put(); +} + +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) +{ + swork_queue(¬ify_work); + return 0; +} + +#else /* !CONFIG_PREEMPT_RT_FULL */ + +static int thermal_notify_work_init(void) { return 0; } + +static void thermal_notify_work_cleanup(void) { } + +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) +{ + platform_thermal_notify_work(NULL); + + return 0; +} +#endif /* CONFIG_PREEMPT_RT_FULL */ + static int find_siblings_cpu(int cpu) { int i; @@ -584,6 +624,9 @@ static int __init pkg_temp_thermal_init(void) if (!x86_match_cpu(pkg_temp_thermal_ids)) return -ENODEV; + if (!thermal_notify_work_init()) + return -ENODEV; + spin_lock_init(&pkg_work_lock); platform_thermal_package_notify = pkg_temp_thermal_platform_thermal_notify; @@ -608,7 +651,7 @@ err_ret: kfree(pkg_work_scheduled); platform_thermal_package_notify = NULL; platform_thermal_package_rate_control = NULL; - + thermal_notify_work_cleanup(); return -ENODEV; } @@ -633,6 +676,7 @@ static void __exit pkg_temp_thermal_exit(void) mutex_unlock(&phy_dev_list_mutex); platform_thermal_package_notify = NULL; platform_thermal_package_rate_control = NULL; + thermal_notify_work_cleanup(); for_each_online_cpu(i) cancel_delayed_work_sync( &per_cpu(pkg_temp_thermal_threshold_work, i)); diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index fa823a54cf35..491ec453ae8f 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -58,7 +58,16 @@ static struct uart_driver serial8250_reg; static unsigned int skip_txen_test; /* force skip of txen test at init time */ -#define PASS_LIMIT 512 +/* + * On -rt we can have a more delays, and legitimately + * so - so don't drop work spuriously and spam the + * syslog: + */ +#ifdef CONFIG_PREEMPT_RT_FULL +# define PASS_LIMIT 1000000 +#else +# define PASS_LIMIT 512 +#endif #include <asm/serial.h> /* diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 922c32137cd7..0da2c2ff9d2d 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -35,6 +35,7 @@ #include <linux/nmi.h> #include <linux/mutex.h> #include <linux/slab.h> +#include <linux/kdb.h> #include <linux/uaccess.h> #include <linux/pm_runtime.h> #include <linux/timer.h> @@ -3109,9 +3110,9 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, serial8250_rpm_get(up); - if (port->sysrq) + if (port->sysrq || oops_in_progress) locked = 0; - else if (oops_in_progress) + else if (in_kdb_printk()) locked = spin_trylock_irqsave(&port->lock, flags); else spin_lock_irqsave(&port->lock, flags); diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 8a9e213387a7..dd1f9a426b74 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -2167,13 +2167,19 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) clk_enable(uap->clk); - local_irq_save(flags); + /* + * local_irq_save(flags); + * + * This local_irq_save() is nonsense. If we come in via sysrq + * handling then interrupts are already disabled. Aside of + * that the port.sysrq check is racy on SMP regardless. + */ if (uap->port.sysrq) locked = 0; else if (oops_in_progress) - locked = spin_trylock(&uap->port.lock); + locked = spin_trylock_irqsave(&uap->port.lock, flags); else - spin_lock(&uap->port.lock); + spin_lock_irqsave(&uap->port.lock, flags); /* * First save the CR then disable the interrupts @@ -2197,8 +2203,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) pl011_write(old_cr, uap, REG_CR); if (locked) - spin_unlock(&uap->port.lock); - local_irq_restore(flags); + spin_unlock_irqrestore(&uap->port.lock, flags); clk_disable(uap->clk); } diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c index 44e5b5bf713b..400140d1dfff 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c @@ -1257,13 +1257,10 @@ serial_omap_console_write(struct console *co, const char *s, pm_runtime_get_sync(up->dev); - local_irq_save(flags); - if (up->port.sysrq) - locked = 0; - else if (oops_in_progress) - locked = spin_trylock(&up->port.lock); + if (up->port.sysrq || oops_in_progress) + locked = spin_trylock_irqsave(&up->port.lock, flags); else - spin_lock(&up->port.lock); + spin_lock_irqsave(&up->port.lock, flags); /* * First save the IER then disable the interrupts @@ -1292,8 +1289,7 @@ serial_omap_console_write(struct console *co, const char *s, pm_runtime_mark_last_busy(up->dev); pm_runtime_put_autosuspend(up->dev); if (locked) - spin_unlock(&up->port.lock); - local_irq_restore(flags); + spin_unlock_irqrestore(&up->port.lock, flags); } static int __init diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 7fdfda105b8a..d58615f21c20 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1763,9 +1763,9 @@ static void __usb_hcd_giveback_urb(struct urb *urb) * and no one may trigger the above deadlock situation when * running complete() in tasklet. */ - local_irq_save(flags); + local_irq_save_nort(flags); urb->complete(urb); - local_irq_restore(flags); + local_irq_restore_nort(flags); usb_anchor_resume_wakeups(anchor); atomic_dec(&urb->use_count); diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 367e6198a10a..73eb2f86e7d8 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1590,7 +1590,7 @@ static void ffs_data_put(struct ffs_data *ffs) pr_info("%s(): freeing\n", __func__); ffs_data_clear(ffs); BUG_ON(waitqueue_active(&ffs->ev.waitq) || - waitqueue_active(&ffs->ep0req_completion.wait)); + swait_active(&ffs->ep0req_completion.wait)); kfree(ffs->dev_name); kfree(ffs); } diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 420fd2eedb71..86b23165f679 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -346,7 +346,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len) spin_unlock_irq (&epdata->dev->lock); if (likely (value == 0)) { - value = wait_event_interruptible (done.wait, done.done); + value = swait_event_interruptible (done.wait, done.done); if (value != 0) { spin_lock_irq (&epdata->dev->lock); if (likely (epdata->ep != NULL)) { @@ -355,7 +355,7 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len) usb_ep_dequeue (epdata->ep, epdata->req); spin_unlock_irq (&epdata->dev->lock); - wait_event (done.wait, done.done); + swait_event (done.wait, done.done); if (epdata->status == -ECONNRESET) epdata->status = -EINTR; } else { |