diff options
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/acpi/boot.c | 14 | ||||
-rw-r--r-- | arch/x86/kernel/acpi/cstate.c | 11 | ||||
-rw-r--r-- | arch/x86/kernel/apic/apic.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/apic/io_apic.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/apic/probe_64.c | 7 | ||||
-rw-r--r-- | arch/x86/kernel/apic/x2apic_uv_x.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mtrr/main.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/e820.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/early-quirks.c | 21 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 2 | ||||
-rw-r--r-- | arch/x86/kernel/head_32.S | 8 | ||||
-rw-r--r-- | arch/x86/kernel/pvclock.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/setup.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/smpboot.c | 112 | ||||
-rw-r--r-- | arch/x86/kernel/trampoline.c | 17 |
16 files changed, 177 insertions, 61 deletions
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index cd40aba6aa95..37311ba2b699 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -73,6 +73,7 @@ u8 acpi_sci_flags __initdata; int acpi_sci_override_gsi __initdata; int acpi_skip_timer_override __initdata; int acpi_use_timer_override __initdata; +int acpi_fix_pin2_polarity __initdata; #ifdef CONFIG_X86_LOCAL_APIC static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; @@ -363,10 +364,15 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header, return 0; } - if (acpi_skip_timer_override && - intsrc->source_irq == 0 && intsrc->global_irq == 2) { - printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n"); - return 0; + if (intsrc->source_irq == 0 && intsrc->global_irq == 2) { + if (acpi_skip_timer_override) { + printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n"); + return 0; + } + if (acpi_fix_pin2_polarity && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) { + intsrc->inti_flags &= ~ACPI_MADT_POLARITY_MASK; + printk(PREFIX "BIOS IRQ0 pin2 override: forcing polarity to high active.\n"); + } } mp_override_legacy_irq(intsrc->source_irq, diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index fb7a5f052e2b..bcc4adda7b9b 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -13,6 +13,7 @@ #include <acpi/processor.h> #include <asm/acpi.h> +#include <asm/mwait.h> /* * Initialize bm_flags based on the CPU cache properties @@ -65,16 +66,6 @@ static struct cstate_entry *cpu_cstate_entry; /* per CPU ptr */ static short mwait_supported[ACPI_PROCESSOR_MAX_POWER]; -#define MWAIT_SUBSTATE_MASK (0xf) -#define MWAIT_CSTATE_MASK (0xf) -#define MWAIT_SUBSTATE_SIZE (4) - -#define CPUID_MWAIT_LEAF (5) -#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1) -#define CPUID5_ECX_INTERRUPT_BREAK (0x2) - -#define MWAIT_ECX_INTERRUPT_BREAK (0x1) - #define NATIVE_CSTATE_BEYOND_HALT (2) static long acpi_processor_ffh_cstate_probe_cpu(void *_cx) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index bbbf9293d9b5..5ee1674505a8 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1343,6 +1343,14 @@ void __cpuinit end_local_APIC_setup(void) setup_apic_nmi_watchdog(NULL); apic_pm_activate(); + + /* + * Now that local APIC setup is completed for BP, configure the fault + * handling for interrupt remapping. + */ + if (!smp_processor_id() && intr_remapping_enabled) + enable_drhd_fault_handling(); + } #ifdef CONFIG_X86_X2APIC diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 38128cc318cf..4a809bf319d9 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3642,6 +3642,7 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) msg.data |= MSI_DATA_VECTOR(cfg->vector); msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; msg.address_lo |= MSI_ADDR_DEST_ID(dest); + msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest); dmar_msi_write(irq, &msg); diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 83e9be4778e2..fac49a845064 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -76,13 +76,6 @@ void __init default_setup_apic_routing(void) /* need to update phys_pkg_id */ apic->phys_pkg_id = apicid_phys_pkg_id; } - - /* - * Now that apic routing model is selected, configure the - * fault handling for intr remapping. - */ - if (intr_remapping_enabled) - enable_drhd_fault_handling(); } /* Same for both flat and physical. */ diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index c085d52dbaf2..25a1b3ccae9f 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -694,9 +694,11 @@ void __init uv_system_init(void) for (j = 0; j < 64; j++) { if (!test_bit(j, &present)) continue; - uv_blade_info[blade].pnode = (i * 64 + j); + pnode = (i * 64 + j); + uv_blade_info[blade].pnode = pnode; uv_blade_info[blade].nr_possible_cpus = 0; uv_blade_info[blade].nr_online_cpus = 0; + max_pnode = max(pnode, max_pnode); blade++; } } @@ -734,10 +736,6 @@ void __init uv_system_init(void) uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid); uv_node_to_blade[nid] = blade; uv_cpu_to_blade[cpu] = blade; - max_pnode = max(pnode, max_pnode); - - printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, lcpu %d, blade %d\n", - cpu, apicid, pnode, nid, lcpu, blade); } /* Add blade/pnode info for nodes without cpus */ @@ -749,7 +747,6 @@ void __init uv_system_init(void) pnode = (paddr >> m_val) & pnode_mask; blade = boot_pnode_to_blade(pnode); uv_node_to_blade[nid] = blade; - max_pnode = max(pnode, max_pnode); } map_gru_high(max_pnode); diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 459168083b77..59ca19f8a670 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -744,6 +744,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) per_cpu(acfreq_data, policy->cpu) = NULL; acpi_processor_unregister_performance(data->acpi_data, policy->cpu); + kfree(data->freq_table); kfree(data); } diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 79556bd9b602..e253288df914 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -763,13 +763,21 @@ void set_mtrr_aps_delayed_init(void) } /* - * MTRR initialization for all AP's + * Delayed MTRR initialization for all AP's */ void mtrr_aps_init(void) { if (!use_intel()) return; + /* + * Check if someone has requested the delay of AP MTRR initialization, + * by doing set_mtrr_aps_delayed_init(), prior to this point. If not, + * then we are done. + */ + if (!mtrr_aps_delayed_init) + return; + set_mtrr(~0U, 0, 0, 0); mtrr_aps_delayed_init = false; } diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 7bca3c6a02fb..41ac10ebb2fb 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -980,15 +980,21 @@ static int __init parse_memopt(char *p) if (!p) return -EINVAL; -#ifdef CONFIG_X86_32 if (!strcmp(p, "nopentium")) { +#ifdef CONFIG_X86_32 setup_clear_cpu_cap(X86_FEATURE_PSE); return 0; - } +#else + printk(KERN_WARNING "mem=nopentium ignored! (only supported on x86_32)\n"); + return -EINVAL; #endif + } userdef = 1; mem_size = memparse(p, &p); + /* don't remove all of memory when handling "mem={invalid}" param */ + if (mem_size == 0) + return -EINVAL; e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); return 0; diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index ebdb85cf2686..f67a33c7f415 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -145,15 +145,10 @@ static void __init ati_bugs(int num, int slot, int func) static u32 __init ati_sbx00_rev(int num, int slot, int func) { - u32 old, d; + u32 d; - d = read_pci_config(num, slot, func, 0x70); - old = d; - d &= ~(1<<8); - write_pci_config(num, slot, func, 0x70, d); d = read_pci_config(num, slot, func, 0x8); d &= 0xff; - write_pci_config(num, slot, func, 0x70, old); return d; } @@ -162,11 +157,19 @@ static void __init ati_bugs_contd(int num, int slot, int func) { u32 d, rev; - if (acpi_use_timer_override) + rev = ati_sbx00_rev(num, slot, func); + if (rev >= 0x40) + acpi_fix_pin2_polarity = 1; + + /* + * SB600: revisions 0x11, 0x12, 0x13, 0x14, ... + * SB700: revisions 0x39, 0x3a, ... + * SB800: revisions 0x40, 0x41, ... + */ + if (rev >= 0x39) return; - rev = ati_sbx00_rev(num, slot, func); - if (rev > 0x13) + if (acpi_use_timer_override) return; /* check for IRQ0 interrupt swap */ diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index fa41066a8255..b1bd97538af4 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1338,7 +1338,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) decl PER_CPU_VAR(irq_count) jmp error_exit CFI_ENDPROC -END(do_hypervisor_callback) +END(xen_do_hypervisor_callback) /* * Hypervisor uses this for application faults while it executes. diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 37c3d4b17d85..75e398199643 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -328,7 +328,7 @@ ENTRY(startup_32_smp) /* * Enable paging */ - movl $pa(swapper_pg_dir),%eax + movl pa(initial_page_table), %eax movl %eax,%cr3 /* set the page table pointer.. */ movl %cr0,%eax orl $X86_CR0_PG,%eax @@ -608,6 +608,8 @@ ignore_int: .align 4 ENTRY(initial_code) .long i386_start_kernel +ENTRY(initial_page_table) + .long pa(swapper_pg_dir) /* * BSS section @@ -623,6 +625,10 @@ ENTRY(swapper_pg_dir) #endif swapper_pg_fixmap: .fill 1024,4,0 +#ifdef CONFIG_X86_TRAMPOLINE +ENTRY(trampoline_pg_dir) + .fill 1024,4,0 +#endif ENTRY(empty_zero_page) .fill 4096,1,0 diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index dfdfe4662e05..b12fe8de6652 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -111,6 +111,11 @@ unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src) static atomic64_t last_value = ATOMIC64_INIT(0); +void pvclock_resume(void) +{ + atomic64_set(&last_value, 0); +} + cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) { struct pvclock_shadow_time shadow; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b4ae4acbd031..6600cfde6b7c 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1008,6 +1008,8 @@ void __init setup_arch(char **cmdline_p) paging_init(); x86_init.paging.pagetable_setup_done(swapper_pg_dir); + setup_trampoline_page_table(); + tboot_probe(); #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8931d05ee25f..9a4fdc5dd106 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -62,6 +62,7 @@ #include <asm/pgtable.h> #include <asm/tlbflush.h> #include <asm/mtrr.h> +#include <asm/mwait.h> #include <asm/vmi.h> #include <asm/apic.h> #include <asm/setup.h> @@ -73,7 +74,6 @@ #ifdef CONFIG_X86_32 u8 apicid_2_node[MAX_APICID]; -static int low_mappings; #endif /* State of each CPU */ @@ -300,6 +300,18 @@ notrace static void __cpuinit start_secondary(void *unused) * fragile that we want to limit the things done here to the * most necessary things. */ + +#ifdef CONFIG_X86_32 + /* + * Switch away from the trampoline page-table + * + * Do this before cpu_init() because it needs to access per-cpu + * data which may not be mapped in the trampoline page-table. + */ + load_cr3(swapper_pg_dir); + __flush_tlb_all(); +#endif + vmi_bringup(); cpu_init(); preempt_disable(); @@ -318,12 +330,6 @@ notrace static void __cpuinit start_secondary(void *unused) legacy_pic->chip->unmask(0); } -#ifdef CONFIG_X86_32 - while (low_mappings) - cpu_relax(); - __flush_tlb_all(); -#endif - /* This must be done before setting cpu_online_mask */ set_cpu_sibling_map(raw_smp_processor_id()); wmb(); @@ -773,6 +779,7 @@ do_rest: #ifdef CONFIG_X86_32 /* Stack for startup_32 can be just as for start_secondary onwards */ irq_ctx_init(cpu); + initial_page_table = __pa(&trampoline_pg_dir); #else clear_tsk_thread_flag(c_idle.idle, TIF_FORK); initial_gs = per_cpu_offset(cpu); @@ -913,20 +920,8 @@ int __cpuinit native_cpu_up(unsigned int cpu) per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; -#ifdef CONFIG_X86_32 - /* init low mem mapping */ - clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, - min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); - flush_tlb_all(); - low_mappings = 1; - err = do_boot_cpu(apicid, cpu); - zap_low_mappings(false); - low_mappings = 0; -#else - err = do_boot_cpu(apicid, cpu); -#endif if (err) { pr_debug("do_boot_cpu failed %d\n", err); return -EIO; @@ -1366,11 +1361,88 @@ void play_dead_common(void) local_irq_disable(); } +/* + * We need to flush the caches before going to sleep, lest we have + * dirty data in our caches when we come back up. + */ +static inline void mwait_play_dead(void) +{ + unsigned int eax, ebx, ecx, edx; + unsigned int highest_cstate = 0; + unsigned int highest_subcstate = 0; + int i; + void *mwait_ptr; + + if (!cpu_has(¤t_cpu_data, X86_FEATURE_MWAIT)) + return; + if (!cpu_has(¤t_cpu_data, X86_FEATURE_CLFLSH)) + return; + if (current_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) + return; + + eax = CPUID_MWAIT_LEAF; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + + /* + * eax will be 0 if EDX enumeration is not valid. + * Initialized below to cstate, sub_cstate value when EDX is valid. + */ + if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) { + eax = 0; + } else { + edx >>= MWAIT_SUBSTATE_SIZE; + for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { + if (edx & MWAIT_SUBSTATE_MASK) { + highest_cstate = i; + highest_subcstate = edx & MWAIT_SUBSTATE_MASK; + } + } + eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) | + (highest_subcstate - 1); + } + + /* + * This should be a memory location in a cache line which is + * unlikely to be touched by other processors. The actual + * content is immaterial as it is not actually modified in any way. + */ + mwait_ptr = ¤t_thread_info()->flags; + + wbinvd(); + + while (1) { + /* + * The CLFLUSH is a workaround for erratum AAI65 for + * the Xeon 7400 series. It's not clear it is actually + * needed, but it should be harmless in either case. + * The WBINVD is insufficient due to the spurious-wakeup + * case where we return around the loop. + */ + clflush(mwait_ptr); + __monitor(mwait_ptr, 0, 0); + mb(); + __mwait(eax, 0); + } +} + +static inline void hlt_play_dead(void) +{ + if (current_cpu_data.x86 >= 4) + wbinvd(); + + while (1) { + native_halt(); + } +} + void native_play_dead(void) { play_dead_common(); tboot_shutdown(TB_SHUTDOWN_WFS); - wbinvd_halt(); + + mwait_play_dead(); /* Only returns on failure */ + hlt_play_dead(); } #else /* ... !CONFIG_HOTPLUG_CPU */ diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index c652ef62742d..e2a595257390 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c @@ -1,6 +1,7 @@ #include <linux/io.h> #include <asm/trampoline.h> +#include <asm/pgtable.h> #include <asm/e820.h> #if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP) @@ -37,3 +38,19 @@ unsigned long __trampinit setup_trampoline(void) memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); return virt_to_phys(trampoline_base); } + +void __init setup_trampoline_page_table(void) +{ +#ifdef CONFIG_X86_32 + /* Copy kernel address range */ + clone_pgd_range(trampoline_pg_dir + KERNEL_PGD_BOUNDARY, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + KERNEL_PGD_PTRS); + + /* Initialize low mappings */ + clone_pgd_range(trampoline_pg_dir, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + min_t(unsigned long, KERNEL_PGD_PTRS, + KERNEL_PGD_BOUNDARY)); +#endif +} |