summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig26
-rw-r--r--arch/x86/Kconfig.cpu3
-rw-r--r--arch/x86/boot/compressed/head_32.S3
-rw-r--r--arch/x86/boot/compressed/head_64.S3
-rw-r--r--arch/x86/boot/compressed/vmlinux.lds.S6
-rw-r--r--arch/x86/ia32/ia32entry.S36
-rw-r--r--arch/x86/include/asm/checksum_32.h3
-rw-r--r--arch/x86/include/asm/cmpxchg_32.h30
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/include/asm/mce.h2
-rw-r--r--arch/x86/include/asm/paravirt.h28
-rw-r--r--arch/x86/include/asm/paravirt_types.h10
-rw-r--r--arch/x86/include/asm/pgtable_types.h1
-rw-r--r--arch/x86/include/asm/topology.h10
-rw-r--r--arch/x86/kernel/acpi/cstate.c2
-rw-r--r--arch/x86/kernel/acpi/realmode/wakeup.lds.S4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c7
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c84
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c1
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c67
-rw-r--r--arch/x86/kernel/cpu/mtrr/if.c17
-rw-r--r--arch/x86/kernel/cpu/perf_event.c3
-rw-r--r--arch/x86/kernel/e820.c4
-rw-r--r--arch/x86/kernel/early_printk.c10
-rw-r--r--arch/x86/kernel/head_32.S2
-rw-r--r--arch/x86/kernel/head_64.S2
-rw-r--r--arch/x86/kernel/i386_ksyms_32.c10
-rw-r--r--arch/x86/kernel/pci-dma.c4
-rw-r--r--arch/x86/kernel/pci-gart_64.c1
-rw-r--r--arch/x86/kernel/reboot.c1
-rw-r--r--arch/x86/kernel/time.c3
-rw-r--r--arch/x86/kernel/trampoline.c12
-rw-r--r--arch/x86/kernel/trampoline_64.S4
-rw-r--r--arch/x86/kernel/traps.c6
-rw-r--r--arch/x86/kernel/tsc_sync.c2
-rw-r--r--arch/x86/kernel/vmi_32.c2
-rw-r--r--arch/x86/kernel/vmlinux.lds.S96
-rw-r--r--arch/x86/kvm/lapic.c2
-rw-r--r--arch/x86/kvm/mmu.c84
-rw-r--r--arch/x86/kvm/paging_tmpl.h18
-rw-r--r--arch/x86/kvm/svm.c25
-rw-r--r--arch/x86/kvm/vmx.c2
-rw-r--r--arch/x86/kvm/x86.c2
-rw-r--r--arch/x86/lib/Makefile4
-rw-r--r--arch/x86/lib/cmpxchg8b_emu.S57
-rw-r--r--arch/x86/mm/Makefile3
-rw-r--r--arch/x86/mm/init.c63
-rw-r--r--arch/x86/mm/pat.c7
-rw-r--r--arch/x86/mm/setup_nx.c69
-rw-r--r--arch/x86/pci/i386.c2
-rw-r--r--arch/x86/xen/debugfs.c2
-rw-r--r--arch/x86/xen/enlighten.c10
52 files changed, 524 insertions, 332 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 93698794aa3a..07e01149e3bf 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -86,10 +86,6 @@ config STACKTRACE_SUPPORT
config HAVE_LATENCYTOP_SUPPORT
def_bool y
-config FAST_CMPXCHG_LOCAL
- bool
- default y
-
config MMU
def_bool y
@@ -432,6 +428,17 @@ config X86_NUMAQ
of Flat Logical. You will need a new lynxer.elf file to flash your
firmware with - send email to <Martin.Bligh@us.ibm.com>.
+config X86_SUPPORTS_MEMORY_FAILURE
+ bool
+ # MCE code calls memory_failure():
+ depends on X86_MCE
+ # On 32-bit this adds too big of NODES_SHIFT and we run out of page flags:
+ depends on !X86_NUMAQ
+ # On 32-bit SPARSEMEM adds too big of SECTIONS_WIDTH:
+ depends on X86_64 || !SPARSEMEM
+ select ARCH_SUPPORTS_MEMORY_FAILURE
+ default y
+
config X86_VISWS
bool "SGI 320/540 (Visual Workstation)"
depends on X86_32 && PCI && X86_MPPARSE && PCI_GODIRECT
@@ -484,7 +491,7 @@ if PARAVIRT_GUEST
source "arch/x86/xen/Kconfig"
config VMI
- bool "VMI Guest support"
+ bool "VMI Guest support (DEPRECATED)"
select PARAVIRT
depends on X86_32
---help---
@@ -493,6 +500,15 @@ config VMI
at the moment), by linking the kernel to a GPL-ed ROM module
provided by the hypervisor.
+ As of September 2009, VMware has started a phased retirement
+ of this feature from VMware's products. Please see
+ feature-removal-schedule.txt for details. If you are
+ planning to enable this option, please note that you cannot
+ live migrate a VMI enabled VM to a future VMware product,
+ which doesn't support VMI. So if you expect your kernel to
+ seamlessly migrate to newer VMware products, keep this
+ disabled.
+
config KVM_CLOCK
bool "KVM paravirtualized clock"
select PARAVIRT
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 527519b8a9f9..f2824fb8c79c 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -400,7 +400,7 @@ config X86_TSC
config X86_CMPXCHG64
def_bool y
- depends on X86_PAE || X86_64
+ depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
# this should be set for all -march=.. options where the compiler
# generates cmov.
@@ -412,6 +412,7 @@ config X86_MINIMUM_CPU_FAMILY
int
default "64" if X86_64
default "6" if X86_32 && X86_P6_NOP
+ default "5" if X86_32 && X86_CMPXCHG64
default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK)
default "3"
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 75e4f001e706..f543b70ffae2 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -23,13 +23,14 @@
*/
.text
+#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page_types.h>
#include <asm/boot.h>
#include <asm/asm-offsets.h>
- .section ".text.head","ax",@progbits
+ __HEAD
ENTRY(startup_32)
cld
/*
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index f62c284db9eb..077e1b69198e 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -24,6 +24,7 @@
.code32
.text
+#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/pgtable_types.h>
@@ -33,7 +34,7 @@
#include <asm/processor-flags.h>
#include <asm/asm-offsets.h>
- .section ".text.head"
+ __HEAD
.code32
ENTRY(startup_32)
cld
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
index cc353e1b3ffd..f4193bb48782 100644
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -1,3 +1,5 @@
+#include <asm-generic/vmlinux.lds.h>
+
OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
#undef i386
@@ -18,9 +20,9 @@ SECTIONS
* address 0.
*/
. = 0;
- .text.head : {
+ .head.text : {
_head = . ;
- *(.text.head)
+ HEAD_TEXT
_ehead = . ;
}
.rodata.compressed : {
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 74619c4f9fda..1733f9f65e82 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -21,8 +21,8 @@
#define __AUDIT_ARCH_LE 0x40000000
#ifndef CONFIG_AUDITSYSCALL
-#define sysexit_audit int_ret_from_sys_call
-#define sysretl_audit int_ret_from_sys_call
+#define sysexit_audit ia32_ret_from_sys_call
+#define sysretl_audit ia32_ret_from_sys_call
#endif
#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
@@ -39,12 +39,12 @@
.endm
/* clobbers %eax */
- .macro CLEAR_RREGS _r9=rax
+ .macro CLEAR_RREGS offset=0, _r9=rax
xorl %eax,%eax
- movq %rax,R11(%rsp)
- movq %rax,R10(%rsp)
- movq %\_r9,R9(%rsp)
- movq %rax,R8(%rsp)
+ movq %rax,\offset+R11(%rsp)
+ movq %rax,\offset+R10(%rsp)
+ movq %\_r9,\offset+R9(%rsp)
+ movq %rax,\offset+R8(%rsp)
.endm
/*
@@ -172,6 +172,10 @@ sysexit_from_sys_call:
movl RIP-R11(%rsp),%edx /* User %eip */
CFI_REGISTER rip,rdx
RESTORE_ARGS 1,24,1,1,1,1
+ xorq %r8,%r8
+ xorq %r9,%r9
+ xorq %r10,%r10
+ xorq %r11,%r11
popfq
CFI_ADJUST_CFA_OFFSET -8
/*CFI_RESTORE rflags*/
@@ -202,7 +206,7 @@ sysexit_from_sys_call:
.macro auditsys_exit exit,ebpsave=RBP
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
- jnz int_ret_from_sys_call
+ jnz ia32_ret_from_sys_call
TRACE_IRQS_ON
sti
movl %eax,%esi /* second arg, syscall return value */
@@ -218,8 +222,9 @@ sysexit_from_sys_call:
cli
TRACE_IRQS_OFF
testl %edi,TI_flags(%r10)
- jnz int_with_check
- jmp \exit
+ jz \exit
+ CLEAR_RREGS -ARGOFFSET
+ jmp int_with_check
.endm
sysenter_auditsys:
@@ -329,6 +334,9 @@ sysretl_from_sys_call:
CFI_REGISTER rip,rcx
movl EFLAGS-ARGOFFSET(%rsp),%r11d
/*CFI_REGISTER rflags,r11*/
+ xorq %r10,%r10
+ xorq %r9,%r9
+ xorq %r8,%r8
TRACE_IRQS_ON
movl RSP-ARGOFFSET(%rsp),%esp
CFI_RESTORE rsp
@@ -353,7 +361,7 @@ cstar_tracesys:
#endif
xchgl %r9d,%ebp
SAVE_REST
- CLEAR_RREGS r9
+ CLEAR_RREGS 0, r9
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
@@ -425,6 +433,8 @@ ia32_do_call:
call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
ia32_sysret:
movq %rax,RAX-ARGOFFSET(%rsp)
+ia32_ret_from_sys_call:
+ CLEAR_RREGS -ARGOFFSET
jmp int_ret_from_sys_call
ia32_tracesys:
@@ -442,8 +452,8 @@ END(ia32_syscall)
ia32_badsys:
movq $0,ORIG_RAX-ARGOFFSET(%rsp)
- movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
- jmp int_ret_from_sys_call
+ movq $-ENOSYS,%rax
+ jmp ia32_sysret
quiet_ni_syscall:
movq $-ENOSYS,%rax
diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h
index 7c5ef8b14d92..46fc474fd819 100644
--- a/arch/x86/include/asm/checksum_32.h
+++ b/arch/x86/include/asm/checksum_32.h
@@ -161,7 +161,8 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
"adcl $0, %0 ;\n"
: "=&r" (sum)
: "r" (saddr), "r" (daddr),
- "r" (htonl(len)), "r" (htonl(proto)), "0" (sum));
+ "r" (htonl(len)), "r" (htonl(proto)), "0" (sum)
+ : "memory");
return csum_fold(sum);
}
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 82ceb788a981..ee1931be6593 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -312,19 +312,23 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64);
-#define cmpxchg64(ptr, o, n) \
-({ \
- __typeof__(*(ptr)) __ret; \
- if (likely(boot_cpu_data.x86 > 4)) \
- __ret = (__typeof__(*(ptr)))__cmpxchg64((ptr), \
- (unsigned long long)(o), \
- (unsigned long long)(n)); \
- else \
- __ret = (__typeof__(*(ptr)))cmpxchg_486_u64((ptr), \
- (unsigned long long)(o), \
- (unsigned long long)(n)); \
- __ret; \
-})
+#define cmpxchg64(ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ __typeof__(*(ptr)) __old = (o); \
+ __typeof__(*(ptr)) __new = (n); \
+ alternative_io("call cmpxchg8b_emu", \
+ "lock; cmpxchg8b (%%esi)" , \
+ X86_FEATURE_CX8, \
+ "=A" (__ret), \
+ "S" ((ptr)), "0" (__old), \
+ "b" ((unsigned int)__new), \
+ "c" ((unsigned int)(__new>>32)) \
+ : "memory"); \
+ __ret; })
+
+
+
#define cmpxchg64_local(ptr, o, n) \
({ \
__typeof__(*(ptr)) __ret; \
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3be000435fad..d83892226f73 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -796,6 +796,7 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index b608a64c5814..f1363b72364f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -133,6 +133,8 @@ static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
static inline void enable_p5_mce(void) {}
#endif
+extern void (*x86_mce_decode_callback)(struct mce *m);
+
void mce_setup(struct mce *m);
void mce_log(struct mce *m);
DECLARE_PER_CPU(struct sys_device, mce_dev);
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 8aebcc41041d..efb38994859c 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -840,42 +840,22 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
static inline unsigned long __raw_local_save_flags(void)
{
- unsigned long f;
-
- asm volatile(paravirt_alt(PARAVIRT_CALL)
- : "=a"(f)
- : paravirt_type(pv_irq_ops.save_fl),
- paravirt_clobber(CLBR_EAX)
- : "memory", "cc");
- return f;
+ return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl);
}
static inline void raw_local_irq_restore(unsigned long f)
{
- asm volatile(paravirt_alt(PARAVIRT_CALL)
- : "=a"(f)
- : PV_FLAGS_ARG(f),
- paravirt_type(pv_irq_ops.restore_fl),
- paravirt_clobber(CLBR_EAX)
- : "memory", "cc");
+ PVOP_VCALLEE1(pv_irq_ops.restore_fl, f);
}
static inline void raw_local_irq_disable(void)
{
- asm volatile(paravirt_alt(PARAVIRT_CALL)
- :
- : paravirt_type(pv_irq_ops.irq_disable),
- paravirt_clobber(CLBR_EAX)
- : "memory", "eax", "cc");
+ PVOP_VCALLEE0(pv_irq_ops.irq_disable);
}
static inline void raw_local_irq_enable(void)
{
- asm volatile(paravirt_alt(PARAVIRT_CALL)
- :
- : paravirt_type(pv_irq_ops.irq_enable),
- paravirt_clobber(CLBR_EAX)
- : "memory", "eax", "cc");
+ PVOP_VCALLEE0(pv_irq_ops.irq_enable);
}
static inline unsigned long __raw_local_irq_save(void)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index dd0f5b32489d..9357473c8da0 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -494,10 +494,11 @@ int paravirt_disable_iospace(void);
#define EXTRA_CLOBBERS
#define VEXTRA_CLOBBERS
#else /* CONFIG_X86_64 */
+/* [re]ax isn't an arg, but the return val */
#define PVOP_VCALL_ARGS \
unsigned long __edi = __edi, __esi = __esi, \
- __edx = __edx, __ecx = __ecx
-#define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax
+ __edx = __edx, __ecx = __ecx, __eax = __eax
+#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x))
#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x))
@@ -509,6 +510,7 @@ int paravirt_disable_iospace(void);
"=c" (__ecx)
#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax)
+/* void functions are still allowed [re]ax for scratch */
#define PVOP_VCALLEE_CLOBBERS "=a" (__eax)
#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS
@@ -583,8 +585,8 @@ int paravirt_disable_iospace(void);
VEXTRA_CLOBBERS, \
pre, post, ##__VA_ARGS__)
-#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \
- ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \
+#define __PVOP_VCALLEESAVE(op, pre, post, ...) \
+ ____PVOP_VCALL(op.func, CLBR_RET_REG, \
PVOP_VCALLEE_CLOBBERS, , \
pre, post, ##__VA_ARGS__)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 7b467bf3c680..d1f4a760be23 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -277,6 +277,7 @@ static inline pteval_t pte_flags(pte_t pte)
typedef struct page *pgtable_t;
extern pteval_t __supported_pte_mask;
+extern void set_nx(void);
extern int nx_enabled;
#define pgprot_writecombine pgprot_writecombine
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 6f0695d744bf..25a92842dd99 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -165,21 +165,11 @@ static inline int numa_node_id(void)
return 0;
}
-static inline int cpu_to_node(int cpu)
-{
- return 0;
-}
-
static inline int early_cpu_to_node(int cpu)
{
return 0;
}
-static inline const struct cpumask *cpumask_of_node(int node)
-{
- return cpu_online_mask;
-}
-
static inline void setup_node_to_cpumask_map(void) { }
#endif
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 8c44c232efcb..59cdfa4686b2 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -48,7 +48,7 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
* P4, Core and beyond CPUs
*/
if (c->x86_vendor == X86_VENDOR_INTEL &&
- (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 14)))
+ (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 14)))
flags->bm_control = 0;
}
EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
index 7da00b799cda..0e50e1e5c573 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S
+++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
@@ -56,6 +56,6 @@ SECTIONS
/DISCARD/ : {
*(.note*)
}
-
- . = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!");
}
+
+ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!");
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 7029f0e2acad..472763d92098 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -98,8 +98,9 @@ static struct notifier_block mce_raise_nb = {
};
/* Inject mce on current CPU */
-static int raise_local(struct mce *m)
+static int raise_local(void)
{
+ struct mce *m = &__get_cpu_var(injectm);
int context = MCJ_CTX(m->inject_flags);
int ret = 0;
int cpu = m->extcpu;
@@ -167,12 +168,12 @@ static void raise_mce(struct mce *m)
}
cpu_relax();
}
- raise_local(m);
+ raise_local();
put_cpu();
put_online_cpus();
} else
#endif
- raise_local(m);
+ raise_local();
}
/* Error injection interface */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 2f5aab26320e..b1598a9436d0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -85,6 +85,18 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
static DEFINE_PER_CPU(struct mce, mces_seen);
static int cpu_missing;
+static void default_decode_mce(struct mce *m)
+{
+ pr_emerg("No human readable MCE decoding support on this CPU type.\n");
+ pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
+}
+
+/*
+ * CPU/chipset specific EDAC code can register a callback here to print
+ * MCE errors in a human-readable form:
+ */
+void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce;
+EXPORT_SYMBOL(x86_mce_decode_callback);
/* MCA banks polled by the period polling timer for corrected events */
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
@@ -165,49 +177,46 @@ void mce_log(struct mce *mce)
set_bit(0, &mce_need_notify);
}
-void __weak decode_mce(struct mce *m)
-{
- return;
-}
-
static void print_mce(struct mce *m)
{
- printk(KERN_EMERG
- "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
+ pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
m->extcpu, m->mcgstatus, m->bank, m->status);
+
if (m->ip) {
- printk(KERN_EMERG "RIP%s %02x:<%016Lx> ",
- !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
- m->cs, m->ip);
+ pr_emerg("RIP%s %02x:<%016Lx> ",
+ !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
+ m->cs, m->ip);
+
if (m->cs == __KERNEL_CS)
print_symbol("{%s}", m->ip);
- printk(KERN_CONT "\n");
+ pr_cont("\n");
}
- printk(KERN_EMERG "TSC %llx ", m->tsc);
+
+ pr_emerg("TSC %llx ", m->tsc);
if (m->addr)
- printk(KERN_CONT "ADDR %llx ", m->addr);
+ pr_cont("ADDR %llx ", m->addr);
if (m->misc)
- printk(KERN_CONT "MISC %llx ", m->misc);
- printk(KERN_CONT "\n");
- printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
- m->cpuvendor, m->cpuid, m->time, m->socketid,
- m->apicid);
+ pr_cont("MISC %llx ", m->misc);
+
+ pr_cont("\n");
+ pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
+ m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid);
- decode_mce(m);
+ /*
+ * Print out human-readable details about the MCE error,
+ * (if the CPU has an implementation for that):
+ */
+ x86_mce_decode_callback(m);
}
static void print_mce_head(void)
{
- printk(KERN_EMERG "\nHARDWARE ERROR\n");
+ pr_emerg("\nHARDWARE ERROR\n");
}
static void print_mce_tail(void)
{
- printk(KERN_EMERG "This is not a software problem!\n"
-#if (!defined(CONFIG_EDAC) || !defined(CONFIG_CPU_SUP_AMD))
- "Run through mcelog --ascii to decode and contact your hardware vendor\n"
-#endif
- );
+ pr_emerg("This is not a software problem!\n");
}
#define PANIC_TIMEOUT 5 /* 5 seconds */
@@ -221,6 +230,7 @@ static atomic_t mce_fake_paniced;
static void wait_for_panic(void)
{
long timeout = PANIC_TIMEOUT*USEC_PER_SEC;
+
preempt_disable();
local_irq_enable();
while (timeout-- > 0)
@@ -288,6 +298,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
static int msr_to_offset(u32 msr)
{
unsigned bank = __get_cpu_var(injectm.bank);
+
if (msr == rip_msr)
return offsetof(struct mce, ip);
if (msr == MSR_IA32_MCx_STATUS(bank))
@@ -305,13 +316,25 @@ static int msr_to_offset(u32 msr)
static u64 mce_rdmsrl(u32 msr)
{
u64 v;
+
if (__get_cpu_var(injectm).finished) {
int offset = msr_to_offset(msr);
+
if (offset < 0)
return 0;
return *(u64 *)((char *)&__get_cpu_var(injectm) + offset);
}
- rdmsrl(msr, v);
+
+ if (rdmsrl_safe(msr, &v)) {
+ WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr);
+ /*
+ * Return zero in case the access faulted. This should
+ * not happen normally but can happen if the CPU does
+ * something weird, or if the code is buggy.
+ */
+ v = 0;
+ }
+
return v;
}
@@ -319,6 +342,7 @@ static void mce_wrmsrl(u32 msr, u64 v)
{
if (__get_cpu_var(injectm).finished) {
int offset = msr_to_offset(msr);
+
if (offset >= 0)
*(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v;
return;
@@ -415,7 +439,7 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
m->ip = mce_rdmsrl(rip_msr);
}
-#ifdef CONFIG_X86_LOCAL_APIC
+#ifdef CONFIG_X86_LOCAL_APIC
/*
* Called after interrupts have been reenabled again
* when a MCE happened during an interrupts off region
@@ -1172,6 +1196,7 @@ static int mce_banks_init(void)
return -ENOMEM;
for (i = 0; i < banks; i++) {
struct mce_bank *b = &mce_banks[i];
+
b->ctl = -1ULL;
b->init = 1;
}
@@ -1203,6 +1228,7 @@ static int __cpuinit mce_cap_init(void)
banks = b;
if (!mce_banks) {
int err = mce_banks_init();
+
if (err)
return err;
}
@@ -1237,6 +1263,7 @@ static void mce_init(void)
for (i = 0; i < banks; i++) {
struct mce_bank *b = &mce_banks[i];
+
if (!b->init)
continue;
wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
@@ -1626,6 +1653,7 @@ static int mce_disable(void)
for (i = 0; i < banks; i++) {
struct mce_bank *b = &mce_banks[i];
+
if (b->init)
wrmsrl(MSR_IA32_MCx_CTL(i), 0);
}
@@ -1911,6 +1939,7 @@ static void mce_disable_cpu(void *h)
cmci_clear();
for (i = 0; i < banks; i++) {
struct mce_bank *b = &mce_banks[i];
+
if (b->init)
wrmsrl(MSR_IA32_MCx_CTL(i), 0);
}
@@ -1928,6 +1957,7 @@ static void mce_reenable_cpu(void *h)
cmci_reenable();
for (i = 0; i < banks; i++) {
struct mce_bank *b = &mce_banks[i];
+
if (b->init)
wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 889f665fe93d..7c785634af2b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -8,6 +8,7 @@
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
+#include <linux/sched.h>
#include <asm/apic.h>
#include <asm/processor.h>
#include <asm/msr.h>
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 63a56d147e4a..b3a1dba75330 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -34,20 +34,31 @@
/* How long to wait between reporting thermal events */
#define CHECK_INTERVAL (300 * HZ)
-static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
-static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
-static DEFINE_PER_CPU(bool, thermal_throttle_active);
+/*
+ * Current thermal throttling state:
+ */
+struct thermal_state {
+ bool is_throttled;
+
+ u64 next_check;
+ unsigned long throttle_count;
+ unsigned long last_throttle_count;
+};
+
+static DEFINE_PER_CPU(struct thermal_state, thermal_state);
-static atomic_t therm_throt_en = ATOMIC_INIT(0);
+static atomic_t therm_throt_en = ATOMIC_INIT(0);
#ifdef CONFIG_SYSFS
#define define_therm_throt_sysdev_one_ro(_name) \
static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
#define define_therm_throt_sysdev_show_func(name) \
-static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
- struct sysdev_attribute *attr, \
- char *buf) \
+ \
+static ssize_t therm_throt_sysdev_show_##name( \
+ struct sys_device *dev, \
+ struct sysdev_attribute *attr, \
+ char *buf) \
{ \
unsigned int cpu = dev->id; \
ssize_t ret; \
@@ -55,7 +66,7 @@ static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
preempt_disable(); /* CPU hotplug */ \
if (cpu_online(cpu)) \
ret = sprintf(buf, "%lu\n", \
- per_cpu(thermal_throttle_##name, cpu)); \
+ per_cpu(thermal_state, cpu).name); \
else \
ret = 0; \
preempt_enable(); \
@@ -63,11 +74,11 @@ static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
return ret; \
}
-define_therm_throt_sysdev_show_func(count);
-define_therm_throt_sysdev_one_ro(count);
+define_therm_throt_sysdev_show_func(throttle_count);
+define_therm_throt_sysdev_one_ro(throttle_count);
static struct attribute *thermal_throttle_attrs[] = {
- &attr_count.attr,
+ &attr_throttle_count.attr,
NULL
};
@@ -93,33 +104,39 @@ static struct attribute_group thermal_throttle_attr_group = {
* 1 : Event should be logged further, and a message has been
* printed to the syslog.
*/
-static int therm_throt_process(int curr)
+static int therm_throt_process(bool is_throttled)
{
- unsigned int cpu = smp_processor_id();
- __u64 tmp_jiffs = get_jiffies_64();
- bool was_throttled = __get_cpu_var(thermal_throttle_active);
- bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr;
+ struct thermal_state *state;
+ unsigned int this_cpu;
+ bool was_throttled;
+ u64 now;
+
+ this_cpu = smp_processor_id();
+ now = get_jiffies_64();
+ state = &per_cpu(thermal_state, this_cpu);
+
+ was_throttled = state->is_throttled;
+ state->is_throttled = is_throttled;
if (is_throttled)
- __get_cpu_var(thermal_throttle_count)++;
+ state->throttle_count++;
- if (!(was_throttled ^ is_throttled) &&
- time_before64(tmp_jiffs, __get_cpu_var(next_check)))
+ if (time_before64(now, state->next_check) &&
+ state->throttle_count != state->last_throttle_count)
return 0;
- __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL;
+ state->next_check = now + CHECK_INTERVAL;
+ state->last_throttle_count = state->throttle_count;
/* if we just entered the thermal event */
if (is_throttled) {
- printk(KERN_CRIT "CPU%d: Temperature above threshold, "
- "cpu clock throttled (total events = %lu)\n",
- cpu, __get_cpu_var(thermal_throttle_count));
+ printk(KERN_CRIT "CPU%d: Temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, state->throttle_count);
add_taint(TAINT_MACHINE_CHECK);
return 1;
}
if (was_throttled) {
- printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
+ printk(KERN_INFO "CPU%d: Temperature/speed normal\n", this_cpu);
return 1;
}
@@ -213,7 +230,7 @@ static void intel_thermal_interrupt(void)
__u64 msr_val;
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
- if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
+ if (therm_throt_process((msr_val & THERM_STATUS_PROCHOT) != 0))
mce_log_therm_throt_event(msr_val);
}
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index f04e72527604..3c1b12d461d1 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -96,17 +96,24 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
unsigned long long base, size;
char *ptr;
char line[LINE_SIZE];
+ int length;
size_t linelen;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!len)
- return -EINVAL;
memset(line, 0, LINE_SIZE);
- if (len > LINE_SIZE)
- len = LINE_SIZE;
- if (copy_from_user(line, buf, len - 1))
+
+ length = len;
+ length--;
+
+ if (length > LINE_SIZE - 1)
+ length = LINE_SIZE - 1;
+
+ if (length < 0)
+ return -EINVAL;
+
+ if (copy_from_user(line, buf, length))
return -EFAULT;
linelen = strlen(line);
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index a3c7adb06b78..b5801c311846 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1790,6 +1790,9 @@ void smp_perf_pending_interrupt(struct pt_regs *regs)
void set_perf_event_pending(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
+ if (!x86_pmu.apic || !x86_pmu_initialized())
+ return;
+
apic->send_IPI_self(LOCAL_PENDING_VECTOR);
#endif
}
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 85419bb7d4ab..d17d482a04f4 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1378,8 +1378,8 @@ static unsigned long ram_alignment(resource_size_t pos)
if (mb < 16)
return 1024*1024;
- /* To 32MB for anything above that */
- return 32*1024*1024;
+ /* To 64MB for anything above that */
+ return 64*1024*1024;
}
#define MAX_RESOURCE_SIZE ((resource_size_t)-1)
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 2acfd3fdc0cc..b9c830c12b4a 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -178,6 +178,11 @@ asmlinkage void early_printk(const char *fmt, ...)
static inline void early_console_register(struct console *con, int keep_early)
{
+ if (early_console->index != -1) {
+ printk(KERN_CRIT "ERROR: earlyprintk= %s already used\n",
+ con->name);
+ return;
+ }
early_console = con;
if (keep_early)
early_console->flags &= ~CON_BOOT;
@@ -201,8 +206,11 @@ static int __init setup_early_printk(char *buf)
while (*buf != '\0') {
if (!strncmp(buf, "serial", 6)) {
- early_serial_init(buf + 6);
+ buf += 6;
+ early_serial_init(buf);
early_console_register(&early_serial_console, keep);
+ if (!strncmp(buf, ",ttyS", 5))
+ buf += 5;
}
if (!strncmp(buf, "ttyS", 4)) {
early_serial_init(buf + 4);
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 218aad7ee76e..050c278481b1 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -79,7 +79,7 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE)
* any particular GDT layout, because we load our own as soon as we
* can.
*/
-.section .text.head,"ax",@progbits
+__HEAD
ENTRY(startup_32)
/* test KEEP_SEGMENTS flag to see if the bootloader is asking
us to not reload segments */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index d0bc0a13a437..780cd928fcd5 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -40,7 +40,7 @@ L4_START_KERNEL = pgd_index(__START_KERNEL_map)
L3_START_KERNEL = pud_index(__START_KERNEL_map)
.text
- .section .text.head
+ __HEAD
.code64
.globl startup_64
startup_64:
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index 43cec6bdda63..9c3bd4a2050e 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -10,6 +10,16 @@
EXPORT_SYMBOL(mcount);
#endif
+/*
+ * Note, this is a prototype to get at the symbol for
+ * the export, but dont use it from C code, it is used
+ * by assembly code and is not using C calling convention!
+ */
+#ifndef CONFIG_X86_CMPXCHG64
+extern void cmpxchg8b_emu(void);
+EXPORT_SYMBOL(cmpxchg8b_emu);
+#endif
+
/* Networking helper routines. */
EXPORT_SYMBOL(csum_partial_copy_generic);
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 64b838eac18c..b2a71dca5642 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -35,7 +35,7 @@ int iommu_detected __read_mostly = 0;
/*
* This variable becomes 1 if iommu=pt is passed on the kernel command line.
- * If this variable is 1, IOMMU implementations do no DMA ranslation for
+ * If this variable is 1, IOMMU implementations do no DMA translation for
* devices and allow every device to access to whole physical memory. This is
* useful if a user want to use an IOMMU only for KVM device assignment to
* guests and not for driver dma translation.
@@ -311,7 +311,7 @@ void pci_iommu_shutdown(void)
amd_iommu_shutdown();
}
/* Must execute after PCI subsystem */
-fs_initcall(pci_iommu_init);
+rootfs_initcall(pci_iommu_init);
#ifdef CONFIG_PCI
/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 98a827ee9ed7..a7f1b64f86e0 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -16,6 +16,7 @@
#include <linux/agp_backend.h>
#include <linux/init.h>
#include <linux/mm.h>
+#include <linux/sched.h>
#include <linux/string.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 27349f92a6d7..a1a3cdda06e1 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -4,6 +4,7 @@
#include <linux/pm.h>
#include <linux/efi.h>
#include <linux/dmi.h>
+#include <linux/sched.h>
#include <linux/tboot.h>
#include <acpi/reboot.h>
#include <asm/io.h>
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index dcb00d278512..be2573448ed9 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -38,7 +38,8 @@ unsigned long profile_pc(struct pt_regs *regs)
#ifdef CONFIG_FRAME_POINTER
return *(unsigned long *)(regs->bp + sizeof(long));
#else
- unsigned long *sp = (unsigned long *)regs->sp;
+ unsigned long *sp =
+ (unsigned long *)kernel_stack_pointer(regs);
/*
* Return address is either directly at stack pointer
* or above a saved flags. Eflags has bits 22-31 zero,
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index 699f7eeb896a..cd022121cab6 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -3,8 +3,16 @@
#include <asm/trampoline.h>
#include <asm/e820.h>
+#if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP)
+#define __trampinit
+#define __trampinitdata
+#else
+#define __trampinit __cpuinit
+#define __trampinitdata __cpuinitdata
+#endif
+
/* ready for x86_64 and x86 */
-unsigned char *__cpuinitdata trampoline_base = __va(TRAMPOLINE_BASE);
+unsigned char *__trampinitdata trampoline_base = __va(TRAMPOLINE_BASE);
void __init reserve_trampoline_memory(void)
{
@@ -26,7 +34,7 @@ void __init reserve_trampoline_memory(void)
* bootstrap into the page concerned. The caller
* has made sure it's suitably aligned.
*/
-unsigned long __cpuinit setup_trampoline(void)
+unsigned long __trampinit setup_trampoline(void)
{
memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE);
return virt_to_phys(trampoline_base);
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index 596d54c660a5..3af2dff58b21 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -32,8 +32,12 @@
#include <asm/segment.h>
#include <asm/processor-flags.h>
+#ifdef CONFIG_ACPI_SLEEP
+.section .rodata, "a", @progbits
+#else
/* We can free up the trampoline after bootup if cpu hotplug is not supported. */
__CPUINITRODATA
+#endif
.code16
ENTRY(trampoline_data)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a665c71352b8..7e37dcee0cc3 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -72,11 +72,9 @@ char ignore_fpu_irq;
/*
* The IDT has to be page-aligned to simplify the Pentium
- * F0 0F bug workaround.. We have a special link segment
- * for this.
+ * F0 0F bug workaround.
*/
-gate_desc idt_table[NR_VECTORS]
- __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
+gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, };
#endif
DECLARE_BITMAP(used_vectors, NR_VECTORS);
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 027b5b498993..f37930954d15 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -114,7 +114,7 @@ void __cpuinit check_tsc_sync_source(int cpu)
return;
if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
- pr_info("Skipping synchronization checks as TSC is reliable.\n");
+ printk_once(KERN_INFO "Skipping synchronization checks as TSC is reliable.\n");
return;
}
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 31e6f6cfe53e..d430e4c30193 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -648,7 +648,7 @@ static inline int __init activate_vmi(void)
pv_info.paravirt_enabled = 1;
pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK;
- pv_info.name = "vmi";
+ pv_info.name = "vmi [deprecated]";
pv_init_ops.patch = vmi_patch;
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index a46acccec38a..8d6001ad8d8d 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -65,17 +65,11 @@ SECTIONS
#endif
/* Text and read-only data */
-
- /* bootstrapping code */
- .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) {
- _text = .;
- *(.text.head)
- } :text = 0x9090
-
- /* The rest of the text */
.text : AT(ADDR(.text) - LOAD_OFFSET) {
+ _text = .;
+ /* bootstrapping code */
+ HEAD_TEXT
#ifdef CONFIG_X86_32
- /* not really needed, already page aligned */
. = ALIGN(PAGE_SIZE);
*(.text.page_aligned)
#endif
@@ -94,13 +88,7 @@ SECTIONS
NOTES :text :note
- /* Exception table */
- . = ALIGN(16);
- __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
- __start___ex_table = .;
- *(__ex_table)
- __stop___ex_table = .;
- } :text = 0x9090
+ EXCEPTION_TABLE(16) :text = 0x9090
RO_DATA(PAGE_SIZE)
@@ -118,7 +106,6 @@ SECTIONS
#endif
PAGE_ALIGNED_DATA(PAGE_SIZE)
- *(.data.idt)
CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES)
@@ -135,24 +122,21 @@ SECTIONS
#ifdef CONFIG_X86_64
#define VSYSCALL_ADDR (-10*1024*1024)
-#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data) + SIZEOF(.data) + \
- PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
-#define VSYSCALL_VIRT_ADDR ((ADDR(.data) + SIZEOF(.data) + \
- PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
-#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR)
+#define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET)
#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
-#define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR)
+#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
+ . = ALIGN(4096);
+ __vsyscall_0 = .;
+
. = VSYSCALL_ADDR;
- .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) {
+ .vsyscall_0 : AT(VLOAD(.vsyscall_0)) {
*(.vsyscall_0)
} :user
- __vsyscall_0 = VSYSCALL_VIRT_ADDR;
-
. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
.vsyscall_fn : AT(VLOAD(.vsyscall_fn)) {
*(.vsyscall_fn)
@@ -192,11 +176,9 @@ SECTIONS
*(.vsyscall_3)
}
- . = VSYSCALL_VIRT_ADDR + PAGE_SIZE;
+ . = __vsyscall_0 + PAGE_SIZE;
#undef VSYSCALL_ADDR
-#undef VSYSCALL_PHYS_ADDR
-#undef VSYSCALL_VIRT_ADDR
#undef VLOAD_OFFSET
#undef VLOAD
#undef VVIRT_OFFSET
@@ -219,36 +201,12 @@ SECTIONS
PERCPU_VADDR(0, :percpu)
#endif
- .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
- _sinittext = .;
- INIT_TEXT
- _einittext = .;
- }
+ INIT_TEXT_SECTION(PAGE_SIZE)
#ifdef CONFIG_X86_64
:init
#endif
- .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
- INIT_DATA
- }
-
- . = ALIGN(16);
- .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
- __setup_start = .;
- *(.init.setup)
- __setup_end = .;
- }
- .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
- __initcall_start = .;
- INITCALLS
- __initcall_end = .;
- }
-
- .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
- __con_initcall_start = .;
- *(.con_initcall.init)
- __con_initcall_end = .;
- }
+ INIT_DATA_SECTION(16)
.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
__x86_cpu_dev_start = .;
@@ -256,8 +214,6 @@ SECTIONS
__x86_cpu_dev_end = .;
}
- SECURITY_INIT
-
. = ALIGN(8);
.parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
__parainstructions = .;
@@ -288,15 +244,6 @@ SECTIONS
EXIT_DATA
}
-#ifdef CONFIG_BLK_DEV_INITRD
- . = ALIGN(PAGE_SIZE);
- .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
- __initramfs_start = .;
- *(.init.ramfs)
- __initramfs_end = .;
- }
-#endif
-
#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
PERCPU(PAGE_SIZE)
#endif
@@ -358,8 +305,8 @@ SECTIONS
#ifdef CONFIG_X86_32
-. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
- "kernel image bigger than KERNEL_IMAGE_SIZE");
+ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
+ "kernel image bigger than KERNEL_IMAGE_SIZE");
#else
/*
* Per-cpu symbols which need to be offset from __per_cpu_load
@@ -372,12 +319,12 @@ INIT_PER_CPU(irq_stack_union);
/*
* Build-time check on the image size:
*/
-. = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
- "kernel image bigger than KERNEL_IMAGE_SIZE");
+ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
+ "kernel image bigger than KERNEL_IMAGE_SIZE");
#ifdef CONFIG_SMP
-. = ASSERT((per_cpu__irq_stack_union == 0),
- "irq_stack_union is not at start of per-cpu area");
+ASSERT((per_cpu__irq_stack_union == 0),
+ "irq_stack_union is not at start of per-cpu area");
#endif
#endif /* CONFIG_X86_32 */
@@ -385,7 +332,6 @@ INIT_PER_CPU(irq_stack_union);
#ifdef CONFIG_KEXEC
#include <asm/kexec.h>
-. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
- "kexec control code size is too big");
+ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
+ "kexec control code size is too big");
#endif
-
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 1ae5ceba7eb2..7024224f0fc8 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -664,7 +664,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
{
ktime_t now = apic->lapic_timer.timer.base->get_time();
- apic->lapic_timer.period = apic_get_reg(apic, APIC_TMICT) *
+ apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) *
APIC_BUS_CYCLE_NS * apic->divide_count;
atomic_set(&apic->lapic_timer.pending, 0);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index eca41ae9f453..685a4ffac8e6 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -156,6 +156,8 @@ module_param(oos_shadow, bool, 0644);
#define CREATE_TRACE_POINTS
#include "mmutrace.h"
+#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
+
#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
struct kvm_rmap_desc {
@@ -634,9 +636,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
if (*spte & shadow_accessed_mask)
kvm_set_pfn_accessed(pfn);
if (is_writeble_pte(*spte))
- kvm_release_pfn_dirty(pfn);
- else
- kvm_release_pfn_clean(pfn);
+ kvm_set_pfn_dirty(pfn);
rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level);
if (!*rmapp) {
printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
@@ -748,7 +748,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
return write_protected;
}
-static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
+static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data)
{
u64 *spte;
int need_tlb_flush = 0;
@@ -763,8 +763,45 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
return need_tlb_flush;
}
-static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
- int (*handler)(struct kvm *kvm, unsigned long *rmapp))
+static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data)
+{
+ int need_flush = 0;
+ u64 *spte, new_spte;
+ pte_t *ptep = (pte_t *)data;
+ pfn_t new_pfn;
+
+ WARN_ON(pte_huge(*ptep));
+ new_pfn = pte_pfn(*ptep);
+ spte = rmap_next(kvm, rmapp, NULL);
+ while (spte) {
+ BUG_ON(!is_shadow_present_pte(*spte));
+ rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte);
+ need_flush = 1;
+ if (pte_write(*ptep)) {
+ rmap_remove(kvm, spte);
+ __set_spte(spte, shadow_trap_nonpresent_pte);
+ spte = rmap_next(kvm, rmapp, NULL);
+ } else {
+ new_spte = *spte &~ (PT64_BASE_ADDR_MASK);
+ new_spte |= (u64)new_pfn << PAGE_SHIFT;
+
+ new_spte &= ~PT_WRITABLE_MASK;
+ new_spte &= ~SPTE_HOST_WRITEABLE;
+ if (is_writeble_pte(*spte))
+ kvm_set_pfn_dirty(spte_to_pfn(*spte));
+ __set_spte(spte, new_spte);
+ spte = rmap_next(kvm, rmapp, spte);
+ }
+ }
+ if (need_flush)
+ kvm_flush_remote_tlbs(kvm);
+
+ return 0;
+}
+
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, u64 data,
+ int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+ u64 data))
{
int i, j;
int retval = 0;
@@ -786,13 +823,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
if (hva >= start && hva < end) {
gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
- retval |= handler(kvm, &memslot->rmap[gfn_offset]);
+ retval |= handler(kvm, &memslot->rmap[gfn_offset],
+ data);
for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
int idx = gfn_offset;
idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
retval |= handler(kvm,
- &memslot->lpage_info[j][idx].rmap_pde);
+ &memslot->lpage_info[j][idx].rmap_pde,
+ data);
}
}
}
@@ -802,10 +841,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
{
- return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+ return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
}
-static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+ kvm_handle_hva(kvm, hva, (u64)&pte, kvm_set_pte_rmapp);
+}
+
+static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data)
{
u64 *spte;
int young = 0;
@@ -841,13 +885,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
gfn = unalias_gfn(vcpu->kvm, gfn);
rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
- kvm_unmap_rmapp(vcpu->kvm, rmapp);
+ kvm_unmap_rmapp(vcpu->kvm, rmapp, 0);
kvm_flush_remote_tlbs(vcpu->kvm);
}
int kvm_age_hva(struct kvm *kvm, unsigned long hva)
{
- return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+ return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);
}
#ifdef MMU_DEBUG
@@ -1756,7 +1800,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
unsigned pte_access, int user_fault,
int write_fault, int dirty, int level,
gfn_t gfn, pfn_t pfn, bool speculative,
- bool can_unsync)
+ bool can_unsync, bool reset_host_protection)
{
u64 spte;
int ret = 0;
@@ -1783,6 +1827,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
kvm_is_mmio_pfn(pfn));
+ if (reset_host_protection)
+ spte |= SPTE_HOST_WRITEABLE;
+
spte |= (u64)pfn << PAGE_SHIFT;
if ((pte_access & ACC_WRITE_MASK)
@@ -1828,7 +1875,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
unsigned pt_access, unsigned pte_access,
int user_fault, int write_fault, int dirty,
int *ptwrite, int level, gfn_t gfn,
- pfn_t pfn, bool speculative)
+ pfn_t pfn, bool speculative,
+ bool reset_host_protection)
{
int was_rmapped = 0;
int was_writeble = is_writeble_pte(*sptep);
@@ -1860,7 +1908,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
}
if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault,
- dirty, level, gfn, pfn, speculative, true)) {
+ dirty, level, gfn, pfn, speculative, true,
+ reset_host_protection)) {
if (write_fault)
*ptwrite = 1;
kvm_x86_ops->tlb_flush(vcpu);
@@ -1877,8 +1926,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
page_header_update_slot(vcpu->kvm, sptep, gfn);
if (!was_rmapped) {
rmap_count = rmap_add(vcpu, sptep, gfn);
- if (!is_rmap_spte(*sptep))
- kvm_release_pfn_clean(pfn);
+ kvm_release_pfn_clean(pfn);
if (rmap_count > RMAP_RECYCLE_THRESHOLD)
rmap_recycle(vcpu, sptep, gfn);
} else {
@@ -1909,7 +1957,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
if (iterator.level == level) {
mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
0, write, 1, &pt_write,
- level, gfn, pfn, false);
+ level, gfn, pfn, false, true);
++vcpu->stat.pf_fixed;
break;
}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index d2fec9c12d22..72558f8ff3f5 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -273,9 +273,13 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
return;
kvm_get_pfn(pfn);
+ /*
+ * we call mmu_set_spte() with reset_host_protection = true beacuse that
+ * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
+ */
mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL,
- gpte_to_gfn(gpte), pfn, true);
+ gpte_to_gfn(gpte), pfn, true, true);
}
/*
@@ -308,7 +312,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
user_fault, write_fault,
gw->ptes[gw->level-1] & PT_DIRTY_MASK,
ptwrite, level,
- gw->gfn, pfn, false);
+ gw->gfn, pfn, false, true);
break;
}
@@ -558,6 +562,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
int i, offset, nr_present;
+ bool reset_host_protection;
offset = nr_present = 0;
@@ -595,9 +600,16 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
nr_present++;
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+ if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) {
+ pte_access &= ~ACC_WRITE_MASK;
+ reset_host_protection = 0;
+ } else {
+ reset_host_protection = 1;
+ }
set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn,
- spte_to_pfn(sp->spt[i]), true, false);
+ spte_to_pfn(sp->spt[i]), true, false,
+ reset_host_protection);
}
return !nr_present;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 944cc9c04b3c..c17404add91f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -767,6 +767,8 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
rdtscll(tsc_this);
delta = vcpu->arch.host_tsc - tsc_this;
svm->vmcb->control.tsc_offset += delta;
+ if (is_nested(svm))
+ svm->nested.hsave->control.tsc_offset += delta;
vcpu->cpu = cpu;
kvm_migrate_timers(vcpu);
svm->asid_generation = 0;
@@ -2057,10 +2059,14 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
switch (ecx) {
case MSR_IA32_TSC: {
- u64 tsc;
+ u64 tsc_offset;
+
+ if (is_nested(svm))
+ tsc_offset = svm->nested.hsave->control.tsc_offset;
+ else
+ tsc_offset = svm->vmcb->control.tsc_offset;
- rdtscll(tsc);
- *data = svm->vmcb->control.tsc_offset + tsc;
+ *data = tsc_offset + native_read_tsc();
break;
}
case MSR_K6_STAR:
@@ -2146,10 +2152,17 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
switch (ecx) {
case MSR_IA32_TSC: {
- u64 tsc;
+ u64 tsc_offset = data - native_read_tsc();
+ u64 g_tsc_offset = 0;
+
+ if (is_nested(svm)) {
+ g_tsc_offset = svm->vmcb->control.tsc_offset -
+ svm->nested.hsave->control.tsc_offset;
+ svm->nested.hsave->control.tsc_offset = tsc_offset;
+ }
+
+ svm->vmcb->control.tsc_offset = tsc_offset + g_tsc_offset;
- rdtscll(tsc);
- svm->vmcb->control.tsc_offset = data - tsc;
break;
}
case MSR_K6_STAR:
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f3812014bd0b..ed53b42caba1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -709,7 +709,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (vcpu->cpu != cpu) {
vcpu_clear(vmx);
kvm_migrate_timers(vcpu);
- vpid_sync_vcpu_all(vmx);
+ set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
local_irq_disable();
list_add(&vmx->local_vcpus_link,
&per_cpu(vcpus_on_cpu, cpu));
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index be451ee44249..9b9695322f56 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1591,6 +1591,8 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
if (cpuid->nent < 1)
goto out;
+ if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
+ cpuid->nent = KVM_MAX_CPUID_ENTRIES;
r = -ENOMEM;
cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
if (!cpuid_entries)
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 9e609206fac9..85f5db95c60f 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -16,7 +16,9 @@ ifeq ($(CONFIG_X86_32),y)
lib-y += checksum_32.o
lib-y += strstr_32.o
lib-y += semaphore_32.o string_32.o
-
+ifneq ($(CONFIG_X86_CMPXCHG64),y)
+ lib-y += cmpxchg8b_emu.o
+endif
lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
else
obj-y += io_64.o iomap_copy_64.o
diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
new file mode 100644
index 000000000000..828cb710dec2
--- /dev/null
+++ b/arch/x86/lib/cmpxchg8b_emu.S
@@ -0,0 +1,57 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/alternative-asm.h>
+#include <asm/frame.h>
+#include <asm/dwarf2.h>
+
+
+.text
+
+/*
+ * Inputs:
+ * %esi : memory location to compare
+ * %eax : low 32 bits of old value
+ * %edx : high 32 bits of old value
+ * %ebx : low 32 bits of new value
+ * %ecx : high 32 bits of new value
+ */
+ENTRY(cmpxchg8b_emu)
+CFI_STARTPROC
+
+#
+# Emulate 'cmpxchg8b (%esi)' on UP except we don't
+# set the whole ZF thing (caller will just compare
+# eax:edx with the expected value)
+#
+cmpxchg8b_emu:
+ pushfl
+ cli
+
+ cmpl (%esi), %eax
+ jne not_same
+ cmpl 4(%esi), %edx
+ jne half_same
+
+ movl %ebx, (%esi)
+ movl %ecx, 4(%esi)
+
+ popfl
+ ret
+
+ not_same:
+ movl (%esi), %eax
+ half_same:
+ movl 4(%esi), %edx
+
+ popfl
+ ret
+
+CFI_ENDPROC
+ENDPROC(cmpxchg8b_emu)
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 9b5a9f59a478..06630d26e56d 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,9 +1,10 @@
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
- pat.o pgtable.o physaddr.o gup.o
+ pat.o pgtable.o physaddr.o gup.o setup_nx.o
# Make sure __phys_addr has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_physaddr.o := $(nostackp)
+CFLAGS_setup_nx.o := $(nostackp)
obj-$(CONFIG_SMP) += tlb.o
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 0607119cef94..73ffd5536f62 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -28,69 +28,6 @@ int direct_gbpages
#endif
;
-int nx_enabled;
-
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
-static int disable_nx __cpuinitdata;
-
-/*
- * noexec = on|off
- *
- * Control non-executable mappings for processes.
- *
- * on Enable
- * off Disable
- */
-static int __init noexec_setup(char *str)
-{
- if (!str)
- return -EINVAL;
- if (!strncmp(str, "on", 2)) {
- __supported_pte_mask |= _PAGE_NX;
- disable_nx = 0;
- } else if (!strncmp(str, "off", 3)) {
- disable_nx = 1;
- __supported_pte_mask &= ~_PAGE_NX;
- }
- return 0;
-}
-early_param("noexec", noexec_setup);
-#endif
-
-#ifdef CONFIG_X86_PAE
-static void __init set_nx(void)
-{
- unsigned int v[4], l, h;
-
- if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
- cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
-
- if ((v[3] & (1 << 20)) && !disable_nx) {
- rdmsr(MSR_EFER, l, h);
- l |= EFER_NX;
- wrmsr(MSR_EFER, l, h);
- nx_enabled = 1;
- __supported_pte_mask |= _PAGE_NX;
- }
- }
-}
-#else
-static inline void set_nx(void)
-{
-}
-#endif
-
-#ifdef CONFIG_X86_64
-void __cpuinit check_efer(void)
-{
- unsigned long efer;
-
- rdmsrl(MSR_EFER, efer);
- if (!(efer & EFER_NX) || disable_nx)
- __supported_pte_mask &= ~_PAGE_NX;
-}
-#endif
-
static void __init find_early_table_space(unsigned long end, int use_pse,
int use_gbpages)
{
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 7257cf3decf9..e78cd0ec2bcf 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -81,6 +81,7 @@ enum {
void pat_init(void)
{
u64 pat;
+ bool boot_cpu = !boot_pat_state;
if (!pat_enabled)
return;
@@ -122,8 +123,10 @@ void pat_init(void)
rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
wrmsrl(MSR_IA32_CR_PAT, pat);
- printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
- smp_processor_id(), boot_pat_state, pat);
+
+ if (boot_cpu)
+ printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
+ smp_processor_id(), boot_pat_state, pat);
}
#undef PAT
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
new file mode 100644
index 000000000000..513d8ed5d2ec
--- /dev/null
+++ b/arch/x86/mm/setup_nx.c
@@ -0,0 +1,69 @@
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+
+#include <asm/pgtable.h>
+
+int nx_enabled;
+
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+static int disable_nx __cpuinitdata;
+
+/*
+ * noexec = on|off
+ *
+ * Control non-executable mappings for processes.
+ *
+ * on Enable
+ * off Disable
+ */
+static int __init noexec_setup(char *str)
+{
+ if (!str)
+ return -EINVAL;
+ if (!strncmp(str, "on", 2)) {
+ __supported_pte_mask |= _PAGE_NX;
+ disable_nx = 0;
+ } else if (!strncmp(str, "off", 3)) {
+ disable_nx = 1;
+ __supported_pte_mask &= ~_PAGE_NX;
+ }
+ return 0;
+}
+early_param("noexec", noexec_setup);
+#endif
+
+#ifdef CONFIG_X86_PAE
+void __init set_nx(void)
+{
+ unsigned int v[4], l, h;
+
+ if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
+ cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
+
+ if ((v[3] & (1 << 20)) && !disable_nx) {
+ rdmsr(MSR_EFER, l, h);
+ l |= EFER_NX;
+ wrmsr(MSR_EFER, l, h);
+ nx_enabled = 1;
+ __supported_pte_mask |= _PAGE_NX;
+ }
+ }
+}
+#else
+void set_nx(void)
+{
+}
+#endif
+
+#ifdef CONFIG_X86_64
+void __cpuinit check_efer(void)
+{
+ unsigned long efer;
+
+ rdmsrl(MSR_EFER, efer);
+ if (!(efer & EFER_NX) || disable_nx)
+ __supported_pte_mask &= ~_PAGE_NX;
+}
+#endif
+
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 52e62e57fedd..b22d13b0c71d 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -266,7 +266,7 @@ void pcibios_set_master(struct pci_dev *dev)
pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
}
-static struct vm_operations_struct pci_mmap_ops = {
+static const struct vm_operations_struct pci_mmap_ops = {
.access = generic_access_phys,
};
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c
index b53225d2cac3..e133ce25e290 100644
--- a/arch/x86/xen/debugfs.c
+++ b/arch/x86/xen/debugfs.c
@@ -100,7 +100,7 @@ static int xen_array_release(struct inode *inode, struct file *file)
return 0;
}
-static struct file_operations u32_array_fops = {
+static const struct file_operations u32_array_fops = {
.owner = THIS_MODULE,
.open = u32_array_open,
.release= xen_array_release,
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 544eb7496531..3439616d69f1 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1082,6 +1082,11 @@ asmlinkage void __init xen_start_kernel(void)
__supported_pte_mask |= _PAGE_IOMAP;
+#ifdef CONFIG_X86_64
+ /* Work out if we support NX */
+ check_efer();
+#endif
+
xen_setup_features();
/* Get mfn list */
@@ -1123,11 +1128,6 @@ asmlinkage void __init xen_start_kernel(void)
pgd = (pgd_t *)xen_start_info->pt_base;
-#ifdef CONFIG_X86_64
- /* Work out if we support NX */
- check_efer();
-#endif
-
/* Don't do the full vcpu_info placement stuff until we have a
possible map and a non-dummy shared_info. */
per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];