aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/entry/entry_32.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/entry/entry_32.S')
-rw-r--r--arch/x86/entry/entry_32.S344
1 files changed, 254 insertions, 90 deletions
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index f49e11669271..a4dc0dd2b94d 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -67,7 +67,6 @@
# define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
# define preempt_stop(clobbers)
-# define resume_kernel restore_all_kernel
#endif
.macro TRACE_IRQS_IRET
@@ -173,7 +172,7 @@
ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
.if \no_user_check == 0
/* coming from usermode? */
- testl $SEGMENT_RPL_MASK, PT_CS(%esp)
+ testl $USER_SEGMENT_RPL_MASK, PT_CS(%esp)
jz .Lend_\@
.endif
/* On user-cr3? */
@@ -203,10 +202,126 @@
.Lend_\@:
.endm
-.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0
+#define CS_FROM_ENTRY_STACK (1 << 31)
+#define CS_FROM_USER_CR3 (1 << 30)
+#define CS_FROM_KERNEL (1 << 29)
+#define CS_FROM_ESPFIX (1 << 28)
+
+.macro FIXUP_FRAME
+ /*
+ * The high bits of the CS dword (__csh) are used for CS_FROM_*.
+ * Clear them in case hardware didn't do this for us.
+ */
+ andl $0x0000ffff, 4*4(%esp)
+
+#ifdef CONFIG_VM86
+ testl $X86_EFLAGS_VM, 5*4(%esp)
+ jnz .Lfrom_usermode_no_fixup_\@
+#endif
+ testl $USER_SEGMENT_RPL_MASK, 4*4(%esp)
+ jnz .Lfrom_usermode_no_fixup_\@
+
+ orl $CS_FROM_KERNEL, 4*4(%esp)
+
+ /*
+ * When we're here from kernel mode; the (exception) stack looks like:
+ *
+ * 6*4(%esp) - <previous context>
+ * 5*4(%esp) - flags
+ * 4*4(%esp) - cs
+ * 3*4(%esp) - ip
+ * 2*4(%esp) - orig_eax
+ * 1*4(%esp) - gs / function
+ * 0*4(%esp) - fs
+ *
+ * Lets build a 5 entry IRET frame after that, such that struct pt_regs
+ * is complete and in particular regs->sp is correct. This gives us
+ * the original 6 enties as gap:
+ *
+ * 14*4(%esp) - <previous context>
+ * 13*4(%esp) - gap / flags
+ * 12*4(%esp) - gap / cs
+ * 11*4(%esp) - gap / ip
+ * 10*4(%esp) - gap / orig_eax
+ * 9*4(%esp) - gap / gs / function
+ * 8*4(%esp) - gap / fs
+ * 7*4(%esp) - ss
+ * 6*4(%esp) - sp
+ * 5*4(%esp) - flags
+ * 4*4(%esp) - cs
+ * 3*4(%esp) - ip
+ * 2*4(%esp) - orig_eax
+ * 1*4(%esp) - gs / function
+ * 0*4(%esp) - fs
+ */
+
+ pushl %ss # ss
+ pushl %esp # sp (points at ss)
+ addl $7*4, (%esp) # point sp back at the previous context
+ pushl 7*4(%esp) # flags
+ pushl 7*4(%esp) # cs
+ pushl 7*4(%esp) # ip
+ pushl 7*4(%esp) # orig_eax
+ pushl 7*4(%esp) # gs / function
+ pushl 7*4(%esp) # fs
+.Lfrom_usermode_no_fixup_\@:
+.endm
+
+.macro IRET_FRAME
+ /*
+ * We're called with %ds, %es, %fs, and %gs from the interrupted
+ * frame, so we shouldn't use them. Also, we may be in ESPFIX
+ * mode and therefore have a nonzero SS base and an offset ESP,
+ * so any attempt to access the stack needs to use SS. (except for
+ * accesses through %esp, which automatically use SS.)
+ */
+ testl $CS_FROM_KERNEL, 1*4(%esp)
+ jz .Lfinished_frame_\@
+
+ /*
+ * Reconstruct the 3 entry IRET frame right after the (modified)
+ * regs->sp without lowering %esp in between, such that an NMI in the
+ * middle doesn't scribble our stack.
+ */
+ pushl %eax
+ pushl %ecx
+ movl 5*4(%esp), %eax # (modified) regs->sp
+
+ movl 4*4(%esp), %ecx # flags
+ movl %ecx, %ss:-1*4(%eax)
+
+ movl 3*4(%esp), %ecx # cs
+ andl $0x0000ffff, %ecx
+ movl %ecx, %ss:-2*4(%eax)
+
+ movl 2*4(%esp), %ecx # ip
+ movl %ecx, %ss:-3*4(%eax)
+
+ movl 1*4(%esp), %ecx # eax
+ movl %ecx, %ss:-4*4(%eax)
+
+ popl %ecx
+ lea -4*4(%eax), %esp
+ popl %eax
+.Lfinished_frame_\@:
+.endm
+
+.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0 unwind_espfix=0
cld
+.if \skip_gs == 0
PUSH_GS
+.endif
pushl %fs
+
+ pushl %eax
+ movl $(__KERNEL_PERCPU), %eax
+ movl %eax, %fs
+.if \unwind_espfix > 0
+ UNWIND_ESPFIX_STACK
+.endif
+ popl %eax
+
+ FIXUP_FRAME
pushl %es
pushl %ds
pushl \pt_regs_ax
@@ -219,19 +334,17 @@
movl $(__USER_DS), %edx
movl %edx, %ds
movl %edx, %es
- movl $(__KERNEL_PERCPU), %edx
- movl %edx, %fs
+.if \skip_gs == 0
SET_KERNEL_GS %edx
-
+.endif
/* Switch to kernel stack if necessary */
.if \switch_stacks > 0
SWITCH_TO_KERNEL_STACK
.endif
-
.endm
-.macro SAVE_ALL_NMI cr3_reg:req
- SAVE_ALL
+.macro SAVE_ALL_NMI cr3_reg:req unwind_espfix=0
+ SAVE_ALL unwind_espfix=\unwind_espfix
BUG_IF_WRONG_CR3
@@ -247,22 +360,6 @@
.Lend_\@:
.endm
-/*
- * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
- * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
- * is just clearing the MSB, which makes it an invalid stack address and is also
- * a signal to the unwinder that it's a pt_regs pointer in disguise.
- *
- * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
- * original rbp.
- */
-.macro ENCODE_FRAME_POINTER
-#ifdef CONFIG_FRAME_POINTER
- mov %esp, %ebp
- andl $0x7fffffff, %ebp
-#endif
-.endm
-
.macro RESTORE_INT_REGS
popl %ebx
popl %ecx
@@ -279,6 +376,7 @@
2: popl %es
3: popl %fs
POP_GS \pop
+ IRET_FRAME
.pushsection .fixup, "ax"
4: movl $0, (%esp)
jmp 1b
@@ -317,7 +415,8 @@
.macro CHECK_AND_APPLY_ESPFIX
#ifdef CONFIG_X86_ESPFIX32
-#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
+#define GDT_ESPFIX_OFFSET (GDT_ENTRY_ESPFIX_SS * 8)
+#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + GDT_ESPFIX_OFFSET
ALTERNATIVE "jmp .Lend_\@", "", X86_BUG_ESPFIX
@@ -375,9 +474,6 @@
* switch to it before we do any copying.
*/
-#define CS_FROM_ENTRY_STACK (1 << 31)
-#define CS_FROM_USER_CR3 (1 << 30)
-
.macro SWITCH_TO_KERNEL_STACK
ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV
@@ -391,13 +487,6 @@
* that register for the time this macro runs
*/
- /*
- * The high bits of the CS dword (__csh) are used for
- * CS_FROM_ENTRY_STACK and CS_FROM_USER_CR3. Clear them in case
- * hardware didn't do this for us.
- */
- andl $(0x0000ffff), PT_CS(%esp)
-
/* Are we on the entry stack? Bail out if not! */
movl PER_CPU_VAR(cpu_entry_area), %ecx
addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
@@ -755,7 +844,7 @@ ret_from_intr:
andl $SEGMENT_RPL_MASK, %eax
#endif
cmpl $USER_RPL, %eax
- jb resume_kernel # not returning to v8086 or userspace
+ jb restore_all_kernel # not returning to v8086 or userspace
ENTRY(resume_userspace)
DISABLE_INTERRUPTS(CLBR_ANY)
@@ -768,8 +857,25 @@ END(ret_from_exception)
#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
DISABLE_INTERRUPTS(CLBR_ANY)
+ # preempt count == 0 + NEED_RS set?
cmpl $0, PER_CPU_VAR(__preempt_count)
+#ifndef CONFIG_PREEMPT_LAZY
jnz restore_all_kernel
+#else
+ jz test_int_off
+
+ # atleast preempt count == 0 ?
+ cmpl $_PREEMPT_ENABLED,PER_CPU_VAR(__preempt_count)
+ jne restore_all_kernel
+
+ movl PER_CPU_VAR(current_task), %ebp
+ cmpl $0,TASK_TI_preempt_lazy_count(%ebp) # non-zero preempt_lazy_count ?
+ jnz restore_all_kernel
+
+ testl $_TIF_NEED_RESCHED_LAZY, TASK_TI_flags(%ebp)
+ jz restore_all_kernel
+test_int_off:
+#endif
testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
jz restore_all_kernel
call preempt_schedule_irq
@@ -1027,6 +1133,15 @@ restore_all:
INTERRUPT_RETURN
restore_all_kernel:
+#ifdef CONFIG_PREEMPT
+ DISABLE_INTERRUPTS(CLBR_ANY)
+ cmpl $0, PER_CPU_VAR(__preempt_count)
+ jnz .Lno_preempt
+ testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
+ jz .Lno_preempt
+ call preempt_schedule_irq
+.Lno_preempt:
+#endif
TRACE_IRQS_IRET
PARANOID_EXIT_TO_KERNEL_MODE
BUG_IF_WRONG_CR3
@@ -1062,30 +1177,43 @@ ENDPROC(entry_INT80_32)
* We can't call C functions using the ESPFIX stack. This code reads
* the high word of the segment base from the GDT and swiches to the
* normal stack and adjusts ESP with the matching offset.
+ *
+ * We might be on user CR3 here, so percpu data is not mapped and we can't
+ * access the GDT through the percpu segment. Instead, use SGDT to find
+ * the cpu_entry_area alias of the GDT.
*/
#ifdef CONFIG_X86_ESPFIX32
/* fixup the stack */
- mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
- mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
+ pushl %ecx
+ subl $2*4, %esp
+ sgdt (%esp)
+ movl 2(%esp), %ecx /* GDT address */
+ /*
+ * Careful: ECX is a linear pointer, so we need to force base
+ * zero. %cs is the only known-linear segment we have right now.
+ */
+ mov %cs:GDT_ESPFIX_OFFSET + 4(%ecx), %al /* bits 16..23 */
+ mov %cs:GDT_ESPFIX_OFFSET + 7(%ecx), %ah /* bits 24..31 */
shl $16, %eax
+ addl $2*4, %esp
+ popl %ecx
addl %esp, %eax /* the adjusted stack pointer */
pushl $__KERNEL_DS
pushl %eax
lss (%esp), %esp /* switch to the normal stack segment */
#endif
.endm
+
.macro UNWIND_ESPFIX_STACK
+ /* It's safe to clobber %eax, all other regs need to be preserved */
#ifdef CONFIG_X86_ESPFIX32
movl %ss, %eax
/* see if on espfix stack */
cmpw $__ESPFIX_SS, %ax
- jne 27f
- movl $__KERNEL_DS, %eax
- movl %eax, %ds
- movl %eax, %es
+ jne .Lno_fixup_\@
/* switch to normal stack */
FIXUP_ESPFIX_STACK
-27:
+.Lno_fixup_\@:
#endif
.endm
@@ -1275,11 +1403,6 @@ END(spurious_interrupt_bug)
#ifdef CONFIG_XEN_PV
ENTRY(xen_hypervisor_callback)
- pushl $-1 /* orig_ax = -1 => not a system call */
- SAVE_ALL
- ENCODE_FRAME_POINTER
- TRACE_IRQS_OFF
-
/*
* Check to see if we got the event in the critical
* region in xen_iret_direct, after we've reenabled
@@ -1287,16 +1410,17 @@ ENTRY(xen_hypervisor_callback)
* iret instruction's behaviour where it delivers a
* pending interrupt when enabling interrupts:
*/
- movl PT_EIP(%esp), %eax
- cmpl $xen_iret_start_crit, %eax
+ cmpl $xen_iret_start_crit, (%esp)
jb 1f
- cmpl $xen_iret_end_crit, %eax
+ cmpl $xen_iret_end_crit, (%esp)
jae 1f
-
- jmp xen_iret_crit_fixup
-
-ENTRY(xen_do_upcall)
-1: mov %esp, %eax
+ call xen_iret_crit_fixup
+1:
+ pushl $-1 /* orig_ax = -1 => not a system call */
+ SAVE_ALL
+ ENCODE_FRAME_POINTER
+ TRACE_IRQS_OFF
+ mov %esp, %eax
call xen_evtchn_do_upcall
#ifndef CONFIG_PREEMPT
call xen_maybe_preempt_hcall
@@ -1378,37 +1502,48 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR,
ENTRY(page_fault)
ASM_CLAC
pushl $do_page_fault
- ALIGN
- jmp common_exception
+ jmp common_exception_read_cr2
END(page_fault)
-common_exception:
+common_exception_read_cr2:
/* the function address is in %gs's slot on the stack */
- pushl %fs
- pushl %es
- pushl %ds
- pushl %eax
- movl $(__USER_DS), %eax
- movl %eax, %ds
- movl %eax, %es
- movl $(__KERNEL_PERCPU), %eax
- movl %eax, %fs
- pushl %ebp
- pushl %edi
- pushl %esi
- pushl %edx
- pushl %ecx
- pushl %ebx
- SWITCH_TO_KERNEL_STACK
+ SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
+
ENCODE_FRAME_POINTER
- cld
- UNWIND_ESPFIX_STACK
+
+ /* fixup %gs */
GS_TO_REG %ecx
- movl PT_GS(%esp), %edi # get the function address
+ movl PT_GS(%esp), %edi
+ REG_TO_PTGS %ecx
+ SET_KERNEL_GS %ecx
+
+ GET_CR2_INTO(%ecx) # might clobber %eax
+
+ /* fixup orig %eax */
movl PT_ORIG_EAX(%esp), %edx # get the error code
movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
+
+ TRACE_IRQS_OFF
+ movl %esp, %eax # pt_regs pointer
+ CALL_NOSPEC %edi
+ jmp ret_from_exception
+END(common_exception_read_cr2)
+
+common_exception:
+ /* the function address is in %gs's slot on the stack */
+ SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
+ ENCODE_FRAME_POINTER
+
+ /* fixup %gs */
+ GS_TO_REG %ecx
+ movl PT_GS(%esp), %edi # get the function address
REG_TO_PTGS %ecx
SET_KERNEL_GS %ecx
+
+ /* fixup orig %eax */
+ movl PT_ORIG_EAX(%esp), %edx # get the error code
+ movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
+
TRACE_IRQS_OFF
movl %esp, %eax # pt_regs pointer
CALL_NOSPEC %edi
@@ -1436,6 +1571,10 @@ ENTRY(nmi)
ASM_CLAC
#ifdef CONFIG_X86_ESPFIX32
+ /*
+ * ESPFIX_SS is only ever set on the return to user path
+ * after we've switched to the entry stack.
+ */
pushl %eax
movl %ss, %eax
cmpw $__ESPFIX_SS, %ax
@@ -1471,6 +1610,11 @@ ENTRY(nmi)
movl %ebx, %esp
.Lnmi_return:
+#ifdef CONFIG_X86_ESPFIX32
+ testl $CS_FROM_ESPFIX, PT_CS(%esp)
+ jnz .Lnmi_from_espfix
+#endif
+
CHECK_AND_APPLY_ESPFIX
RESTORE_ALL_NMI cr3_reg=%edi pop=4
jmp .Lirq_return
@@ -1478,23 +1622,42 @@ ENTRY(nmi)
#ifdef CONFIG_X86_ESPFIX32
.Lnmi_espfix_stack:
/*
- * create the pointer to lss back
+ * Create the pointer to LSS back
*/
pushl %ss
pushl %esp
addl $4, (%esp)
- /* copy the iret frame of 12 bytes */
- .rept 3
- pushl 16(%esp)
- .endr
- pushl %eax
- SAVE_ALL_NMI cr3_reg=%edi
+
+ /* Copy the (short) IRET frame */
+ pushl 4*4(%esp) # flags
+ pushl 4*4(%esp) # cs
+ pushl 4*4(%esp) # ip
+
+ pushl %eax # orig_ax
+
+ SAVE_ALL_NMI cr3_reg=%edi unwind_espfix=1
ENCODE_FRAME_POINTER
- FIXUP_ESPFIX_STACK # %eax == %esp
+
+ /* clear CS_FROM_KERNEL, set CS_FROM_ESPFIX */
+ xorl $(CS_FROM_ESPFIX | CS_FROM_KERNEL), PT_CS(%esp)
+
xorl %edx, %edx # zero error code
- call do_nmi
+ movl %esp, %eax # pt_regs pointer
+ jmp .Lnmi_from_sysenter_stack
+
+.Lnmi_from_espfix:
RESTORE_ALL_NMI cr3_reg=%edi
- lss 12+4(%esp), %esp # back to espfix stack
+ /*
+ * Because we cleared CS_FROM_KERNEL, IRET_FRAME 'forgot' to
+ * fix up the gap and long frame:
+ *
+ * 3 - original frame (exception)
+ * 2 - ESPFIX block (above)
+ * 6 - gap (FIXUP_FRAME)
+ * 5 - long frame (FIXUP_FRAME)
+ * 1 - orig_ax
+ */
+ lss (1+5+6)*4(%esp), %esp # back to espfix stack
jmp .Lirq_return
#endif
END(nmi)
@@ -1513,6 +1676,7 @@ ENTRY(int3)
END(int3)
ENTRY(general_protection)
+ ASM_CLAC
pushl $do_general_protection
jmp common_exception
END(general_protection)
@@ -1521,7 +1685,7 @@ END(general_protection)
ENTRY(async_page_fault)
ASM_CLAC
pushl $do_async_page_fault
- jmp common_exception
+ jmp common_exception_read_cr2
END(async_page_fault)
#endif