Diffstat (limited to 'arch/x86/entry/entry_32.S')
-rw-r--r--	arch/x86/entry/entry_32.S	344
1 file changed, 254 insertions(+), 90 deletions(-)
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index f49e11669271..a4dc0dd2b94d 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -67,7 +67,6 @@
 # define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
 #else
 # define preempt_stop(clobbers)
-# define resume_kernel		restore_all_kernel
 #endif
 
 .macro TRACE_IRQS_IRET
@@ -173,7 +172,7 @@
 	ALTERNATIVE	"jmp .Lend_\@", "", X86_FEATURE_PTI
 	.if \no_user_check == 0
 	/* coming from usermode? */
-	testl	$SEGMENT_RPL_MASK, PT_CS(%esp)
+	testl	$USER_SEGMENT_RPL_MASK, PT_CS(%esp)
 	jz	.Lend_\@
 	.endif
 	/* On user-cr3? */
@@ -203,10 +202,126 @@
 .Lend_\@:
 .endm
 
-.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0
+#define CS_FROM_ENTRY_STACK	(1 << 31)
+#define CS_FROM_USER_CR3	(1 << 30)
+#define CS_FROM_KERNEL		(1 << 29)
+#define CS_FROM_ESPFIX		(1 << 28)
+
+.macro FIXUP_FRAME
+	/*
+	 * The high bits of the CS dword (__csh) are used for CS_FROM_*.
+	 * Clear them in case hardware didn't do this for us.
+	 */
+	andl	$0x0000ffff, 4*4(%esp)
+
+#ifdef CONFIG_VM86
+	testl	$X86_EFLAGS_VM, 5*4(%esp)
+	jnz	.Lfrom_usermode_no_fixup_\@
+#endif
+	testl	$USER_SEGMENT_RPL_MASK, 4*4(%esp)
+	jnz	.Lfrom_usermode_no_fixup_\@
+
+	orl	$CS_FROM_KERNEL, 4*4(%esp)
+
+	/*
+	 * When we're here from kernel mode, the (exception) stack looks like:
+	 *
+	 *  6*4(%esp) - <previous context>
+	 *  5*4(%esp) - flags
+	 *  4*4(%esp) - cs
+	 *  3*4(%esp) - ip
+	 *  2*4(%esp) - orig_eax
+	 *  1*4(%esp) - gs / function
+	 *  0*4(%esp) - fs
+	 *
+	 * Let's build a 5-entry IRET frame after that, such that struct pt_regs
+	 * is complete and in particular regs->sp is correct. This gives us
+	 * the original 6 entries as gap:
+	 *
+	 * 14*4(%esp) - <previous context>
+	 * 13*4(%esp) - gap / flags
+	 * 12*4(%esp) - gap / cs
+	 * 11*4(%esp) - gap / ip
+	 * 10*4(%esp) - gap / orig_eax
+	 *  9*4(%esp) - gap / gs / function
+	 *  8*4(%esp) - gap / fs
+	 *  7*4(%esp) - ss
+	 *  6*4(%esp) - sp
+	 *  5*4(%esp) - flags
+	 *  4*4(%esp) - cs
+	 *  3*4(%esp) - ip
+	 *  2*4(%esp) - orig_eax
+	 *  1*4(%esp) - gs / function
+	 *  0*4(%esp) - fs
+	 */
+
+	pushl	%ss		# ss
+	pushl	%esp		# sp (points at ss)
+	addl	$7*4, (%esp)	# point sp back at the previous context
+	pushl	7*4(%esp)	# flags
+	pushl	7*4(%esp)	# cs
+	pushl	7*4(%esp)	# ip
+	pushl	7*4(%esp)	# orig_eax
+	pushl	7*4(%esp)	# gs / function
+	pushl	7*4(%esp)	# fs
+.Lfrom_usermode_no_fixup_\@:
+.endm
+
+.macro IRET_FRAME
+	/*
+	 * We're called with %ds, %es, %fs, and %gs from the interrupted
+	 * frame, so we shouldn't use them. Also, we may be in ESPFIX
+	 * mode and therefore have a nonzero SS base and an offset ESP,
+	 * so any attempt to access the stack needs to use SS. (except for
+	 * accesses through %esp, which automatically use SS.)
+	 */
+	testl	$CS_FROM_KERNEL, 1*4(%esp)
+	jz	.Lfinished_frame_\@
+
+	/*
+	 * Reconstruct the 3-entry IRET frame right after the (modified)
+	 * regs->sp without lowering %esp in between, such that an NMI in the
+	 * middle doesn't scribble our stack.
+	 */
+	pushl	%eax
+	pushl	%ecx
+	movl	5*4(%esp), %eax		# (modified) regs->sp
+
+	movl	4*4(%esp), %ecx		# flags
+	movl	%ecx, %ss:-1*4(%eax)
+
+	movl	3*4(%esp), %ecx		# cs
+	andl	$0x0000ffff, %ecx
+	movl	%ecx, %ss:-2*4(%eax)
+
+	movl	2*4(%esp), %ecx		# ip
+	movl	%ecx, %ss:-3*4(%eax)
+
+	movl	1*4(%esp), %ecx		# eax
+	movl	%ecx, %ss:-4*4(%eax)
+
+	popl	%ecx
+	lea	-4*4(%eax), %esp
+	popl	%eax
+.Lfinished_frame_\@:
+.endm
+
+.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0 unwind_espfix=0
 	cld
+.if \skip_gs == 0
 	PUSH_GS
+.endif
 	pushl	%fs
+
+	pushl	%eax
+	movl	$(__KERNEL_PERCPU), %eax
+	movl	%eax, %fs
+.if \unwind_espfix > 0
+	UNWIND_ESPFIX_STACK
+.endif
+	popl	%eax
+
+	FIXUP_FRAME
 	pushl	%es
 	pushl	%ds
 	pushl	\pt_regs_ax
@@ -219,19 +334,17 @@
 	movl	$(__USER_DS), %edx
 	movl	%edx, %ds
 	movl	%edx, %es
-	movl	$(__KERNEL_PERCPU), %edx
-	movl	%edx, %fs
+.if \skip_gs == 0
 	SET_KERNEL_GS %edx
-
+.endif
 	/* Switch to kernel stack if necessary */
 .if \switch_stacks > 0
 	SWITCH_TO_KERNEL_STACK
 .endif
-
 .endm
 
-.macro SAVE_ALL_NMI cr3_reg:req
-	SAVE_ALL
+.macro SAVE_ALL_NMI cr3_reg:req unwind_espfix=0
+	SAVE_ALL unwind_espfix=\unwind_espfix
 
 	BUG_IF_WRONG_CR3
 
@@ -247,22 +360,6 @@
 .Lend_\@:
 .endm
 
-/*
- * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
- * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
- * is just clearing the MSB, which makes it an invalid stack address and is also
- * a signal to the unwinder that it's a pt_regs pointer in disguise.
- *
- * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
- * original rbp.
- */
-.macro ENCODE_FRAME_POINTER
-#ifdef CONFIG_FRAME_POINTER
-	mov %esp, %ebp
-	andl $0x7fffffff, %ebp
-#endif
-.endm
-
 .macro RESTORE_INT_REGS
 	popl	%ebx
 	popl	%ecx
@@ -279,6 +376,7 @@
 2:	popl	%es
 3:	popl	%fs
 	POP_GS \pop
+	IRET_FRAME
 .pushsection .fixup, "ax"
 4:	movl	$0, (%esp)
 	jmp	1b
@@ -317,7 +415,8 @@
 
 .macro CHECK_AND_APPLY_ESPFIX
 #ifdef CONFIG_X86_ESPFIX32
-#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
+#define GDT_ESPFIX_OFFSET (GDT_ENTRY_ESPFIX_SS * 8)
+#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + GDT_ESPFIX_OFFSET
 
 	ALTERNATIVE	"jmp .Lend_\@", "", X86_BUG_ESPFIX
 
@@ -375,9 +474,6 @@
  * switch to it before we do any copying.
  */
 
-#define CS_FROM_ENTRY_STACK	(1 << 31)
-#define CS_FROM_USER_CR3	(1 << 30)
-
 .macro SWITCH_TO_KERNEL_STACK
 
 	ALTERNATIVE	"", "jmp .Lend_\@", X86_FEATURE_XENPV
@@ -391,13 +487,6 @@
 	 * that register for the time this macro runs
 	 */
 
-	/*
-	 * The high bits of the CS dword (__csh) are used for
-	 * CS_FROM_ENTRY_STACK and CS_FROM_USER_CR3. Clear them in case
-	 * hardware didn't do this for us.
-	 */
-	andl	$(0x0000ffff), PT_CS(%esp)
-
 	/* Are we on the entry stack? Bail out if not! */
 	movl	PER_CPU_VAR(cpu_entry_area), %ecx
 	addl	$CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
@@ -755,7 +844,7 @@ ret_from_intr:
 	andl	$SEGMENT_RPL_MASK, %eax
 #endif
 	cmpl	$USER_RPL, %eax
-	jb	resume_kernel			# not returning to v8086 or userspace
+	jb	restore_all_kernel		# not returning to v8086 or userspace
 
 ENTRY(resume_userspace)
 	DISABLE_INTERRUPTS(CLBR_ANY)
@@ -768,8 +857,25 @@ END(ret_from_exception)
 #ifdef CONFIG_PREEMPT
 ENTRY(resume_kernel)
 	DISABLE_INTERRUPTS(CLBR_ANY)
+	# preempt count == 0 + NEED_RS set?
 	cmpl	$0, PER_CPU_VAR(__preempt_count)
+#ifndef CONFIG_PREEMPT_LAZY
 	jnz	restore_all_kernel
+#else
+	jz	test_int_off
+
+	# at least preempt count == 0 ?
+	cmpl	$_PREEMPT_ENABLED, PER_CPU_VAR(__preempt_count)
+	jne	restore_all_kernel
+
+	movl	PER_CPU_VAR(current_task), %ebp
+	cmpl	$0, TASK_TI_preempt_lazy_count(%ebp)	# non-zero preempt_lazy_count ?
+	jnz	restore_all_kernel
+
+	testl	$_TIF_NEED_RESCHED_LAZY, TASK_TI_flags(%ebp)
+	jz	restore_all_kernel
+test_int_off:
+#endif
 	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)	# interrupts off (exception path) ?
 	jz	restore_all_kernel
 	call	preempt_schedule_irq
@@ -1027,6 +1133,15 @@ restore_all:
 	INTERRUPT_RETURN
 
 restore_all_kernel:
+#ifdef CONFIG_PREEMPT
+	DISABLE_INTERRUPTS(CLBR_ANY)
+	cmpl	$0, PER_CPU_VAR(__preempt_count)
+	jnz	.Lno_preempt
+	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)	# interrupts off (exception path) ?
+	jz	.Lno_preempt
+	call	preempt_schedule_irq
+.Lno_preempt:
+#endif
 	TRACE_IRQS_IRET
 	PARANOID_EXIT_TO_KERNEL_MODE
 	BUG_IF_WRONG_CR3
@@ -1062,30 +1177,43 @@ ENDPROC(entry_INT80_32)
  * We can't call C functions using the ESPFIX stack. This code reads
  * the high word of the segment base from the GDT and swiches to the
  * normal stack and adjusts ESP with the matching offset.
+ *
+ * We might be on user CR3 here, so percpu data is not mapped and we can't
+ * access the GDT through the percpu segment. Instead, use SGDT to find
+ * the cpu_entry_area alias of the GDT.
  */
 #ifdef CONFIG_X86_ESPFIX32
 	/* fixup the stack */
-	mov	GDT_ESPFIX_SS + 4, %al			/* bits 16..23 */
-	mov	GDT_ESPFIX_SS + 7, %ah			/* bits 24..31 */
+	pushl	%ecx
+	subl	$2*4, %esp
+	sgdt	(%esp)
+	movl	2(%esp), %ecx				/* GDT address */
+	/*
+	 * Careful: ECX is a linear pointer, so we need to force base
+	 * zero. %cs is the only known-linear segment we have right now.
+	 */
+	mov	%cs:GDT_ESPFIX_OFFSET + 4(%ecx), %al	/* bits 16..23 */
+	mov	%cs:GDT_ESPFIX_OFFSET + 7(%ecx), %ah	/* bits 24..31 */
 	shl	$16, %eax
+	addl	$2*4, %esp
+	popl	%ecx
 	addl	%esp, %eax			/* the adjusted stack pointer */
 	pushl	$__KERNEL_DS
 	pushl	%eax
 	lss	(%esp), %esp			/* switch to the normal stack segment */
 #endif
 .endm
+
 .macro UNWIND_ESPFIX_STACK
+	/* It's safe to clobber %eax, all other regs need to be preserved */
#ifdef CONFIG_X86_ESPFIX32
 	movl	%ss, %eax
 	/* see if on espfix stack */
 	cmpw	$__ESPFIX_SS, %ax
-	jne	27f
-	movl	$__KERNEL_DS, %eax
-	movl	%eax, %ds
-	movl	%eax, %es
+	jne	.Lno_fixup_\@
 	/* switch to normal stack */
 	FIXUP_ESPFIX_STACK
-27:
+.Lno_fixup_\@:
 #endif
 .endm
 
@@ -1275,11 +1403,6 @@ END(spurious_interrupt_bug)
 
 #ifdef CONFIG_XEN_PV
 ENTRY(xen_hypervisor_callback)
-	pushl	$-1				/* orig_ax = -1 => not a system call */
-	SAVE_ALL
-	ENCODE_FRAME_POINTER
-	TRACE_IRQS_OFF
-
 	/*
 	 * Check to see if we got the event in the critical
 	 * region in xen_iret_direct, after we've reenabled
@@ -1287,16 +1410,17 @@ ENTRY(xen_hypervisor_callback)
 	 * iret instruction's behaviour where it delivers a
 	 * pending interrupt when enabling interrupts:
 	 */
-	movl	PT_EIP(%esp), %eax
-	cmpl	$xen_iret_start_crit, %eax
+	cmpl	$xen_iret_start_crit, (%esp)
 	jb	1f
-	cmpl	$xen_iret_end_crit, %eax
+	cmpl	$xen_iret_end_crit, (%esp)
 	jae	1f
-
-	jmp	xen_iret_crit_fixup
-
-ENTRY(xen_do_upcall)
-1:	mov	%esp, %eax
+	call	xen_iret_crit_fixup
+1:
+	pushl	$-1				/* orig_ax = -1 => not a system call */
+	SAVE_ALL
+	ENCODE_FRAME_POINTER
+	TRACE_IRQS_OFF
+	mov	%esp, %eax
 	call	xen_evtchn_do_upcall
 #ifndef CONFIG_PREEMPT
 	call	xen_maybe_preempt_hcall
@@ -1378,37 +1502,48 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR,
 ENTRY(page_fault)
 	ASM_CLAC
 	pushl	$do_page_fault
-	ALIGN
-	jmp	common_exception
+	jmp	common_exception_read_cr2
 END(page_fault)
 
-common_exception:
+common_exception_read_cr2:
 	/* the function address is in %gs's slot on the stack */
-	pushl	%fs
-	pushl	%es
-	pushl	%ds
-	pushl	%eax
-	movl	$(__USER_DS), %eax
-	movl	%eax, %ds
-	movl	%eax, %es
-	movl	$(__KERNEL_PERCPU), %eax
-	movl	%eax, %fs
-	pushl	%ebp
-	pushl	%edi
-	pushl	%esi
-	pushl	%edx
-	pushl	%ecx
-	pushl	%ebx
-	SWITCH_TO_KERNEL_STACK
+	SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
+
 	ENCODE_FRAME_POINTER
-	cld
-	UNWIND_ESPFIX_STACK
+
+	/* fixup %gs */
 	GS_TO_REG %ecx
-	movl	PT_GS(%esp), %edi		# get the function address
+	movl	PT_GS(%esp), %edi
+	REG_TO_PTGS %ecx
+	SET_KERNEL_GS %ecx
+
+	GET_CR2_INTO(%ecx)			# might clobber %eax
+
+	/* fixup orig %eax */
 	movl	PT_ORIG_EAX(%esp), %edx		# get the error code
 	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
+
+	TRACE_IRQS_OFF
+	movl	%esp, %eax			# pt_regs pointer
+	CALL_NOSPEC %edi
+	jmp	ret_from_exception
+END(common_exception_read_cr2)
+
+common_exception:
+	/* the function address is in %gs's slot on the stack */
+	SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
+	ENCODE_FRAME_POINTER
+
+	/* fixup %gs */
+	GS_TO_REG %ecx
+	movl	PT_GS(%esp), %edi		# get the function address
 	REG_TO_PTGS %ecx
 	SET_KERNEL_GS %ecx
+
+	/* fixup orig %eax */
+	movl	PT_ORIG_EAX(%esp), %edx		# get the error code
+	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
+
 	TRACE_IRQS_OFF
 	movl	%esp, %eax			# pt_regs pointer
 	CALL_NOSPEC %edi
@@ -1436,6 +1571,10 @@ ENTRY(nmi)
 	ASM_CLAC
 
 #ifdef CONFIG_X86_ESPFIX32
+	/*
+	 * ESPFIX_SS is only ever set on the return to user path
+	 * after we've switched to the entry stack.
+	 */
 	pushl	%eax
 	movl	%ss, %eax
 	cmpw	$__ESPFIX_SS, %ax
@@ -1471,6 +1610,11 @@ ENTRY(nmi)
 	movl	%ebx, %esp
 
 .Lnmi_return:
+#ifdef CONFIG_X86_ESPFIX32
+	testl	$CS_FROM_ESPFIX, PT_CS(%esp)
+	jnz	.Lnmi_from_espfix
+#endif
+
 	CHECK_AND_APPLY_ESPFIX
 	RESTORE_ALL_NMI cr3_reg=%edi pop=4
 	jmp	.Lirq_return
@@ -1478,23 +1622,42 @@ ENTRY(nmi)
 #ifdef CONFIG_X86_ESPFIX32
 .Lnmi_espfix_stack:
 	/*
-	 * create the pointer to lss back
+	 * Create the pointer to LSS back
 	 */
 	pushl	%ss
 	pushl	%esp
 	addl	$4, (%esp)
-	/* copy the iret frame of 12 bytes */
-	.rept 3
-	pushl	16(%esp)
-	.endr
-	pushl	%eax
-	SAVE_ALL_NMI cr3_reg=%edi
+
+	/* Copy the (short) IRET frame */
+	pushl	4*4(%esp)	# flags
+	pushl	4*4(%esp)	# cs
+	pushl	4*4(%esp)	# ip
+
+	pushl	%eax		# orig_ax
+
+	SAVE_ALL_NMI cr3_reg=%edi unwind_espfix=1
 	ENCODE_FRAME_POINTER
-	FIXUP_ESPFIX_STACK			# %eax == %esp
+
+	/* clear CS_FROM_KERNEL, set CS_FROM_ESPFIX */
+	xorl	$(CS_FROM_ESPFIX | CS_FROM_KERNEL), PT_CS(%esp)
+
 	xorl	%edx, %edx			# zero error code
-	call	do_nmi
+	movl	%esp, %eax			# pt_regs pointer
+	jmp	.Lnmi_from_sysenter_stack
+
+.Lnmi_from_espfix:
 	RESTORE_ALL_NMI cr3_reg=%edi
-	lss	12+4(%esp), %esp		# back to espfix stack
+	/*
+	 * Because we cleared CS_FROM_KERNEL, IRET_FRAME 'forgot' to
+	 * fix up the gap and long frame:
+	 *
+	 * 3 - original frame	(exception)
+	 * 2 - ESPFIX block	(above)
+	 * 6 - gap		(FIXUP_FRAME)
+	 * 5 - long frame	(FIXUP_FRAME)
+	 * 1 - orig_ax
+	 */
+	lss	(1+5+6)*4(%esp), %esp		# back to espfix stack
 	jmp	.Lirq_return
 #endif
 END(nmi)
@@ -1513,6 +1676,7 @@ ENTRY(int3)
 END(int3)
 
 ENTRY(general_protection)
+	ASM_CLAC
 	pushl	$do_general_protection
 	jmp	common_exception
 END(general_protection)
@@ -1521,7 +1685,7 @@ END(general_protection)
 ENTRY(async_page_fault)
 	ASM_CLAC
 	pushl	$do_async_page_fault
-	jmp	common_exception
+	jmp	common_exception_read_cr2
 END(async_page_fault)
 #endif
 
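A note on the CS_FROM_* bits defined near the top of the patch: they work because a segment selector is only 16 bits wide, so the upper half of the 32-bit CS slot in the exception frame is free for bookkeeping. The user-space C sketch below mirrors what FIXUP_FRAME does to that slot for a kernel-mode exception; the flag values are taken from the patch, while the function name and sample selector are illustrative only.

#include <stdint.h>
#include <stdio.h>

/* Flag bits from the patch; a 16-bit selector leaves bits 31..16 of the slot free. */
#define CS_FROM_ENTRY_STACK	(1u << 31)
#define CS_FROM_USER_CR3	(1u << 30)
#define CS_FROM_KERNEL		(1u << 29)
#define CS_FROM_ESPFIX		(1u << 28)

/* Hypothetical mirror of FIXUP_FRAME's effect on the CS slot (kernel-mode case). */
static uint32_t fixup_cs_slot(uint32_t cs_slot)
{
	cs_slot &= 0x0000ffffu;		/* andl $0x0000ffff, 4*4(%esp) */
	cs_slot |= CS_FROM_KERNEL;	/* orl  $CS_FROM_KERNEL, 4*4(%esp) */
	return cs_slot;
}

int main(void)
{
	/* 0x0010 is a made-up kernel CS selector with stale garbage above it */
	uint32_t cs = fixup_cs_slot(0xdead0010u);

	printf("selector=%#x from_kernel=%d\n",
	       (unsigned)(cs & 0xffffu), !!(cs & CS_FROM_KERNEL));
	return 0;
}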
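The two stack diagrams in FIXUP_FRAME can be checked mechanically. The hypothetical struct below lays out the rebuilt frame word for word and re-derives the two offsets the macro depends on: the rebuilt regs->sp slot at 6*4(%esp) and the previous context at 14*4(%esp). This is also why the macro does "addl $7*4, (%esp)": the pushed %esp points at the ss slot, which sits at 7*4 in the final layout, so adding another 7*4 makes it point at the previous context.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical word-for-word rendering of the rebuilt "from kernel" stack. */
struct fixed_up_stack {
	uint32_t fs;		/*  0*4 */
	uint32_t gs_func;	/*  1*4  gs / function */
	uint32_t orig_eax;	/*  2*4 */
	uint32_t ip;		/*  3*4 */
	uint32_t cs;		/*  4*4  low 16 bits selector, high bits CS_FROM_* */
	uint32_t flags;		/*  5*4 */
	uint32_t sp;		/*  6*4  rebuilt; points at <previous context> */
	uint32_t ss;		/*  7*4  rebuilt */
	uint32_t gap[6];	/*  8*4 .. 13*4  the original six words */
};

int main(void)
{
	/* prints 6 and 14, matching the diagram in the patch */
	printf("sp slot at %zu*4(%%esp), previous context at %zu*4(%%esp)\n",
	       offsetof(struct fixed_up_stack, sp) / 4,
	       sizeof(struct fixed_up_stack) / 4);
	return 0;
}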
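In the reworked FIXUP_ESPFIX_STACK, the two byte loads plus "shl $16, %eax" recover the high half of the espfix stack address from the GDT: in an 8-byte segment descriptor, base bits 16..23 live in byte 4 and bits 24..31 in byte 7. A small sketch of that extraction, with a made-up descriptor:

#include <stdint.h>
#include <stdio.h>

/*
 * %al <- descriptor byte 4 (base 16..23), %ah <- byte 7 (base 24..31),
 * then shl $16 -- equivalent to the expression below.
 */
static uint32_t espfix_base_high(const uint8_t desc[8])
{
	return (((uint32_t)desc[7] << 8) | desc[4]) << 16;
}

int main(void)
{
	uint8_t desc[8] = {0};	/* illustrative descriptor, not a real GDT entry */

	desc[4] = 0xcd;		/* base bits 16..23 */
	desc[7] = 0xab;		/* base bits 24..31 */
	printf("%#010x\n", (unsigned)espfix_base_high(desc));	/* 0xabcd0000 */
	return 0;
}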
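The CONFIG_PREEMPT_LAZY block added to resume_kernel encodes a four-way decision. The sketch below restates it in C, assuming x86's inverted PREEMPT_NEED_RESCHED convention, under which __preempt_count == 0 means "preemption enabled and an eager reschedule is pending" and __preempt_count == PREEMPT_ENABLED means "enabled, no eager reschedule". Names and constants are illustrative, and in the real code both outcomes are still gated on X86_EFLAGS_IF being set in the saved flags (the test_int_off path).

#include <stdbool.h>
#include <stdio.h>

/* Illustrative constants; the real values live in the kernel headers. */
#define PREEMPT_ENABLED		0x80000000u	/* inverted PREEMPT_NEED_RESCHED bit */
#define TIF_NEED_RESCHED_LAZY	(1u << 9)	/* placeholder bit position */

/* Hypothetical restatement of the resume_kernel checks under CONFIG_PREEMPT_LAZY. */
static bool may_preempt(unsigned int preempt_count,
			unsigned int preempt_lazy_count,
			unsigned int ti_flags)
{
	if (preempt_count == 0)			/* eager reschedule pending */
		return true;
	if (preempt_count != PREEMPT_ENABLED)	/* preemption disabled somewhere */
		return false;
	if (preempt_lazy_count)			/* lazy preemption disabled */
		return false;
	return ti_flags & TIF_NEED_RESCHED_LAZY;	/* lazy reschedule requested? */
}

int main(void)
{
	printf("%d\n", may_preempt(PREEMPT_ENABLED, 0, TIF_NEED_RESCHED_LAZY));	/* 1 */
	printf("%d\n", may_preempt(PREEMPT_ENABLED, 1, TIF_NEED_RESCHED_LAZY));	/* 0 */
	return 0;
}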
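Finally, the "lss (1+5+6)*4(%esp), %esp" operand in the NMI espfix exit can be sanity-checked against the layout comment right above it: at that point twelve words (orig_ax, the five-word long frame, and the six-word gap) separate %esp from the saved sp/ss pair that leads back to the espfix stack.

#include <stdio.h>

/* Re-derivation of the lss operand from the layout comment in the patch. */
int main(void)
{
	int orig_ax = 1, long_frame = 5, gap = 6;	/* words, per the comment */
	int words = orig_ax + long_frame + gap;

	printf("lss operand: %d*4 = %d(%%esp)\n", words, words * 4);	/* 12*4 = 48 */
	return 0;
}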