Diffstat (limited to 'arch/x86/include/asm/percpu.h')
-rw-r--r--  arch/x86/include/asm/percpu.h  510
1 file changed, 172 insertions(+), 338 deletions(-)
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 2278797c769d..a3c33b79fb86 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -4,33 +4,15 @@
#ifdef CONFIG_X86_64
#define __percpu_seg gs
-#define __percpu_mov_op movq
#else
#define __percpu_seg fs
-#define __percpu_mov_op movl
#endif
#ifdef __ASSEMBLY__
-/*
- * PER_CPU finds an address of a per-cpu variable.
- *
- * Args:
- * var - variable name
- * reg - 32bit register
- *
- * The resulting address is stored in the "reg" argument.
- *
- * Example:
- * PER_CPU(cpu_gdt_descr, %ebx)
- */
#ifdef CONFIG_SMP
-#define PER_CPU(var, reg) \
- __percpu_mov_op %__percpu_seg:this_cpu_off, reg; \
- lea var(reg), reg
#define PER_CPU_VAR(var) %__percpu_seg:var
#else /* ! SMP */
-#define PER_CPU(var, reg) __percpu_mov_op $var, reg
#define PER_CPU_VAR(var) var
#endif /* SMP */
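For illustration only (not part of this change): under SMP, PER_CPU_VAR() yields a segment-relative operand, while UP builds fall back to a plain symbol reference. Assembly using the this_cpu_off variable named above would look roughly like this on 64-bit:

	movq	PER_CPU_VAR(this_cpu_off), %rax		/* %gs:this_cpu_off on SMP, plain this_cpu_off on UP */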
@@ -85,213 +67,108 @@
/* For arch-specific code, we can use direct single-insn ops (they
* don't give an lvalue though). */
-extern void __bad_percpu_size(void);
-
-#define percpu_to_op(qual, op, var, val) \
-do { \
- typedef typeof(var) pto_T__; \
- if (0) { \
- pto_T__ pto_tmp__; \
- pto_tmp__ = (val); \
- (void)pto_tmp__; \
- } \
- switch (sizeof(var)) { \
- case 1: \
- asm qual (op "b %1,"__percpu_arg(0) \
- : "+m" (var) \
- : "qi" ((pto_T__)(val))); \
- break; \
- case 2: \
- asm qual (op "w %1,"__percpu_arg(0) \
- : "+m" (var) \
- : "ri" ((pto_T__)(val))); \
- break; \
- case 4: \
- asm qual (op "l %1,"__percpu_arg(0) \
- : "+m" (var) \
- : "ri" ((pto_T__)(val))); \
- break; \
- case 8: \
- asm qual (op "q %1,"__percpu_arg(0) \
- : "+m" (var) \
- : "re" ((pto_T__)(val))); \
- break; \
- default: __bad_percpu_size(); \
- } \
+
+#define __pcpu_type_1 u8
+#define __pcpu_type_2 u16
+#define __pcpu_type_4 u32
+#define __pcpu_type_8 u64
+
+#define __pcpu_cast_1(val) ((u8)(((unsigned long) val) & 0xff))
+#define __pcpu_cast_2(val) ((u16)(((unsigned long) val) & 0xffff))
+#define __pcpu_cast_4(val) ((u32)(((unsigned long) val) & 0xffffffff))
+#define __pcpu_cast_8(val) ((u64)(val))
+
+#define __pcpu_op1_1(op, dst) op "b " dst
+#define __pcpu_op1_2(op, dst) op "w " dst
+#define __pcpu_op1_4(op, dst) op "l " dst
+#define __pcpu_op1_8(op, dst) op "q " dst
+
+#define __pcpu_op2_1(op, src, dst) op "b " src ", " dst
+#define __pcpu_op2_2(op, src, dst) op "w " src ", " dst
+#define __pcpu_op2_4(op, src, dst) op "l " src ", " dst
+#define __pcpu_op2_8(op, src, dst) op "q " src ", " dst
+
+#define __pcpu_reg_1(mod, x) mod "q" (x)
+#define __pcpu_reg_2(mod, x) mod "r" (x)
+#define __pcpu_reg_4(mod, x) mod "r" (x)
+#define __pcpu_reg_8(mod, x) mod "r" (x)
+
+#define __pcpu_reg_imm_1(x) "qi" (x)
+#define __pcpu_reg_imm_2(x) "ri" (x)
+#define __pcpu_reg_imm_4(x) "ri" (x)
+#define __pcpu_reg_imm_8(x) "re" (x)
+
+#define percpu_to_op(size, qual, op, _var, _val) \
+do { \
+ __pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val); \
+ if (0) { \
+ typeof(_var) pto_tmp__; \
+ pto_tmp__ = (_val); \
+ (void)pto_tmp__; \
+ } \
+ asm qual(__pcpu_op2_##size(op, "%[val]", __percpu_arg([var])) \
+ : [var] "+m" (_var) \
+ : [val] __pcpu_reg_imm_##size(pto_val__)); \
} while (0)
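As a sketch of what the rewritten helper generates (assuming SMP x86-64, where __percpu_arg() prepends the %gs: prefix), percpu_to_op(4, volatile, "add", var, val) expands to roughly:

	do {
		u32 pto_val__ = (u32)(((unsigned long)val) & 0xffffffff);	/* __pcpu_cast_4() */
		if (0) {					/* dead code: only type-checks val against var */
			typeof(var) pto_tmp__;
			pto_tmp__ = (val);
			(void)pto_tmp__;
		}
		asm volatile("addl %[val], %%gs:%[var]"		/* __pcpu_op2_4() + __percpu_arg() */
			     : [var] "+m" (var)
			     : [val] "ri" (pto_val__));		/* __pcpu_reg_imm_4() */
	} while (0)

The named [var]/[val] operands replace the old numeric %0/%1 references, and the size parameter replaces the switch (sizeof(var)) dispatch.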
+#define percpu_unary_op(size, qual, op, _var) \
+({ \
+ asm qual (__pcpu_op1_##size(op, __percpu_arg([var])) \
+ : [var] "+m" (_var)); \
+})
+
/*
* Generate a percpu add to memory instruction and optimize code
* if one is added or subtracted.
*/
-#define percpu_add_op(qual, var, val) \
+#define percpu_add_op(size, qual, var, val) \
do { \
- typedef typeof(var) pao_T__; \
const int pao_ID__ = (__builtin_constant_p(val) && \
((val) == 1 || (val) == -1)) ? \
(int)(val) : 0; \
if (0) { \
- pao_T__ pao_tmp__; \
+ typeof(var) pao_tmp__; \
pao_tmp__ = (val); \
(void)pao_tmp__; \
} \
- switch (sizeof(var)) { \
- case 1: \
- if (pao_ID__ == 1) \
- asm qual ("incb "__percpu_arg(0) : "+m" (var)); \
- else if (pao_ID__ == -1) \
- asm qual ("decb "__percpu_arg(0) : "+m" (var)); \
- else \
- asm qual ("addb %1, "__percpu_arg(0) \
- : "+m" (var) \
- : "qi" ((pao_T__)(val))); \
- break; \
- case 2: \
- if (pao_ID__ == 1) \
- asm qual ("incw "__percpu_arg(0) : "+m" (var)); \
- else if (pao_ID__ == -1) \
- asm qual ("decw "__percpu_arg(0) : "+m" (var)); \
- else \
- asm qual ("addw %1, "__percpu_arg(0) \
- : "+m" (var) \
- : "ri" ((pao_T__)(val))); \
- break; \
- case 4: \
- if (pao_ID__ == 1) \
- asm qual ("incl "__percpu_arg(0) : "+m" (var)); \
- else if (pao_ID__ == -1) \
- asm qual ("decl "__percpu_arg(0) : "+m" (var)); \
- else \
- asm qual ("addl %1, "__percpu_arg(0) \
- : "+m" (var) \
- : "ri" ((pao_T__)(val))); \
- break; \
- case 8: \
- if (pao_ID__ == 1) \
- asm qual ("incq "__percpu_arg(0) : "+m" (var)); \
- else if (pao_ID__ == -1) \
- asm qual ("decq "__percpu_arg(0) : "+m" (var)); \
- else \
- asm qual ("addq %1, "__percpu_arg(0) \
- : "+m" (var) \
- : "re" ((pao_T__)(val))); \
- break; \
- default: __bad_percpu_size(); \
- } \
+ if (pao_ID__ == 1) \
+ percpu_unary_op(size, qual, "inc", var); \
+ else if (pao_ID__ == -1) \
+ percpu_unary_op(size, qual, "dec", var); \
+ else \
+ percpu_to_op(size, qual, "add", var, val); \
} while (0)
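The pao_ID__ folding behaves as before; a sketch with a hypothetical per-cpu counter (name invented for illustration, instructions shown for SMP x86-64):

	DEFINE_PER_CPU(unsigned int, pao_demo_count);

	static inline void pao_demo(void)
	{
		raw_cpu_add(pao_demo_count,  1);	/* folds to:  incl %gs:pao_demo_count      */
		raw_cpu_add(pao_demo_count, -1);	/* folds to:  decl %gs:pao_demo_count      */
		raw_cpu_add(pao_demo_count, 16);	/* emits:     addl $16, %gs:pao_demo_count */
	}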
-#define percpu_from_op(qual, op, var) \
-({ \
- typeof(var) pfo_ret__; \
- switch (sizeof(var)) { \
- case 1: \
- asm qual (op "b "__percpu_arg(1)",%0" \
- : "=q" (pfo_ret__) \
- : "m" (var)); \
- break; \
- case 2: \
- asm qual (op "w "__percpu_arg(1)",%0" \
- : "=r" (pfo_ret__) \
- : "m" (var)); \
- break; \
- case 4: \
- asm qual (op "l "__percpu_arg(1)",%0" \
- : "=r" (pfo_ret__) \
- : "m" (var)); \
- break; \
- case 8: \
- asm qual (op "q "__percpu_arg(1)",%0" \
- : "=r" (pfo_ret__) \
- : "m" (var)); \
- break; \
- default: __bad_percpu_size(); \
- } \
- pfo_ret__; \
-})
-
-#define percpu_stable_op(op, var) \
-({ \
- typeof(var) pfo_ret__; \
- switch (sizeof(var)) { \
- case 1: \
- asm(op "b "__percpu_arg(P1)",%0" \
- : "=q" (pfo_ret__) \
- : "p" (&(var))); \
- break; \
- case 2: \
- asm(op "w "__percpu_arg(P1)",%0" \
- : "=r" (pfo_ret__) \
- : "p" (&(var))); \
- break; \
- case 4: \
- asm(op "l "__percpu_arg(P1)",%0" \
- : "=r" (pfo_ret__) \
- : "p" (&(var))); \
- break; \
- case 8: \
- asm(op "q "__percpu_arg(P1)",%0" \
- : "=r" (pfo_ret__) \
- : "p" (&(var))); \
- break; \
- default: __bad_percpu_size(); \
- } \
- pfo_ret__; \
+#define percpu_from_op(size, qual, op, _var) \
+({ \
+ __pcpu_type_##size pfo_val__; \
+ asm qual (__pcpu_op2_##size(op, __percpu_arg([var]), "%[val]") \
+ : [val] __pcpu_reg_##size("=", pfo_val__) \
+ : [var] "m" (_var)); \
+ (typeof(_var))(unsigned long) pfo_val__; \
})
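A matching read-side sketch (same assumptions): percpu_from_op(8, volatile, "mov", var) becomes roughly the following; casting the u64 back through unsigned long to typeof(var) is what lets one macro return both integer and pointer per-cpu variables:

	({
		u64 pfo_val__;
		asm volatile("movq %%gs:%[var], %[val]"
			     : [val] "=r" (pfo_val__)
			     : [var] "m" (var));
		(typeof(var))(unsigned long)pfo_val__;
	})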
-#define percpu_unary_op(qual, op, var) \
-({ \
- switch (sizeof(var)) { \
- case 1: \
- asm qual (op "b "__percpu_arg(0) \
- : "+m" (var)); \
- break; \
- case 2: \
- asm qual (op "w "__percpu_arg(0) \
- : "+m" (var)); \
- break; \
- case 4: \
- asm qual (op "l "__percpu_arg(0) \
- : "+m" (var)); \
- break; \
- case 8: \
- asm qual (op "q "__percpu_arg(0) \
- : "+m" (var)); \
- break; \
- default: __bad_percpu_size(); \
- } \
+#define percpu_stable_op(size, op, _var) \
+({ \
+ __pcpu_type_##size pfo_val__; \
+ asm(__pcpu_op2_##size(op, __percpu_arg(P[var]), "%[val]") \
+ : [val] __pcpu_reg_##size("=", pfo_val__) \
+ : [var] "p" (&(_var))); \
+ (typeof(_var))(unsigned long) pfo_val__; \
})
/*
* Add return operation
*/
-#define percpu_add_return_op(qual, var, val) \
+#define percpu_add_return_op(size, qual, _var, _val) \
({ \
- typeof(var) paro_ret__ = val; \
- switch (sizeof(var)) { \
- case 1: \
- asm qual ("xaddb %0, "__percpu_arg(1) \
- : "+q" (paro_ret__), "+m" (var) \
- : : "memory"); \
- break; \
- case 2: \
- asm qual ("xaddw %0, "__percpu_arg(1) \
- : "+r" (paro_ret__), "+m" (var) \
- : : "memory"); \
- break; \
- case 4: \
- asm qual ("xaddl %0, "__percpu_arg(1) \
- : "+r" (paro_ret__), "+m" (var) \
- : : "memory"); \
- break; \
- case 8: \
- asm qual ("xaddq %0, "__percpu_arg(1) \
- : "+re" (paro_ret__), "+m" (var) \
- : : "memory"); \
- break; \
- default: __bad_percpu_size(); \
- } \
- paro_ret__ += val; \
- paro_ret__; \
+ __pcpu_type_##size paro_tmp__ = __pcpu_cast_##size(_val); \
+ asm qual (__pcpu_op2_##size("xadd", "%[tmp]", \
+ __percpu_arg([var])) \
+ : [tmp] __pcpu_reg_##size("+", paro_tmp__), \
+ [var] "+m" (_var) \
+ : : "memory"); \
+ (typeof(_var))(unsigned long) (paro_tmp__ + _val); \
})
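Sketch of percpu_add_return_op(4, volatile, var, val) under the same assumptions: xadd leaves the old value in the register operand, so the new value is reconstructed by adding val back in C, just as the old paro_ret__ += val did:

	({
		u32 paro_tmp__ = (u32)(((unsigned long)val) & 0xffffffff);
		asm volatile("xaddl %[tmp], %%gs:%[var]"
			     : [tmp] "+r" (paro_tmp__), [var] "+m" (var)
			     : : "memory");
		(typeof(var))(unsigned long)(paro_tmp__ + val);
	})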
/*
@@ -299,85 +176,38 @@ do { \
* expensive due to the implied lock prefix. The processor cannot prefetch
* cachelines if xchg is used.
*/
-#define percpu_xchg_op(qual, var, nval) \
+#define percpu_xchg_op(size, qual, _var, _nval) \
({ \
- typeof(var) pxo_ret__; \
- typeof(var) pxo_new__ = (nval); \
- switch (sizeof(var)) { \
- case 1: \
- asm qual ("\n\tmov "__percpu_arg(1)",%%al" \
- "\n1:\tcmpxchgb %2, "__percpu_arg(1) \
- "\n\tjnz 1b" \
- : "=&a" (pxo_ret__), "+m" (var) \
- : "q" (pxo_new__) \
- : "memory"); \
- break; \
- case 2: \
- asm qual ("\n\tmov "__percpu_arg(1)",%%ax" \
- "\n1:\tcmpxchgw %2, "__percpu_arg(1) \
- "\n\tjnz 1b" \
- : "=&a" (pxo_ret__), "+m" (var) \
- : "r" (pxo_new__) \
- : "memory"); \
- break; \
- case 4: \
- asm qual ("\n\tmov "__percpu_arg(1)",%%eax" \
- "\n1:\tcmpxchgl %2, "__percpu_arg(1) \
- "\n\tjnz 1b" \
- : "=&a" (pxo_ret__), "+m" (var) \
- : "r" (pxo_new__) \
- : "memory"); \
- break; \
- case 8: \
- asm qual ("\n\tmov "__percpu_arg(1)",%%rax" \
- "\n1:\tcmpxchgq %2, "__percpu_arg(1) \
- "\n\tjnz 1b" \
- : "=&a" (pxo_ret__), "+m" (var) \
- : "r" (pxo_new__) \
- : "memory"); \
- break; \
- default: __bad_percpu_size(); \
- } \
- pxo_ret__; \
+ __pcpu_type_##size pxo_old__; \
+ __pcpu_type_##size pxo_new__ = __pcpu_cast_##size(_nval); \
+ asm qual (__pcpu_op2_##size("mov", __percpu_arg([var]), \
+ "%[oval]") \
+ "\n1:\t" \
+ __pcpu_op2_##size("cmpxchg", "%[nval]", \
+ __percpu_arg([var])) \
+ "\n\tjnz 1b" \
+ : [oval] "=&a" (pxo_old__), \
+ [var] "+m" (_var) \
+ : [nval] __pcpu_reg_##size(, pxo_new__) \
+ : "memory"); \
+ (typeof(_var))(unsigned long) pxo_old__; \
})
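Expanded for percpu_xchg_op(8, volatile, var, nval) (sketch, same assumptions), the rewrite keeps the existing mov + cmpxchg retry loop that avoids xchg's implied lock prefix; the loop only retries if the slot changed underneath us, e.g. due to an interrupt on this CPU:

	({
		u64 pxo_old__;
		u64 pxo_new__ = (u64)(nval);
		asm volatile("movq %%gs:%[var], %[oval]\n"
			     "1:\tcmpxchgq %[nval], %%gs:%[var]\n\t"
			     "jnz 1b"
			     : [oval] "=&a" (pxo_old__), [var] "+m" (var)
			     : [nval] "r" (pxo_new__)
			     : "memory");
		(typeof(var))(unsigned long)pxo_old__;
	})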
/*
* cmpxchg has no such implied lock semantics as a result it is much
* more efficient for cpu local operations.
*/
-#define percpu_cmpxchg_op(qual, var, oval, nval) \
+#define percpu_cmpxchg_op(size, qual, _var, _oval, _nval) \
({ \
- typeof(var) pco_ret__; \
- typeof(var) pco_old__ = (oval); \
- typeof(var) pco_new__ = (nval); \
- switch (sizeof(var)) { \
- case 1: \
- asm qual ("cmpxchgb %2, "__percpu_arg(1) \
- : "=a" (pco_ret__), "+m" (var) \
- : "q" (pco_new__), "0" (pco_old__) \
- : "memory"); \
- break; \
- case 2: \
- asm qual ("cmpxchgw %2, "__percpu_arg(1) \
- : "=a" (pco_ret__), "+m" (var) \
- : "r" (pco_new__), "0" (pco_old__) \
- : "memory"); \
- break; \
- case 4: \
- asm qual ("cmpxchgl %2, "__percpu_arg(1) \
- : "=a" (pco_ret__), "+m" (var) \
- : "r" (pco_new__), "0" (pco_old__) \
- : "memory"); \
- break; \
- case 8: \
- asm qual ("cmpxchgq %2, "__percpu_arg(1) \
- : "=a" (pco_ret__), "+m" (var) \
- : "r" (pco_new__), "0" (pco_old__) \
- : "memory"); \
- break; \
- default: __bad_percpu_size(); \
- } \
- pco_ret__; \
+ __pcpu_type_##size pco_old__ = __pcpu_cast_##size(_oval); \
+ __pcpu_type_##size pco_new__ = __pcpu_cast_##size(_nval); \
+ asm qual (__pcpu_op2_##size("cmpxchg", "%[nval]", \
+ __percpu_arg([var])) \
+ : [oval] "+a" (pco_old__), \
+ [var] "+m" (_var) \
+ : [nval] __pcpu_reg_##size(, pco_new__) \
+ : "memory"); \
+ (typeof(_var))(unsigned long) pco_old__; \
})
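The corresponding percpu_cmpxchg_op(4, volatile, var, oval, nval) expansion (sketch): the single "+a" constraint on pco_old__ replaces the old "=a"/"0" output-input pair, since the expected old value goes into %eax and the value actually found in memory comes back out in it:

	({
		u32 pco_old__ = (u32)(((unsigned long)oval) & 0xffffffff);
		u32 pco_new__ = (u32)(((unsigned long)nval) & 0xffffffff);
		asm volatile("cmpxchgl %[nval], %%gs:%[var]"
			     : [oval] "+a" (pco_old__), [var] "+m" (var)
			     : [nval] "r" (pco_new__)
			     : "memory");
		(typeof(var))(unsigned long)pco_old__;
	})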
/*
@@ -389,24 +219,28 @@ do { \
* per-thread variables implemented as per-cpu variables and thus
* stable for the duration of the respective task.
*/
-#define this_cpu_read_stable(var) percpu_stable_op("mov", var)
-
-#define raw_cpu_read_1(pcp) percpu_from_op(, "mov", pcp)
-#define raw_cpu_read_2(pcp) percpu_from_op(, "mov", pcp)
-#define raw_cpu_read_4(pcp) percpu_from_op(, "mov", pcp)
-
-#define raw_cpu_write_1(pcp, val) percpu_to_op(, "mov", (pcp), val)
-#define raw_cpu_write_2(pcp, val) percpu_to_op(, "mov", (pcp), val)
-#define raw_cpu_write_4(pcp, val) percpu_to_op(, "mov", (pcp), val)
-#define raw_cpu_add_1(pcp, val) percpu_add_op(, (pcp), val)
-#define raw_cpu_add_2(pcp, val) percpu_add_op(, (pcp), val)
-#define raw_cpu_add_4(pcp, val) percpu_add_op(, (pcp), val)
-#define raw_cpu_and_1(pcp, val) percpu_to_op(, "and", (pcp), val)
-#define raw_cpu_and_2(pcp, val) percpu_to_op(, "and", (pcp), val)
-#define raw_cpu_and_4(pcp, val) percpu_to_op(, "and", (pcp), val)
-#define raw_cpu_or_1(pcp, val) percpu_to_op(, "or", (pcp), val)
-#define raw_cpu_or_2(pcp, val) percpu_to_op(, "or", (pcp), val)
-#define raw_cpu_or_4(pcp, val) percpu_to_op(, "or", (pcp), val)
+#define this_cpu_read_stable_1(pcp) percpu_stable_op(1, "mov", pcp)
+#define this_cpu_read_stable_2(pcp) percpu_stable_op(2, "mov", pcp)
+#define this_cpu_read_stable_4(pcp) percpu_stable_op(4, "mov", pcp)
+#define this_cpu_read_stable_8(pcp) percpu_stable_op(8, "mov", pcp)
+#define this_cpu_read_stable(pcp) __pcpu_size_call_return(this_cpu_read_stable_, pcp)
+
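For illustration of the new per-size stable reads (assuming the existing x86 current_task per-cpu variable and get_current() from arch/x86/include/asm/current.h): because percpu_stable_op() passes the address via the "p" constraint and %P rather than an "m" operand, the compiler may cache the result across a function, which is exactly what the 'current' accessor relies on:

	static __always_inline struct task_struct *get_current(void)
	{
		return this_cpu_read_stable(current_task);	/* typically one insn: movq %gs:current_task, %rax */
	}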
+#define raw_cpu_read_1(pcp) percpu_from_op(1, , "mov", pcp)
+#define raw_cpu_read_2(pcp) percpu_from_op(2, , "mov", pcp)
+#define raw_cpu_read_4(pcp) percpu_from_op(4, , "mov", pcp)
+
+#define raw_cpu_write_1(pcp, val) percpu_to_op(1, , "mov", (pcp), val)
+#define raw_cpu_write_2(pcp, val) percpu_to_op(2, , "mov", (pcp), val)
+#define raw_cpu_write_4(pcp, val) percpu_to_op(4, , "mov", (pcp), val)
+#define raw_cpu_add_1(pcp, val) percpu_add_op(1, , (pcp), val)
+#define raw_cpu_add_2(pcp, val) percpu_add_op(2, , (pcp), val)
+#define raw_cpu_add_4(pcp, val) percpu_add_op(4, , (pcp), val)
+#define raw_cpu_and_1(pcp, val) percpu_to_op(1, , "and", (pcp), val)
+#define raw_cpu_and_2(pcp, val) percpu_to_op(2, , "and", (pcp), val)
+#define raw_cpu_and_4(pcp, val) percpu_to_op(4, , "and", (pcp), val)
+#define raw_cpu_or_1(pcp, val) percpu_to_op(1, , "or", (pcp), val)
+#define raw_cpu_or_2(pcp, val) percpu_to_op(2, , "or", (pcp), val)
+#define raw_cpu_or_4(pcp, val) percpu_to_op(4, , "or", (pcp), val)
/*
* raw_cpu_xchg() can use a load-store since it is not required to be
@@ -423,38 +257,38 @@ do { \
#define raw_cpu_xchg_2(pcp, val) raw_percpu_xchg_op(pcp, val)
#define raw_cpu_xchg_4(pcp, val) raw_percpu_xchg_op(pcp, val)
-#define this_cpu_read_1(pcp) percpu_from_op(volatile, "mov", pcp)
-#define this_cpu_read_2(pcp) percpu_from_op(volatile, "mov", pcp)
-#define this_cpu_read_4(pcp) percpu_from_op(volatile, "mov", pcp)
-#define this_cpu_write_1(pcp, val) percpu_to_op(volatile, "mov", (pcp), val)
-#define this_cpu_write_2(pcp, val) percpu_to_op(volatile, "mov", (pcp), val)
-#define this_cpu_write_4(pcp, val) percpu_to_op(volatile, "mov", (pcp), val)
-#define this_cpu_add_1(pcp, val) percpu_add_op(volatile, (pcp), val)
-#define this_cpu_add_2(pcp, val) percpu_add_op(volatile, (pcp), val)
-#define this_cpu_add_4(pcp, val) percpu_add_op(volatile, (pcp), val)
-#define this_cpu_and_1(pcp, val) percpu_to_op(volatile, "and", (pcp), val)
-#define this_cpu_and_2(pcp, val) percpu_to_op(volatile, "and", (pcp), val)
-#define this_cpu_and_4(pcp, val) percpu_to_op(volatile, "and", (pcp), val)
-#define this_cpu_or_1(pcp, val) percpu_to_op(volatile, "or", (pcp), val)
-#define this_cpu_or_2(pcp, val) percpu_to_op(volatile, "or", (pcp), val)
-#define this_cpu_or_4(pcp, val) percpu_to_op(volatile, "or", (pcp), val)
-#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
-#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
-#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
-
-#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(, pcp, val)
-#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(, pcp, val)
-#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(, pcp, val)
-#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
-#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
-#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
-
-#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(volatile, pcp, val)
-#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(volatile, pcp, val)
-#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(volatile, pcp, val)
-#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
-#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
-#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
+#define this_cpu_read_1(pcp) percpu_from_op(1, volatile, "mov", pcp)
+#define this_cpu_read_2(pcp) percpu_from_op(2, volatile, "mov", pcp)
+#define this_cpu_read_4(pcp) percpu_from_op(4, volatile, "mov", pcp)
+#define this_cpu_write_1(pcp, val) percpu_to_op(1, volatile, "mov", (pcp), val)
+#define this_cpu_write_2(pcp, val) percpu_to_op(2, volatile, "mov", (pcp), val)
+#define this_cpu_write_4(pcp, val) percpu_to_op(4, volatile, "mov", (pcp), val)
+#define this_cpu_add_1(pcp, val) percpu_add_op(1, volatile, (pcp), val)
+#define this_cpu_add_2(pcp, val) percpu_add_op(2, volatile, (pcp), val)
+#define this_cpu_add_4(pcp, val) percpu_add_op(4, volatile, (pcp), val)
+#define this_cpu_and_1(pcp, val) percpu_to_op(1, volatile, "and", (pcp), val)
+#define this_cpu_and_2(pcp, val) percpu_to_op(2, volatile, "and", (pcp), val)
+#define this_cpu_and_4(pcp, val) percpu_to_op(4, volatile, "and", (pcp), val)
+#define this_cpu_or_1(pcp, val) percpu_to_op(1, volatile, "or", (pcp), val)
+#define this_cpu_or_2(pcp, val) percpu_to_op(2, volatile, "or", (pcp), val)
+#define this_cpu_or_4(pcp, val) percpu_to_op(4, volatile, "or", (pcp), val)
+#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(1, volatile, pcp, nval)
+#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(2, volatile, pcp, nval)
+#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(4, volatile, pcp, nval)
+
+#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(1, , pcp, val)
+#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(2, , pcp, val)
+#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(4, , pcp, val)
+#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, , pcp, oval, nval)
+#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, , pcp, oval, nval)
+#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, , pcp, oval, nval)
+
+#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(1, volatile, pcp, val)
+#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(2, volatile, pcp, val)
+#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(4, volatile, pcp, val)
+#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, volatile, pcp, oval, nval)
+#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, volatile, pcp, oval, nval)
+#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, volatile, pcp, oval, nval)
#ifdef CONFIG_X86_CMPXCHG64
#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \
@@ -478,23 +312,23 @@ do { \
* 32 bit must fall back to generic operations.
*/
#ifdef CONFIG_X86_64
-#define raw_cpu_read_8(pcp) percpu_from_op(, "mov", pcp)
-#define raw_cpu_write_8(pcp, val) percpu_to_op(, "mov", (pcp), val)
-#define raw_cpu_add_8(pcp, val) percpu_add_op(, (pcp), val)
-#define raw_cpu_and_8(pcp, val) percpu_to_op(, "and", (pcp), val)
-#define raw_cpu_or_8(pcp, val) percpu_to_op(, "or", (pcp), val)
-#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(, pcp, val)
+#define raw_cpu_read_8(pcp) percpu_from_op(8, , "mov", pcp)
+#define raw_cpu_write_8(pcp, val) percpu_to_op(8, , "mov", (pcp), val)
+#define raw_cpu_add_8(pcp, val) percpu_add_op(8, , (pcp), val)
+#define raw_cpu_and_8(pcp, val) percpu_to_op(8, , "and", (pcp), val)
+#define raw_cpu_or_8(pcp, val) percpu_to_op(8, , "or", (pcp), val)
+#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(8, , pcp, val)
#define raw_cpu_xchg_8(pcp, nval) raw_percpu_xchg_op(pcp, nval)
-#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(, pcp, oval, nval)
-
-#define this_cpu_read_8(pcp) percpu_from_op(volatile, "mov", pcp)
-#define this_cpu_write_8(pcp, val) percpu_to_op(volatile, "mov", (pcp), val)
-#define this_cpu_add_8(pcp, val) percpu_add_op(volatile, (pcp), val)
-#define this_cpu_and_8(pcp, val) percpu_to_op(volatile, "and", (pcp), val)
-#define this_cpu_or_8(pcp, val) percpu_to_op(volatile, "or", (pcp), val)
-#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(volatile, pcp, val)
-#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(volatile, pcp, nval)
-#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(volatile, pcp, oval, nval)
+#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval)
+
+#define this_cpu_read_8(pcp) percpu_from_op(8, volatile, "mov", pcp)
+#define this_cpu_write_8(pcp, val) percpu_to_op(8, volatile, "mov", (pcp), val)
+#define this_cpu_add_8(pcp, val) percpu_add_op(8, volatile, (pcp), val)
+#define this_cpu_and_8(pcp, val) percpu_to_op(8, volatile, "and", (pcp), val)
+#define this_cpu_or_8(pcp, val) percpu_to_op(8, volatile, "or", (pcp), val)
+#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(8, volatile, pcp, val)
+#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(8, volatile, pcp, nval)
+#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval)
/*
* Pretty complex macro to generate cmpxchg16 instruction. The instruction