aboutsummaryrefslogtreecommitdiffstats
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Makefile4
-rw-r--r--arch/s390/boot/Makefile2
-rw-r--r--arch/s390/boot/kaslr.c2
-rw-r--r--arch/s390/boot/startup.c5
-rw-r--r--arch/s390/boot/uv.c3
-rw-r--r--arch/s390/include/asm/kvm_host.h8
-rw-r--r--arch/s390/include/asm/lowcore.h4
-rw-r--r--arch/s390/include/asm/page.h4
-rw-r--r--arch/s390/include/asm/pci.h2
-rw-r--r--arch/s390/include/asm/pci_insn.h10
-rw-r--r--arch/s390/include/asm/pci_io.h10
-rw-r--r--arch/s390/include/asm/pgalloc.h16
-rw-r--r--arch/s390/include/asm/pgtable.h10
-rw-r--r--arch/s390/include/asm/processor.h1
-rw-r--r--arch/s390/include/asm/qdio.h2
-rw-r--r--arch/s390/include/asm/setup.h7
-rw-r--r--arch/s390/include/asm/syscall.h12
-rw-r--r--arch/s390/include/asm/timex.h18
-rw-r--r--arch/s390/include/asm/uaccess.h4
-rw-r--r--arch/s390/include/asm/unwind.h1
-rw-r--r--arch/s390/include/asm/vdso.h1
-rw-r--r--arch/s390/kernel/asm-offsets.c4
-rw-r--r--arch/s390/kernel/debug.c3
-rw-r--r--arch/s390/kernel/diag.c2
-rw-r--r--arch/s390/kernel/dis.c13
-rw-r--r--arch/s390/kernel/early.c4
-rw-r--r--arch/s390/kernel/entry.S67
-rw-r--r--arch/s390/kernel/idle.c29
-rw-r--r--arch/s390/kernel/machine_kexec.c4
-rw-r--r--arch/s390/kernel/machine_kexec_file.c2
-rw-r--r--arch/s390/kernel/machine_kexec_reloc.c1
-rw-r--r--arch/s390/kernel/mcount.S16
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c21
-rw-r--r--arch/s390/kernel/perf_cpum_cf_diag.c10
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c39
-rw-r--r--arch/s390/kernel/perf_event.c8
-rw-r--r--arch/s390/kernel/process.c1
-rw-r--r--arch/s390/kernel/processor.c5
-rw-r--r--arch/s390/kernel/ptrace.c37
-rw-r--r--arch/s390/kernel/setup.c5
-rw-r--r--arch/s390/kernel/smp.c87
-rw-r--r--arch/s390/kernel/time.c1
-rw-r--r--arch/s390/kernel/unwind_bc.c37
-rw-r--r--arch/s390/kernel/vdso64/Makefile10
-rw-r--r--arch/s390/kernel/vdso64/clock_getres.S10
-rw-r--r--arch/s390/kvm/interrupt.c6
-rw-r--r--arch/s390/kvm/kvm-s390.c9
-rw-r--r--arch/s390/kvm/vsie.c1
-rw-r--r--arch/s390/lib/uaccess.c4
-rw-r--r--arch/s390/mm/cmm.c12
-rw-r--r--arch/s390/mm/gmap.c34
-rw-r--r--arch/s390/mm/hugetlbpage.c111
-rw-r--r--arch/s390/mm/maccess.c24
-rw-r--r--arch/s390/mm/pgalloc.c16
-rw-r--r--arch/s390/mm/vmem.c4
-rw-r--r--arch/s390/pci/pci.c10
-rw-r--r--arch/s390/pci/pci_clp.c51
-rw-r--r--arch/s390/pci/pci_irq.c7
-rw-r--r--arch/s390/pci/pci_mmio.c213
-rw-r--r--arch/s390/pci/pci_sysfs.c63
-rw-r--r--arch/s390/purgatory/Makefile6
-rw-r--r--arch/s390/purgatory/string.c3
62 files changed, 859 insertions, 257 deletions
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index e48013cf50a2..1d53ea0a4e14 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -69,7 +69,7 @@ cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include
#
cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls
-ifeq ($(call cc-option-yn,-mpacked-stack),y)
+ifeq ($(call cc-option-yn,-mpacked-stack -mbackchain -msoft-float),y)
cflags-$(CONFIG_PACK_STACK) += -mpacked-stack -D__PACK_STACK
aflags-$(CONFIG_PACK_STACK) += -D__PACK_STACK
endif
@@ -146,7 +146,7 @@ all: bzImage
#KBUILD_IMAGE is necessary for packaging targets like rpm-pkg, deb-pkg...
KBUILD_IMAGE := $(boot)/bzImage
-install: vmlinux
+install:
$(Q)$(MAKE) $(build)=$(boot) $@
bzImage: vmlinux
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index 7cba96e7587b..98e808b4e6ec 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -70,7 +70,7 @@ $(obj)/compressed/vmlinux: $(obj)/startup.a FORCE
$(obj)/startup.a: $(OBJECTS) FORCE
$(call if_changed,ar)
-install: $(CONFIGURE) $(obj)/bzImage
+install:
sh -x $(srctree)/$(obj)/install.sh $(KERNELRELEASE) $(obj)/bzImage \
System.map "$(INSTALL_PATH)"
diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c
index 3bdd8132e56b..ae3e22355b9f 100644
--- a/arch/s390/boot/kaslr.c
+++ b/arch/s390/boot/kaslr.c
@@ -73,7 +73,7 @@ static unsigned long get_random(unsigned long limit)
*(unsigned long *) prng.parm_block ^= seed;
for (i = 0; i < 16; i++) {
cpacf_kmc(CPACF_KMC_PRNG, prng.parm_block,
- (char *) entropy, (char *) entropy,
+ (u8 *) entropy, (u8 *) entropy,
sizeof(entropy));
memcpy(prng.parm_block, entropy, sizeof(entropy));
}
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index ceeacbeff600..437ddec24723 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -164,6 +164,11 @@ void startup_kernel(void)
handle_relocs(__kaslr_offset);
if (__kaslr_offset) {
+ /*
+ * Save KASLR offset for early dumps, before vmcore_info is set.
+ * Mark as uneven to distinguish from real vmcore_info pointer.
+ */
+ S390_lowcore.vmcore_info = __kaslr_offset | 0x1UL;
/* Clear non-relocated kernel */
if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED))
memset(img, 0, vmlinux.image_size);
diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c
index ed007f4a6444..3f501159ee9f 100644
--- a/arch/s390/boot/uv.c
+++ b/arch/s390/boot/uv.c
@@ -15,7 +15,8 @@ void uv_query_info(void)
if (!test_facility(158))
return;
- if (uv_call(0, (uint64_t)&uvcb))
+ /* rc==0x100 means that there is additional data we do not process */
+ if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != 0x100)
return;
if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) &&
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 2b00a3ebee08..8b5a39e4739a 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -30,12 +30,12 @@
#define KVM_USER_MEM_SLOTS 32
/*
- * These seem to be used for allocating ->chip in the routing table,
- * which we don't use. 4096 is an out-of-thin-air value. If we need
- * to look at ->chip later on, we'll need to revisit this.
+ * These seem to be used for allocating ->chip in the routing table, which we
+ * don't use. 1 is as small as we can get to reduce the needed memory. If we
+ * need to look at ->chip later on, we'll need to revisit this.
*/
#define KVM_NR_IRQCHIPS 1
-#define KVM_IRQCHIP_NUM_PINS 4096
+#define KVM_IRQCHIP_NUM_PINS 1
#define KVM_HALT_POLL_NS_DEFAULT 50000
/* s390-specific vcpu->requests bit members */
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 237ee0c4169f..612ed3c6d581 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -141,7 +141,9 @@ struct lowcore {
/* br %r1 trampoline */
__u16 br_r1_trampoline; /* 0x0400 */
- __u8 pad_0x0402[0x0e00-0x0402]; /* 0x0402 */
+ __u32 return_lpswe; /* 0x0402 */
+ __u32 return_mcck_lpswe; /* 0x0406 */
+ __u8 pad_0x040a[0x0e00-0x040a]; /* 0x040a */
/*
* 0xe00 contains the address of the IPL Parameter Information
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 823578c6b9e2..e399102367af 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -33,6 +33,8 @@
#define ARCH_HAS_PREPARE_HUGEPAGE
#define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+
#include <asm/setup.h>
#ifndef __ASSEMBLY__
@@ -40,7 +42,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end);
static inline void storage_key_init_range(unsigned long start, unsigned long end)
{
- if (PAGE_DEFAULT_KEY)
+ if (PAGE_DEFAULT_KEY != 0)
__storage_key_init_range(start, end);
}
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 305befd55326..0cca0e2d2a35 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -183,7 +183,7 @@ void zpci_remove_reserved_devices(void);
/* CLP */
int clp_scan_pci_devices(void);
int clp_rescan_pci_devices(void);
-int clp_rescan_pci_devices_simple(void);
+int clp_rescan_pci_devices_simple(u32 *fid);
int clp_add_pci_device(u32, u32, int);
int clp_enable_fh(struct zpci_dev *, u8);
int clp_disable_fh(struct zpci_dev *);
diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h
index ff81ed19c506..61cf9531f68f 100644
--- a/arch/s390/include/asm/pci_insn.h
+++ b/arch/s390/include/asm/pci_insn.h
@@ -143,14 +143,4 @@ static inline int zpci_set_irq_ctrl(u16 ctl, u8 isc)
return __zpci_set_irq_ctrl(ctl, isc, &iib);
}
-#ifdef CONFIG_PCI
-static inline void enable_mio_ctl(void)
-{
- if (static_branch_likely(&have_mio))
- __ctl_set_bit(2, 5);
-}
-#else /* CONFIG_PCI */
-static inline void enable_mio_ctl(void) {}
-#endif /* CONFIG_PCI */
-
#endif
diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h
index cd060b5dd8fd..e4dc64cc9c55 100644
--- a/arch/s390/include/asm/pci_io.h
+++ b/arch/s390/include/asm/pci_io.h
@@ -8,6 +8,10 @@
#include <linux/slab.h>
#include <asm/pci_insn.h>
+/* I/O size constraints */
+#define ZPCI_MAX_READ_SIZE 8
+#define ZPCI_MAX_WRITE_SIZE 128
+
/* I/O Map */
#define ZPCI_IOMAP_SHIFT 48
#define ZPCI_IOMAP_ADDR_BASE 0x8000000000000000UL
@@ -140,7 +144,8 @@ static inline int zpci_memcpy_fromio(void *dst,
while (n > 0) {
size = zpci_get_max_write_size((u64 __force) src,
- (u64) dst, n, 8);
+ (u64) dst, n,
+ ZPCI_MAX_READ_SIZE);
rc = zpci_read_single(dst, src, size);
if (rc)
break;
@@ -161,7 +166,8 @@ static inline int zpci_memcpy_toio(volatile void __iomem *dst,
while (n > 0) {
size = zpci_get_max_write_size((u64 __force) dst,
- (u64) src, n, 128);
+ (u64) src, n,
+ ZPCI_MAX_WRITE_SIZE);
if (size > 8) /* main path */
rc = zpci_write_block(dst, src, size);
else
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index bccb8f4a63e2..77606c4acd58 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -56,7 +56,12 @@ static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address)
crst_table_init(table, _REGION2_ENTRY_EMPTY);
return (p4d_t *) table;
}
-#define p4d_free(mm, p4d) crst_table_free(mm, (unsigned long *) p4d)
+
+static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
+{
+ if (!mm_p4d_folded(mm))
+ crst_table_free(mm, (unsigned long *) p4d);
+}
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
{
@@ -65,7 +70,12 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
crst_table_init(table, _REGION3_ENTRY_EMPTY);
return (pud_t *) table;
}
-#define pud_free(mm, pud) crst_table_free(mm, (unsigned long *) pud)
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+ if (!mm_pud_folded(mm))
+ crst_table_free(mm, (unsigned long *) pud);
+}
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
{
@@ -83,6 +93,8 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
{
+ if (mm_pmd_folded(mm))
+ return;
pgtable_pmd_page_dtor(virt_to_page(pmd));
crst_table_free(mm, (unsigned long *) pmd);
}
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 3c17771fe43e..cb9ded67c77c 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -755,6 +755,12 @@ static inline int pmd_write(pmd_t pmd)
return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0;
}
+#define pud_write pud_write
+static inline int pud_write(pud_t pud)
+{
+ return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0;
+}
+
static inline int pmd_dirty(pmd_t pmd)
{
int dirty = 1;
@@ -1172,8 +1178,6 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr);
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t entry)
{
- if (!MACHINE_HAS_NX)
- pte_val(entry) &= ~_PAGE_NOEXEC;
if (pte_present(entry))
pte_val(entry) &= ~_PAGE_UNUSED;
if (mm_has_pgste(mm))
@@ -1190,6 +1194,8 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot)
{
pte_t __pte;
pte_val(__pte) = physpage + pgprot_val(pgprot);
+ if (!MACHINE_HAS_NX)
+ pte_val(__pte) &= ~_PAGE_NOEXEC;
return pte_mkyoung(__pte);
}
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index b0fcbc37b637..2c0b8b5fe19a 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -161,6 +161,7 @@ typedef struct thread_struct thread_struct;
#define INIT_THREAD { \
.ksp = sizeof(init_stack) + (unsigned long) &init_stack, \
.fpu.regs = (void *) init_task.thread.fpu.fprs, \
+ .last_break = 1, \
}
/*
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index db5ef22c46e4..b265106ca331 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -228,7 +228,7 @@ struct qdio_buffer {
* @sbal: absolute SBAL address
*/
struct sl_element {
- unsigned long sbal;
+ u64 sbal;
} __attribute__ ((packed));
/**
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 925889d360c1..eb83a57cb503 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -8,6 +8,7 @@
#include <linux/const.h>
#include <uapi/asm/setup.h>
+#include <linux/build_bug.h>
#define EP_OFFSET 0x10008
#define EP_STRING "S390EP"
@@ -153,6 +154,12 @@ static inline unsigned long kaslr_offset(void)
return __kaslr_offset;
}
+static inline u32 gen_lpswe(unsigned long addr)
+{
+ BUILD_BUG_ON(addr > 0xfff);
+ return 0xb2b20000 | addr;
+}
+
#else /* __ASSEMBLY__ */
#define IPL_DEVICE (IPL_DEVICE_OFFSET)
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index f073292e9fdb..d9d5de0f67ff 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -33,7 +33,17 @@ static inline void syscall_rollback(struct task_struct *task,
static inline long syscall_get_error(struct task_struct *task,
struct pt_regs *regs)
{
- return IS_ERR_VALUE(regs->gprs[2]) ? regs->gprs[2] : 0;
+ unsigned long error = regs->gprs[2];
+#ifdef CONFIG_COMPAT
+ if (test_tsk_thread_flag(task, TIF_31BIT)) {
+ /*
+ * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
+ * and will match correctly in comparisons.
+ */
+ error = (long)(int)error;
+ }
+#endif
+ return IS_ERR_VALUE(error) ? error : 0;
}
static inline long syscall_get_return_value(struct task_struct *task,
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 64539c221672..b6a4ce9dafaf 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -10,8 +10,9 @@
#ifndef _ASM_S390_TIMEX_H
#define _ASM_S390_TIMEX_H
-#include <asm/lowcore.h>
+#include <linux/preempt.h>
#include <linux/time64.h>
+#include <asm/lowcore.h>
/* The value of the TOD clock for 1.1.1970. */
#define TOD_UNIX_EPOCH 0x7d91048bca000000ULL
@@ -154,7 +155,7 @@ static inline void get_tod_clock_ext(char *clk)
static inline unsigned long long get_tod_clock(void)
{
- unsigned char clk[STORE_CLOCK_EXT_SIZE];
+ char clk[STORE_CLOCK_EXT_SIZE];
get_tod_clock_ext(clk);
return *((unsigned long long *)&clk[1]);
@@ -186,15 +187,18 @@ extern unsigned char tod_clock_base[16] __aligned(8);
/**
* get_clock_monotonic - returns current time in clock rate units
*
- * The caller must ensure that preemption is disabled.
* The clock and tod_clock_base get changed via stop_machine.
- * Therefore preemption must be disabled when calling this
- * function, otherwise the returned value is not guaranteed to
- * be monotonic.
+ * Therefore preemption must be disabled, otherwise the returned
+ * value is not guaranteed to be monotonic.
*/
static inline unsigned long long get_tod_clock_monotonic(void)
{
- return get_tod_clock() - *(unsigned long long *) &tod_clock_base[1];
+ unsigned long long tod;
+
+ preempt_disable_notrace();
+ tod = get_tod_clock() - *(unsigned long long *) &tod_clock_base[1];
+ preempt_enable_notrace();
+ return tod;
}
/**
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index bd2fd9a7821d..a470f1fa9f2a 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -83,7 +83,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n);
__rc; \
})
-static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
+static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
{
unsigned long spec = 0x010000UL;
int rc;
@@ -113,7 +113,7 @@ static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
return rc;
}
-static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
+static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
{
unsigned long spec = 0x01UL;
int rc;
diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h
index 6eb2ef105d87..ac4568d60d08 100644
--- a/arch/s390/include/asm/unwind.h
+++ b/arch/s390/include/asm/unwind.h
@@ -35,6 +35,7 @@ struct unwind_state {
struct task_struct *task;
struct pt_regs *regs;
unsigned long sp, ip;
+ bool reuse_sp;
int graph_idx;
bool reliable;
bool error;
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
index 169d7604eb80..f3ba84fa9bd1 100644
--- a/arch/s390/include/asm/vdso.h
+++ b/arch/s390/include/asm/vdso.h
@@ -36,6 +36,7 @@ struct vdso_data {
__u32 tk_shift; /* Shift used for xtime_nsec 0x60 */
__u32 ts_dir; /* TOD steering direction 0x64 */
__u64 ts_end; /* TOD steering end 0x68 */
+ __u32 hrtimer_res; /* hrtimer resolution 0x70 */
};
struct vdso_per_cpu_data {
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 41ac4ad21311..a65cb4924bdb 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -76,6 +76,7 @@ int main(void)
OFFSET(__VDSO_TK_SHIFT, vdso_data, tk_shift);
OFFSET(__VDSO_TS_DIR, vdso_data, ts_dir);
OFFSET(__VDSO_TS_END, vdso_data, ts_end);
+ OFFSET(__VDSO_CLOCK_REALTIME_RES, vdso_data, hrtimer_res);
OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base);
OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time);
OFFSET(__VDSO_CPU_NR, vdso_per_cpu_data, cpu_nr);
@@ -87,7 +88,6 @@ int main(void)
DEFINE(__CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
DEFINE(__CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID);
- DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC);
BLANK();
/* idle data offsets */
@@ -125,6 +125,8 @@ int main(void)
OFFSET(__LC_EXT_DAMAGE_CODE, lowcore, external_damage_code);
OFFSET(__LC_MCCK_FAIL_STOR_ADDR, lowcore, failing_storage_address);
OFFSET(__LC_LAST_BREAK, lowcore, breaking_event_addr);
+ OFFSET(__LC_RETURN_LPSWE, lowcore, return_lpswe);
+ OFFSET(__LC_RETURN_MCCK_LPSWE, lowcore, return_mcck_lpswe);
OFFSET(__LC_RST_OLD_PSW, lowcore, restart_old_psw);
OFFSET(__LC_EXT_OLD_PSW, lowcore, external_old_psw);
OFFSET(__LC_SVC_OLD_PSW, lowcore, svc_old_psw);
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 0ebf08c3b35e..b46a3c27eb08 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -198,9 +198,10 @@ static debug_entry_t ***debug_areas_alloc(int pages_per_area, int nr_areas)
if (!areas)
goto fail_malloc_areas;
for (i = 0; i < nr_areas; i++) {
+ /* GFP_NOWARN to avoid user triggerable WARN, we handle fails */
areas[i] = kmalloc_array(pages_per_area,
sizeof(debug_entry_t *),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN);
if (!areas[i])
goto fail_malloc_areas2;
for (j = 0; j < pages_per_area; j++) {
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index e9dac9a24d3f..61f2b0412345 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -84,7 +84,7 @@ static int show_diag_stat(struct seq_file *m, void *v)
static void *show_diag_stat_start(struct seq_file *m, loff_t *pos)
{
- return *pos <= nr_cpu_ids ? (void *)((unsigned long) *pos + 1) : NULL;
+ return *pos <= NR_DIAG_STAT ? (void *)((unsigned long) *pos + 1) : NULL;
}
static void *show_diag_stat_next(struct seq_file *m, void *v, loff_t *pos)
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index b2c68fbf2634..41925f220694 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -462,10 +462,11 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
ptr += sprintf(ptr, "%%c%i", value);
else if (operand->flags & OPERAND_VR)
ptr += sprintf(ptr, "%%v%i", value);
- else if (operand->flags & OPERAND_PCREL)
- ptr += sprintf(ptr, "%lx", (signed int) value
- + addr);
- else if (operand->flags & OPERAND_SIGNED)
+ else if (operand->flags & OPERAND_PCREL) {
+ void *pcrel = (void *)((int)value + addr);
+
+ ptr += sprintf(ptr, "%px", pcrel);
+ } else if (operand->flags & OPERAND_SIGNED)
ptr += sprintf(ptr, "%i", value);
else
ptr += sprintf(ptr, "%u", value);
@@ -537,7 +538,7 @@ void show_code(struct pt_regs *regs)
else
*ptr++ = ' ';
addr = regs->psw.addr + start - 32;
- ptr += sprintf(ptr, "%016lx: ", addr);
+ ptr += sprintf(ptr, "%px: ", (void *)addr);
if (start + opsize >= end)
break;
for (i = 0; i < opsize; i++)
@@ -565,7 +566,7 @@ void print_fn_code(unsigned char *code, unsigned long len)
opsize = insn_length(*code);
if (opsize > len)
break;
- ptr += sprintf(ptr, "%p: ", code);
+ ptr += sprintf(ptr, "%px: ", code);
for (i = 0; i < opsize; i++)
ptr += sprintf(ptr, "%02x", code[i]);
*ptr++ = '\t';
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 629f173f60cd..9cebf0487ebd 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -30,7 +30,6 @@
#include <asm/sclp.h>
#include <asm/facility.h>
#include <asm/boot_data.h>
-#include <asm/pci_insn.h>
#include "entry.h"
/*
@@ -155,6 +154,8 @@ static noinline __init void setup_lowcore_early(void)
psw_t psw;
psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA;
+ if (IS_ENABLED(CONFIG_KASAN))
+ psw.mask |= PSW_MASK_DAT;
psw.addr = (unsigned long) s390_base_ext_handler;
S390_lowcore.external_new_psw = psw;
psw.addr = (unsigned long) s390_base_pgm_handler;
@@ -236,7 +237,6 @@ static __init void detect_machine_facilities(void)
clock_comparator_max = -1ULL >> 1;
__ctl_set_bit(0, 53);
}
- enable_mio_ctl();
}
static inline void save_vector_registers(void)
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 3f4d272577d3..de7918a6830c 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -115,26 +115,29 @@ _LPP_OFFSET = __LC_LPP
.macro SWITCH_ASYNC savearea,timer
tmhh %r8,0x0001 # interrupting from user ?
- jnz 1f
+ jnz 2f
lgr %r14,%r9
+ cghi %r14,__LC_RETURN_LPSWE
+ je 0f
slg %r14,BASED(.Lcritical_start)
clg %r14,BASED(.Lcritical_length)
- jhe 0f
+ jhe 1f
+0:
lghi %r11,\savearea # inside critical section, do cleanup
brasl %r14,cleanup_critical
tmhh %r8,0x0001 # retest problem state after cleanup
- jnz 1f
-0: lg %r14,__LC_ASYNC_STACK # are we already on the target stack?
+ jnz 2f
+1: lg %r14,__LC_ASYNC_STACK # are we already on the target stack?
slgr %r14,%r15
srag %r14,%r14,STACK_SHIFT
- jnz 2f
+ jnz 3f
CHECK_STACK \savearea
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- j 3f
-1: UPDATE_VTIME %r14,%r15,\timer
+ j 4f
+2: UPDATE_VTIME %r14,%r15,\timer
BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
-2: lg %r15,__LC_ASYNC_STACK # load async stack
-3: la %r11,STACK_FRAME_OVERHEAD(%r15)
+3: lg %r15,__LC_ASYNC_STACK # load async stack
+4: la %r11,STACK_FRAME_OVERHEAD(%r15)
.endm
.macro UPDATE_VTIME w1,w2,enter_timer
@@ -365,9 +368,9 @@ ENTRY(system_call)
jnz .Lsysc_nr_ok
# svc 0: system call number in %r1
llgfr %r1,%r1 # clear high word in r1
+ sth %r1,__PT_INT_CODE+2(%r11)
cghi %r1,NR_syscalls
jnl .Lsysc_nr_ok
- sth %r1,__PT_INT_CODE+2(%r11)
slag %r8,%r1,3
.Lsysc_nr_ok:
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
@@ -401,7 +404,7 @@ ENTRY(system_call)
stpt __LC_EXIT_TIMER
mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
lmg %r11,%r15,__PT_R11(%r11)
- lpswe __LC_RETURN_PSW
+ b __LC_RETURN_LPSWE(%r0)
.Lsysc_done:
#
@@ -608,43 +611,50 @@ ENTRY(pgm_check_handler)
BPOFF
stmg %r8,%r15,__LC_SAVE_AREA_SYNC
lg %r10,__LC_LAST_BREAK
- lg %r12,__LC_CURRENT
+ srag %r11,%r10,12
+ jnz 0f
+ /* if __LC_LAST_BREAK is < 4096, it contains one of
+ * the lpswe addresses in lowcore. Set it to 1 (initial state)
+ * to prevent leaking that address to userspace.
+ */
+ lghi %r10,1
+0: lg %r12,__LC_CURRENT
lghi %r11,0
larl %r13,cleanup_critical
lmg %r8,%r9,__LC_PGM_OLD_PSW
tmhh %r8,0x0001 # test problem state bit
- jnz 2f # -> fault in user space
+ jnz 3f # -> fault in user space
#if IS_ENABLED(CONFIG_KVM)
# cleanup critical section for program checks in sie64a
lgr %r14,%r9
slg %r14,BASED(.Lsie_critical_start)
clg %r14,BASED(.Lsie_critical_length)
- jhe 0f
+ jhe 1f
lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
larl %r9,sie_exit # skip forward to sie_exit
lghi %r11,_PIF_GUEST_FAULT
#endif
-0: tmhh %r8,0x4000 # PER bit set in old PSW ?
- jnz 1f # -> enabled, can't be a double fault
+1: tmhh %r8,0x4000 # PER bit set in old PSW ?
+ jnz 2f # -> enabled, can't be a double fault
tm __LC_PGM_ILC+3,0x80 # check for per exception
jnz .Lpgm_svcper # -> single stepped svc
-1: CHECK_STACK __LC_SAVE_AREA_SYNC
+2: CHECK_STACK __LC_SAVE_AREA_SYNC
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- # CHECK_VMAP_STACK branches to stack_overflow or 4f
- CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f
-2: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
+ # CHECK_VMAP_STACK branches to stack_overflow or 5f
+ CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,5f
+3: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
BPENTER __TI_flags(%r12),_TIF_ISOLATE_BP
lg %r15,__LC_KERNEL_STACK
lgr %r14,%r12
aghi %r14,__TASK_thread # pointer to thread_struct
lghi %r13,__LC_PGM_TDB
tm __LC_PGM_ILC+2,0x02 # check for transaction abort
- jz 3f
+ jz 4f
mvc __THREAD_trap_tdb(256,%r14),0(%r13)
-3: stg %r10,__THREAD_last_break(%r14)
-4: lgr %r13,%r11
+4: stg %r10,__THREAD_last_break(%r14)
+5: lgr %r13,%r11
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
# clear user controlled registers to prevent speculative use
@@ -663,14 +673,14 @@ ENTRY(pgm_check_handler)
stg %r13,__PT_FLAGS(%r11)
stg %r10,__PT_ARGS(%r11)
tm __LC_PGM_ILC+3,0x80 # check for per exception
- jz 5f
+ jz 6f
tmhh %r8,0x0001 # kernel per event ?
jz .Lpgm_kprobe
oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP
mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS
mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE
mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID
-5: REENABLE_IRQS
+6: REENABLE_IRQS
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
larl %r1,pgm_check_table
llgh %r10,__PT_INT_CODE+2(%r11)
@@ -775,7 +785,7 @@ ENTRY(io_int_handler)
mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
.Lio_exit_kernel:
lmg %r11,%r15,__PT_R11(%r11)
- lpswe __LC_RETURN_PSW
+ b __LC_RETURN_LPSWE(%r0)
.Lio_done:
#
@@ -1216,7 +1226,7 @@ ENTRY(mcck_int_handler)
stpt __LC_EXIT_TIMER
mvc __VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
0: lmg %r11,%r15,__PT_R11(%r11)
- lpswe __LC_RETURN_MCCK_PSW
+ b __LC_RETURN_MCCK_LPSWE
.Lmcck_panic:
lg %r15,__LC_NODAT_STACK
@@ -1273,6 +1283,8 @@ ENDPROC(stack_overflow)
#endif
ENTRY(cleanup_critical)
+ cghi %r9,__LC_RETURN_LPSWE
+ je .Lcleanup_lpswe
#if IS_ENABLED(CONFIG_KVM)
clg %r9,BASED(.Lcleanup_table_sie) # .Lsie_gmap
jl 0f
@@ -1426,6 +1438,7 @@ ENDPROC(cleanup_critical)
mvc __LC_RETURN_PSW(16),__PT_PSW(%r9)
mvc 0(64,%r11),__PT_R8(%r9)
lmg %r0,%r7,__PT_R0(%r9)
+.Lcleanup_lpswe:
1: lmg %r8,%r9,__LC_RETURN_PSW
BR_EX %r14,%r11
.Lcleanup_sysc_restore_insn:
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index b9d8fe45737a..8f8456816d83 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -69,18 +69,26 @@ DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
static ssize_t show_idle_time(struct device *dev,
struct device_attribute *attr, char *buf)
{
+ unsigned long long now, idle_time, idle_enter, idle_exit, in_idle;
struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
- unsigned long long now, idle_time, idle_enter, idle_exit;
unsigned int seq;
do {
- now = get_tod_clock();
seq = read_seqcount_begin(&idle->seqcount);
idle_time = READ_ONCE(idle->idle_time);
idle_enter = READ_ONCE(idle->clock_idle_enter);
idle_exit = READ_ONCE(idle->clock_idle_exit);
} while (read_seqcount_retry(&idle->seqcount, seq));
- idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0;
+ in_idle = 0;
+ now = get_tod_clock();
+ if (idle_enter) {
+ if (idle_exit) {
+ in_idle = idle_exit - idle_enter;
+ } else if (now > idle_enter) {
+ in_idle = now - idle_enter;
+ }
+ }
+ idle_time += in_idle;
return sprintf(buf, "%llu\n", idle_time >> 12);
}
DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
@@ -88,17 +96,24 @@ DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
u64 arch_cpu_idle_time(int cpu)
{
struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
- unsigned long long now, idle_enter, idle_exit;
+ unsigned long long now, idle_enter, idle_exit, in_idle;
unsigned int seq;
do {
- now = get_tod_clock();
seq = read_seqcount_begin(&idle->seqcount);
idle_enter = READ_ONCE(idle->clock_idle_enter);
idle_exit = READ_ONCE(idle->clock_idle_exit);
} while (read_seqcount_retry(&idle->seqcount, seq));
-
- return cputime_to_nsecs(idle_enter ? ((idle_exit ?: now) - idle_enter) : 0);
+ in_idle = 0;
+ now = get_tod_clock();
+ if (idle_enter) {
+ if (idle_exit) {
+ in_idle = idle_exit - idle_enter;
+ } else if (now > idle_enter) {
+ in_idle = now - idle_enter;
+ }
+ }
+ return cputime_to_nsecs(in_idle);
}
void arch_cpu_idle_enter(void)
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 8a1ae140c5e2..e65c50a394f4 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -163,7 +163,9 @@ static bool kdump_csum_valid(struct kimage *image)
#ifdef CONFIG_CRASH_DUMP
int rc;
+ preempt_disable();
rc = CALL_ON_STACK(do_start_kdump, S390_lowcore.nodat_stack, 1, image);
+ preempt_enable();
return rc == 0;
#else
return false;
@@ -253,10 +255,10 @@ void arch_crash_save_vmcoreinfo(void)
VMCOREINFO_SYMBOL(lowcore_ptr);
VMCOREINFO_SYMBOL(high_memory);
VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
- mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
vmcoreinfo_append_str("SDMA=%lx\n", __sdma);
vmcoreinfo_append_str("EDMA=%lx\n", __edma);
vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
+ mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
}
void machine_shutdown(void)
diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
index fbdd3ea73667..bb7ed0d1a351 100644
--- a/arch/s390/kernel/machine_kexec_file.c
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -173,7 +173,7 @@ static int kexec_file_add_initrd(struct kimage *image,
buf.mem += crashk_res.start;
buf.memsz = buf.bufsz;
- data->parm->initrd_start = buf.mem;
+ data->parm->initrd_start = data->memsz;
data->parm->initrd_size = buf.memsz;
data->memsz += buf.memsz;
diff --git a/arch/s390/kernel/machine_kexec_reloc.c b/arch/s390/kernel/machine_kexec_reloc.c
index 99e7951c199c..a77d7f72dd4a 100644
--- a/arch/s390/kernel/machine_kexec_reloc.c
+++ b/arch/s390/kernel/machine_kexec_reloc.c
@@ -27,6 +27,7 @@ int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val,
break;
case R_390_64: /* Direct 64 bit. */
case R_390_GLOB_DAT:
+ case R_390_JMP_SLOT:
*(u64 *)loc = val;
break;
case R_390_PC16: /* PC relative 16 bit. */
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 9e1660a6b9db..f942341429b1 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -26,6 +26,12 @@ ENDPROC(ftrace_stub)
#define STACK_PTREGS (STACK_FRAME_OVERHEAD)
#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS)
#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW)
+#ifdef __PACK_STACK
+/* allocate just enough for r14, r15 and backchain */
+#define TRACED_FUNC_FRAME_SIZE 24
+#else
+#define TRACED_FUNC_FRAME_SIZE STACK_FRAME_OVERHEAD
+#endif
ENTRY(_mcount)
BR_EX %r14
@@ -35,13 +41,21 @@ EXPORT_SYMBOL(_mcount)
ENTRY(ftrace_caller)
.globl ftrace_regs_caller
.set ftrace_regs_caller,ftrace_caller
+ stg %r14,(__SF_GPRS+8*8)(%r15) # save traced function caller
lgr %r1,%r15
#if !(defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT))
aghi %r0,MCOUNT_RETURN_FIXUP
#endif
- aghi %r15,-STACK_FRAME_SIZE
+ # allocate stack frame for ftrace_caller to contain traced function
+ aghi %r15,-TRACED_FUNC_FRAME_SIZE
stg %r1,__SF_BACKCHAIN(%r15)
+ stg %r0,(__SF_GPRS+8*8)(%r15)
+ stg %r15,(__SF_GPRS+9*8)(%r15)
+ # allocate pt_regs and stack frame for ftrace_trace_function
+ aghi %r15,-STACK_FRAME_SIZE
stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15)
+ aghi %r1,-TRACED_FUNC_FRAME_SIZE
+ stg %r1,__SF_BACKCHAIN(%r15)
stg %r0,(STACK_PTREGS_PSW+8)(%r15)
stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15)
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 48d48b6187c0..0eb1d1cc53a8 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -199,7 +199,7 @@ static const int cpumf_generic_events_user[] = {
[PERF_COUNT_HW_BUS_CYCLES] = -1,
};
-static int __hw_perf_event_init(struct perf_event *event)
+static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
{
struct perf_event_attr *attr = &event->attr;
struct hw_perf_event *hwc = &event->hw;
@@ -207,7 +207,7 @@ static int __hw_perf_event_init(struct perf_event *event)
int err = 0;
u64 ev;
- switch (attr->type) {
+ switch (type) {
case PERF_TYPE_RAW:
/* Raw events are used to access counters directly,
* hence do not permit excludes */
@@ -294,17 +294,16 @@ static int __hw_perf_event_init(struct perf_event *event)
static int cpumf_pmu_event_init(struct perf_event *event)
{
+ unsigned int type = event->attr.type;
int err;
- switch (event->attr.type) {
- case PERF_TYPE_HARDWARE:
- case PERF_TYPE_HW_CACHE:
- case PERF_TYPE_RAW:
- err = __hw_perf_event_init(event);
- break;
- default:
+ if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW)
+ err = __hw_perf_event_init(event, type);
+ else if (event->pmu->type == type)
+ /* Registered as unknown PMU */
+ err = __hw_perf_event_init(event, PERF_TYPE_RAW);
+ else
return -ENOENT;
- }
if (unlikely(err) && event->destroy)
event->destroy(event);
@@ -553,7 +552,7 @@ static int __init cpumf_pmu_init(void)
return -ENODEV;
cpumf_pmu.attr_groups = cpumf_cf_event_group();
- rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW);
+ rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1);
if (rc)
pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
return rc;
diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c
index d4e031f7b9c8..be66511302ff 100644
--- a/arch/s390/kernel/perf_cpum_cf_diag.c
+++ b/arch/s390/kernel/perf_cpum_cf_diag.c
@@ -243,13 +243,13 @@ static int cf_diag_event_init(struct perf_event *event)
int err = -ENOENT;
debug_sprintf_event(cf_diag_dbg, 5,
- "%s event %p cpu %d config %#llx "
+ "%s event %p cpu %d config %#llx type:%u "
"sample_type %#llx cf_diag_events %d\n", __func__,
- event, event->cpu, attr->config, attr->sample_type,
- atomic_read(&cf_diag_events));
+ event, event->cpu, attr->config, event->pmu->type,
+ attr->sample_type, atomic_read(&cf_diag_events));
if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG ||
- event->attr.type != PERF_TYPE_RAW)
+ event->attr.type != event->pmu->type)
goto out;
/* Raw events are used to access counters directly,
@@ -693,7 +693,7 @@ static int __init cf_diag_init(void)
}
debug_register_view(cf_diag_dbg, &debug_sprintf_view);
- rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", PERF_TYPE_RAW);
+ rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1);
if (rc) {
debug_unregister_view(cf_diag_dbg, &debug_sprintf_view);
debug_unregister(cf_diag_dbg);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 1266194afb02..20e07fdb3710 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -193,7 +193,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
unsigned long num_sdb, gfp_t gfp_flags)
{
int i, rc;
- unsigned long *new, *tail;
+ unsigned long *new, *tail, *tail_prev = NULL;
if (!sfb->sdbt || !sfb->tail)
return -EINVAL;
@@ -232,6 +232,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
sfb->num_sdbt++;
/* Link current page to tail of chain */
*tail = (unsigned long)(void *) new + 1;
+ tail_prev = tail;
tail = new;
}
@@ -241,10 +242,22 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
* issue, a new realloc call (if required) might succeed.
*/
rc = alloc_sample_data_block(tail, gfp_flags);
- if (rc)
+ if (rc) {
+ /* Undo last SDBT. An SDBT with no SDB at its first
+ * entry but with an SDBT entry instead can not be
+ * handled by the interrupt handler code.
+ * Avoid this situation.
+ */
+ if (tail_prev) {
+ sfb->num_sdbt--;
+ free_page((unsigned long) new);
+ tail = tail_prev;
+ }
break;
+ }
sfb->num_sdb++;
tail++;
+ tail_prev = new = NULL; /* Allocated at least one SBD */
}
/* Link sampling buffer to its origin */
@@ -1248,18 +1261,28 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
*/
if (flush_all && done)
break;
-
- /* If an event overflow happened, discard samples by
- * processing any remaining sample-data-blocks.
- */
- if (event_overflow)
- flush_all = 1;
}
/* Account sample overflows in the event hardware structure */
if (sampl_overflow)
OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
sampl_overflow, 1 + num_sdb);
+
+ /* Perf_event_overflow() and perf_event_account_interrupt() limit
+ * the interrupt rate to an upper limit. Roughly 1000 samples per
+ * task tick.
+ * Hitting this limit results in a large number
+ * of throttled REF_REPORT_THROTTLE entries and the samples
+ * are dropped.
+ * Slightly increase the interval to avoid hitting this limit.
+ */
+ if (event_overflow) {
+ SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10);
+ debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n",
+ __func__,
+ DIV_ROUND_UP(SAMPL_RATE(hwc), 10));
+ }
+
if (sampl_overflow || event_overflow)
debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
"overflow stats: sample=%llu event=%llu\n",
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index fcb6c2e92b07..1e75cc983546 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -224,9 +224,13 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
struct unwind_state state;
+ unsigned long addr;
- unwind_for_each_frame(&state, current, regs, 0)
- perf_callchain_store(entry, state.ip);
+ unwind_for_each_frame(&state, current, regs, 0) {
+ addr = unwind_get_return_address(&state);
+ if (!addr || perf_callchain_store(entry, addr))
+ return;
+ }
}
/* Perf definitions for PMU event attributes in sysfs */
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 9f2727bf3cbe..e72b1cde227d 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -105,6 +105,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp,
p->thread.system_timer = 0;
p->thread.hardirq_timer = 0;
p->thread.softirq_timer = 0;
+ p->thread.last_break = 1;
frame->sf.back_chain = 0;
/* new return point is ret_from_fork */
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 5de13307b703..28b6cddc7d07 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -158,8 +158,9 @@ static void show_cpu_mhz(struct seq_file *m, unsigned long n)
static int show_cpuinfo(struct seq_file *m, void *v)
{
unsigned long n = (unsigned long) v - 1;
+ unsigned long first = cpumask_first(cpu_online_mask);
- if (!n)
+ if (n == first)
show_cpu_summary(m, v);
if (!machine_has_cpu_mhz)
return 0;
@@ -172,6 +173,8 @@ static inline void *c_update(loff_t *pos)
{
if (*pos)
*pos = cpumask_next(*pos - 1, cpu_online_mask);
+ else
+ *pos = cpumask_first(cpu_online_mask);
return *pos < nr_cpu_ids ? (void *)*pos + 1 : NULL;
}
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index ad71132374f0..5aa786063eb3 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -324,6 +324,25 @@ static inline void __poke_user_per(struct task_struct *child,
child->thread.per_user.end = data;
}
+static void fixup_int_code(struct task_struct *child, addr_t data)
+{
+ struct pt_regs *regs = task_pt_regs(child);
+ int ilc = regs->int_code >> 16;
+ u16 insn;
+
+ if (ilc > 6)
+ return;
+
+ if (ptrace_access_vm(child, regs->psw.addr - (regs->int_code >> 16),
+ &insn, sizeof(insn), FOLL_FORCE) != sizeof(insn))
+ return;
+
+ /* double check that tracee stopped on svc instruction */
+ if ((insn >> 8) != 0xa)
+ return;
+
+ regs->int_code = 0x20000 | (data & 0xffff);
+}
/*
* Write a word to the user area of a process at location addr. This
* operation does have an additional problem compared to peek_user.
@@ -335,7 +354,9 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
struct user *dummy = NULL;
addr_t offset;
+
if (addr < (addr_t) &dummy->regs.acrs) {
+ struct pt_regs *regs = task_pt_regs(child);
/*
* psw and gprs are stored on the stack
*/
@@ -353,7 +374,11 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
/* Invalid addressing mode bits */
return -EINVAL;
}
- *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr) = data;
+
+ if (test_pt_regs_flag(regs, PIF_SYSCALL) &&
+ addr == offsetof(struct user, regs.gprs[2]))
+ fixup_int_code(child, data);
+ *(addr_t *)((addr_t) &regs->psw + addr) = data;
} else if (addr < (addr_t) (&dummy->regs.orig_gpr2)) {
/*
@@ -719,6 +744,10 @@ static int __poke_user_compat(struct task_struct *child,
regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) |
(__u64)(tmp & PSW32_ADDR_AMODE);
} else {
+
+ if (test_pt_regs_flag(regs, PIF_SYSCALL) &&
+ addr == offsetof(struct compat_user, regs.gprs[2]))
+ fixup_int_code(child, data);
/* gpr 0-15 */
*(__u32*)((addr_t) &regs->psw + addr*2 + 4) = tmp;
}
@@ -844,11 +873,9 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
* call number to gprs[2].
*/
if (test_thread_flag(TIF_SYSCALL_TRACE) &&
- (tracehook_report_syscall_entry(regs) ||
- regs->gprs[2] >= NR_syscalls)) {
+ tracehook_report_syscall_entry(regs)) {
/*
- * Tracing decided this syscall should not happen or the
- * debugger stored an invalid system call number. Skip
+ * Tracing decided this syscall should not happen. Skip
* the system call and the system call restart handling.
*/
clear_pt_regs_flag(regs, PIF_SYSCALL);
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index f8544d517430..8fe09a672396 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -73,6 +73,7 @@
#include <asm/nospec-branch.h>
#include <asm/mem_detect.h>
#include <asm/uv.h>
+#include <asm/asm-offsets.h>
#include "entry.h"
/*
@@ -467,6 +468,8 @@ static void __init setup_lowcore_dat_off(void)
arch_spin_lock_setup(0);
#endif
lc->br_r1_trampoline = 0x07f1; /* br %r1 */
+ lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
+ lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
set_prefix((u32)(unsigned long) lc);
lowcore_ptr[0] = lc;
@@ -1066,7 +1069,7 @@ static void __init log_component_list(void)
if (!early_ipl_comp_list_addr)
return;
- if (ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR)
+ if (ipl_block.hdr.flags & IPL_PL_FLAG_SIPL)
pr_info("Linux is running with Secure-IPL enabled\n");
else
pr_info("Linux is running with Secure-IPL disabled\n");
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 35fafa2b91a8..9760101e9d47 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -212,6 +212,8 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
lc->spinlock_lockval = arch_spin_lockval(cpu);
lc->spinlock_index = 0;
lc->br_r1_trampoline = 0x07f1; /* br %r1 */
+ lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
+ lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
if (nmi_alloc_per_cpu(lc))
goto out_async;
if (vdso_alloc_per_cpu(lc))
@@ -266,10 +268,13 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
lc->spinlock_index = 0;
lc->percpu_offset = __per_cpu_offset[cpu];
lc->kernel_asce = S390_lowcore.kernel_asce;
+ lc->user_asce = S390_lowcore.kernel_asce;
lc->machine_flags = S390_lowcore.machine_flags;
lc->user_timer = lc->system_timer =
lc->steal_timer = lc->avg_steal_timer = 0;
__ctl_store(lc->cregs_save_area, 0, 15);
+ lc->cregs_save_area[1] = lc->kernel_asce;
+ lc->cregs_save_area[7] = lc->vdso_asce;
save_access_regs((unsigned int *) lc->access_regs_save_area);
memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
sizeof(lc->stfle_fac_list));
@@ -728,39 +733,67 @@ static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
static int smp_add_present_cpu(int cpu);
-static int __smp_rescan_cpus(struct sclp_core_info *info, int sysfs_add)
+static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
+ bool configured, bool early)
{
struct pcpu *pcpu;
- cpumask_t avail;
- int cpu, nr, i, j;
+ int cpu, nr, i;
u16 address;
nr = 0;
- cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
- cpu = cpumask_first(&avail);
- for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) {
- if (sclp.has_core_type && info->core[i].type != boot_core_type)
+ if (sclp.has_core_type && core->type != boot_core_type)
+ return nr;
+ cpu = cpumask_first(avail);
+ address = core->core_id << smp_cpu_mt_shift;
+ for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) {
+ if (pcpu_find_address(cpu_present_mask, address + i))
continue;
- address = info->core[i].core_id << smp_cpu_mt_shift;
- for (j = 0; j <= smp_cpu_mtid; j++) {
- if (pcpu_find_address(cpu_present_mask, address + j))
- continue;
- pcpu = pcpu_devices + cpu;
- pcpu->address = address + j;
- pcpu->state =
- (cpu >= info->configured*(smp_cpu_mtid + 1)) ?
- CPU_STATE_STANDBY : CPU_STATE_CONFIGURED;
- smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
- set_cpu_present(cpu, true);
- if (sysfs_add && smp_add_present_cpu(cpu) != 0)
- set_cpu_present(cpu, false);
- else
- nr++;
- cpu = cpumask_next(cpu, &avail);
- if (cpu >= nr_cpu_ids)
+ pcpu = pcpu_devices + cpu;
+ pcpu->address = address + i;
+ if (configured)
+ pcpu->state = CPU_STATE_CONFIGURED;
+ else
+ pcpu->state = CPU_STATE_STANDBY;
+ smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ set_cpu_present(cpu, true);
+ if (!early && smp_add_present_cpu(cpu) != 0)
+ set_cpu_present(cpu, false);
+ else
+ nr++;
+ cpumask_clear_cpu(cpu, avail);
+ cpu = cpumask_next(cpu, avail);
+ }
+ return nr;
+}
+
+static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
+{
+ struct sclp_core_entry *core;
+ cpumask_t avail;
+ bool configured;
+ u16 core_id;
+ int nr, i;
+
+ nr = 0;
+ cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
+ /*
+ * Add IPL core first (which got logical CPU number 0) to make sure
+ * that all SMT threads get subsequent logical CPU numbers.
+ */
+ if (early) {
+ core_id = pcpu_devices[0].address >> smp_cpu_mt_shift;
+ for (i = 0; i < info->configured; i++) {
+ core = &info->core[i];
+ if (core->core_id == core_id) {
+ nr += smp_add_core(core, &avail, true, early);
break;
+ }
}
}
+ for (i = 0; i < info->combined; i++) {
+ configured = i < info->configured;
+ nr += smp_add_core(&info->core[i], &avail, configured, early);
+ }
return nr;
}
@@ -809,7 +842,7 @@ void __init smp_detect_cpus(void)
/* Add CPUs present at boot */
get_online_cpus();
- __smp_rescan_cpus(info, 0);
+ __smp_rescan_cpus(info, true);
put_online_cpus();
memblock_free_early((unsigned long)info, sizeof(*info));
}
@@ -820,6 +853,8 @@ static void smp_init_secondary(void)
S390_lowcore.last_update_clock = get_tod_clock();
restore_access_regs(S390_lowcore.access_regs_save_area);
+ set_cpu_flag(CIF_ASCE_PRIMARY);
+ set_cpu_flag(CIF_ASCE_SECONDARY);
cpu_init();
preempt_disable();
init_cpu_timer();
@@ -1164,7 +1199,7 @@ int __ref smp_rescan_cpus(void)
smp_get_core_info(info, 0);
get_online_cpus();
mutex_lock(&smp_cpu_state_mutex);
- nr = __smp_rescan_cpus(info, 1);
+ nr = __smp_rescan_cpus(info, false);
mutex_unlock(&smp_cpu_state_mutex);
put_online_cpus();
kfree(info);
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index e8766beee5ad..8ea9db599d38 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -310,6 +310,7 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->tk_mult = tk->tkr_mono.mult;
vdso_data->tk_shift = tk->tkr_mono.shift;
+ vdso_data->hrtimer_res = hrtimer_resolution;
smp_wmb();
++vdso_data->tb_update_count;
}
diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
index 57fd4e902f1f..f515aa7cc9e5 100644
--- a/arch/s390/kernel/unwind_bc.c
+++ b/arch/s390/kernel/unwind_bc.c
@@ -46,18 +46,28 @@ bool unwind_next_frame(struct unwind_state *state)
regs = state->regs;
if (unlikely(regs)) {
- sp = READ_ONCE_TASK_STACK(state->task, regs->gprs[15]);
- if (unlikely(outside_of_stack(state, sp))) {
- if (!update_stack_info(state, sp))
- goto out_err;
+ if (state->reuse_sp) {
+ sp = state->sp;
+ state->reuse_sp = false;
+ } else {
+ sp = READ_ONCE_NOCHECK(regs->gprs[15]);
+ if (unlikely(outside_of_stack(state, sp))) {
+ if (!update_stack_info(state, sp))
+ goto out_err;
+ }
}
sf = (struct stack_frame *) sp;
- ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]);
+ ip = READ_ONCE_NOCHECK(sf->gprs[8]);
reliable = false;
regs = NULL;
+ if (!__kernel_text_address(ip)) {
+ /* skip bogus %r14 */
+ state->regs = NULL;
+ return unwind_next_frame(state);
+ }
} else {
sf = (struct stack_frame *) state->sp;
- sp = READ_ONCE_TASK_STACK(state->task, sf->back_chain);
+ sp = READ_ONCE_NOCHECK(sf->back_chain);
if (likely(sp)) {
/* Non-zero back-chain points to the previous frame */
if (unlikely(outside_of_stack(state, sp))) {
@@ -65,7 +75,7 @@ bool unwind_next_frame(struct unwind_state *state)
goto out_err;
}
sf = (struct stack_frame *) sp;
- ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]);
+ ip = READ_ONCE_NOCHECK(sf->gprs[8]);
reliable = true;
} else {
/* No back-chain, look for a pt_regs structure */
@@ -73,9 +83,9 @@ bool unwind_next_frame(struct unwind_state *state)
if (!on_stack(info, sp, sizeof(struct pt_regs)))
goto out_stop;
regs = (struct pt_regs *) sp;
- if (user_mode(regs))
+ if (READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE)
goto out_stop;
- ip = READ_ONCE_TASK_STACK(state->task, regs->psw.addr);
+ ip = READ_ONCE_NOCHECK(regs->psw.addr);
reliable = true;
}
}
@@ -107,9 +117,9 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
{
struct stack_info *info = &state->stack_info;
unsigned long *mask = &state->stack_mask;
+ bool reliable, reuse_sp;
struct stack_frame *sf;
unsigned long ip;
- bool reliable;
memset(state, 0, sizeof(*state));
state->task = task;
@@ -132,12 +142,14 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
/* Get the instruction pointer from pt_regs or the stack frame */
if (regs) {
- ip = READ_ONCE_TASK_STACK(state->task, regs->psw.addr);
+ ip = READ_ONCE_NOCHECK(regs->psw.addr);
reliable = true;
+ reuse_sp = true;
} else {
sf = (struct stack_frame *) sp;
- ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]);
+ ip = READ_ONCE_NOCHECK(sf->gprs[8]);
reliable = false;
+ reuse_sp = false;
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -151,5 +163,6 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
state->sp = sp;
state->ip = ip;
state->reliable = reliable;
+ state->reuse_sp = reuse_sp;
}
EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index bec19e7e6e1c..4a66a1cb919b 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -18,8 +18,8 @@ KBUILD_AFLAGS_64 += -m64 -s
KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin
-KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
- -Wl,--hash-style=both
+ldflags-y := -fPIC -shared -nostdlib -soname=linux-vdso64.so.1 \
+ --hash-style=both --build-id -T
$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64)
@@ -37,8 +37,8 @@ KASAN_SANITIZE := n
$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
# link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE
- $(call if_changed,vdso64ld)
+$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) FORCE
+ $(call if_changed,ld)
# strip rule for the .so file
$(obj)/%.so: OBJCOPYFLAGS := -S
@@ -50,8 +50,6 @@ $(obj-vdso64): %.o: %.S FORCE
$(call if_changed_dep,vdso64as)
# actual build commands
-quiet_cmd_vdso64ld = VDSO64L $@
- cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@
quiet_cmd_vdso64as = VDSO64A $@
cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S
index 081435398e0a..0c79caa32b59 100644
--- a/arch/s390/kernel/vdso64/clock_getres.S
+++ b/arch/s390/kernel/vdso64/clock_getres.S
@@ -17,12 +17,14 @@
.type __kernel_clock_getres,@function
__kernel_clock_getres:
CFI_STARTPROC
- larl %r1,4f
+ larl %r1,3f
+ lg %r0,0(%r1)
cghi %r2,__CLOCK_REALTIME_COARSE
je 0f
cghi %r2,__CLOCK_MONOTONIC_COARSE
je 0f
- larl %r1,3f
+ larl %r1,_vdso_data
+ llgf %r0,__VDSO_CLOCK_REALTIME_RES(%r1)
cghi %r2,__CLOCK_REALTIME
je 0f
cghi %r2,__CLOCK_MONOTONIC
@@ -36,7 +38,6 @@ __kernel_clock_getres:
jz 2f
0: ltgr %r3,%r3
jz 1f /* res == NULL */
- lg %r0,0(%r1)
xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */
stg %r0,8(%r3) /* store tp->tv_usec */
1: lghi %r2,0
@@ -45,6 +46,5 @@ __kernel_clock_getres:
svc 0
br %r14
CFI_ENDPROC
-3: .quad __CLOCK_REALTIME_RES
-4: .quad __CLOCK_COARSE_RES
+3: .quad __CLOCK_COARSE_RES
.size __kernel_clock_getres,.-__kernel_clock_getres
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 149525b5df1b..f5d3b8711e20 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2208,7 +2208,7 @@ static int flic_ais_mode_get_all(struct kvm *kvm, struct kvm_device_attr *attr)
return -EINVAL;
if (!test_kvm_facility(kvm, 72))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
mutex_lock(&fi->ais_lock);
ais.simm = fi->simm;
@@ -2517,7 +2517,7 @@ static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr)
int ret = 0;
if (!test_kvm_facility(kvm, 72))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req)))
return -EFAULT;
@@ -2597,7 +2597,7 @@ static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr)
struct kvm_s390_ais_all ais;
if (!test_kvm_facility(kvm, 72))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
if (copy_from_user(&ais, (void __user *)attr->addr, sizeof(ais)))
return -EFAULT;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 681deaf9f20a..41455c6caae8 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1927,6 +1927,9 @@ static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
start = slot + 1;
}
+ if (start >= slots->used_slots)
+ return slots->used_slots - 1;
+
if (gfn >= memslots[start].base_gfn &&
gfn < memslots[start].base_gfn + memslots[start].npages) {
atomic_set(&slots->lru_slot, start);
@@ -2865,9 +2868,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->gcr[14] = CR14_UNUSED_32 |
CR14_UNUSED_33 |
CR14_EXTERNAL_DAMAGE_SUBMASK;
- /* make sure the new fpc will be lazily loaded */
- save_fpu_regs();
- current->thread.fpu.fpc = 0;
+ vcpu->run->s.regs.fpc = 0;
vcpu->arch.sie_block->gbea = 1;
vcpu->arch.sie_block->pp = 0;
vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
@@ -4352,7 +4353,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
switch (ioctl) {
case KVM_S390_STORE_STATUS:
idx = srcu_read_lock(&vcpu->kvm->srcu);
- r = kvm_s390_vcpu_store_status(vcpu, arg);
+ r = kvm_s390_store_status_unloaded(vcpu, arg);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
case KVM_S390_SET_INITIAL_PSW: {
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 076090f9e666..4f6c22d72072 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -1202,6 +1202,7 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
scb_s->iprcc = PGM_ADDRESSING;
scb_s->pgmilc = 4;
scb_s->gpsw.addr = __rewind_psw(scb_s->gpsw, 4);
+ rc = 1;
}
return rc;
}
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index c4f8039a35e8..0267405ab7c6 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -64,10 +64,13 @@ mm_segment_t enable_sacf_uaccess(void)
{
mm_segment_t old_fs;
unsigned long asce, cr;
+ unsigned long flags;
old_fs = current->thread.mm_segment;
if (old_fs & 1)
return old_fs;
+ /* protect against a concurrent page table upgrade */
+ local_irq_save(flags);
current->thread.mm_segment |= 1;
asce = S390_lowcore.kernel_asce;
if (likely(old_fs == USER_DS)) {
@@ -83,6 +86,7 @@ mm_segment_t enable_sacf_uaccess(void)
__ctl_load(asce, 7, 7);
set_cpu_flag(CIF_ASCE_SECONDARY);
}
+ local_irq_restore(flags);
return old_fs;
}
EXPORT_SYMBOL(enable_sacf_uaccess);
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index 510a18299196..a51c892f14f3 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -298,16 +298,16 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write,
}
if (write) {
- len = *lenp;
- if (copy_from_user(buf, buffer,
- len > sizeof(buf) ? sizeof(buf) : len))
+ len = min(*lenp, sizeof(buf));
+ if (copy_from_user(buf, buffer, len))
return -EFAULT;
- buf[sizeof(buf) - 1] = '\0';
+ buf[len - 1] = '\0';
cmm_skip_blanks(buf, &p);
nr = simple_strtoul(p, &p, 0);
cmm_skip_blanks(p, &p);
seconds = simple_strtoul(p, &p, 0);
cmm_set_timeout(nr, seconds);
+ *ppos += *lenp;
} else {
len = sprintf(buf, "%ld %ld\n",
cmm_timeout_pages, cmm_timeout_seconds);
@@ -315,9 +315,9 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write,
len = *lenp;
if (copy_to_user(buffer, buf, len))
return -EFAULT;
+ *lenp = len;
+ *ppos += len;
}
- *lenp = len;
- *ppos += len;
return 0;
}
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 1e668b95e0c6..f41053055c6f 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -787,14 +787,18 @@ static void gmap_call_notifier(struct gmap *gmap, unsigned long start,
static inline unsigned long *gmap_table_walk(struct gmap *gmap,
unsigned long gaddr, int level)
{
+ const int asce_type = gmap->asce & _ASCE_TYPE_MASK;
unsigned long *table;
if ((gmap->asce & _ASCE_TYPE_MASK) + 4 < (level * 4))
return NULL;
if (gmap_is_shadow(gmap) && gmap->removed)
return NULL;
- if (gaddr & (-1UL << (31 + ((gmap->asce & _ASCE_TYPE_MASK) >> 2)*11)))
+
+ if (asce_type != _ASCE_TYPE_REGION1 &&
+ gaddr & (-1UL << (31 + (asce_type >> 2) * 11)))
return NULL;
+
table = gmap->table;
switch (gmap->asce & _ASCE_TYPE_MASK) {
case _ASCE_TYPE_REGION1:
@@ -1840,6 +1844,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
goto out_free;
} else if (*table & _REGION_ENTRY_ORIGIN) {
rc = -EAGAIN; /* Race with shadow */
+ goto out_free;
}
crst_table_init(s_r3t, _REGION3_ENTRY_EMPTY);
/* mark as invalid as long as the parent table is not protected */
@@ -2480,23 +2485,36 @@ void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
}
EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
+{
+ struct vm_area_struct *vma = walk->vma;
+
+ split_huge_pmd(vma, pmd, addr);
+ return 0;
+}
+
+static const struct mm_walk_ops thp_split_walk_ops = {
+ .pmd_entry = thp_split_walk_pmd_entry,
+};
+
static inline void thp_split_mm(struct mm_struct *mm)
{
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
struct vm_area_struct *vma;
- unsigned long addr;
for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
- for (addr = vma->vm_start;
- addr < vma->vm_end;
- addr += PAGE_SIZE)
- follow_page(vma, addr, FOLL_SPLIT);
vma->vm_flags &= ~VM_HUGEPAGE;
vma->vm_flags |= VM_NOHUGEPAGE;
+ walk_page_vma(vma, &thp_split_walk_ops, NULL);
}
mm->def_flags |= VM_NOHUGEPAGE;
-#endif
}
+#else
+static inline void thp_split_mm(struct mm_struct *mm)
+{
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
* Remove all empty zero pages from the mapping for lazy refaulting
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index b0246c705a19..ff8234bca56c 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -2,7 +2,7 @@
/*
* IBM System z Huge TLB Page Support for Kernel.
*
- * Copyright IBM Corp. 2007,2016
+ * Copyright IBM Corp. 2007,2020
* Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
*/
@@ -11,6 +11,9 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
+#include <linux/mman.h>
+#include <linux/sched/mm.h>
+#include <linux/security.h>
/*
* If the bit selected by single-bit bitmask "a" is set within "x", move
@@ -114,7 +117,7 @@ static inline pte_t __rste_to_pte(unsigned long rste)
_PAGE_YOUNG);
#ifdef CONFIG_MEM_SOFT_DIRTY
pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_SOFT_DIRTY,
- _PAGE_DIRTY);
+ _PAGE_SOFT_DIRTY);
#endif
pte_val(pte) |= move_set_bit(rste, _SEGMENT_ENTRY_NOEXEC,
_PAGE_NOEXEC);
@@ -156,10 +159,13 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
rste &= ~_SEGMENT_ENTRY_NOEXEC;
/* Set correct table type for 2G hugepages */
- if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
- rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE;
- else
+ if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
+ if (likely(pte_present(pte)))
+ rste |= _REGION3_ENTRY_LARGE;
+ rste |= _REGION_ENTRY_TYPE_R3;
+ } else if (likely(pte_present(pte)))
rste |= _SEGMENT_ENTRY_LARGE;
+
clear_huge_pte_skeys(mm, rste);
pte_val(*ptep) = rste;
}
@@ -267,3 +273,98 @@ static __init int setup_hugepagesz(char *opt)
return 1;
}
__setup("hugepagesz=", setup_hugepagesz);
+
+static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
+ unsigned long addr, unsigned long len,
+ unsigned long pgoff, unsigned long flags)
+{
+ struct hstate *h = hstate_file(file);
+ struct vm_unmapped_area_info info;
+
+ info.flags = 0;
+ info.length = len;
+ info.low_limit = current->mm->mmap_base;
+ info.high_limit = TASK_SIZE;
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+ info.align_offset = 0;
+ return vm_unmapped_area(&info);
+}
+
+static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
+ unsigned long addr0, unsigned long len,
+ unsigned long pgoff, unsigned long flags)
+{
+ struct hstate *h = hstate_file(file);
+ struct vm_unmapped_area_info info;
+ unsigned long addr;
+
+ info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+ info.length = len;
+ info.low_limit = max(PAGE_SIZE, mmap_min_addr);
+ info.high_limit = current->mm->mmap_base;
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+ info.align_offset = 0;
+ addr = vm_unmapped_area(&info);
+
+ /*
+ * A failed mmap() very likely causes application failure,
+ * so fall back to the bottom-up function here. This scenario
+ * can happen with large stack limits and large mmap()
+ * allocations.
+ */
+ if (addr & ~PAGE_MASK) {
+ VM_BUG_ON(addr != -ENOMEM);
+ info.flags = 0;
+ info.low_limit = TASK_UNMAPPED_BASE;
+ info.high_limit = TASK_SIZE;
+ addr = vm_unmapped_area(&info);
+ }
+
+ return addr;
+}
+
+unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+ struct hstate *h = hstate_file(file);
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ int rc;
+
+ if (len & ~huge_page_mask(h))
+ return -EINVAL;
+ if (len > TASK_SIZE - mmap_min_addr)
+ return -ENOMEM;
+
+ if (flags & MAP_FIXED) {
+ if (prepare_hugepage_range(file, addr, len))
+ return -EINVAL;
+ goto check_asce_limit;
+ }
+
+ if (addr) {
+ addr = ALIGN(addr, huge_page_size(h));
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+ (!vma || addr + len <= vm_start_gap(vma)))
+ goto check_asce_limit;
+ }
+
+ if (mm->get_unmapped_area == arch_get_unmapped_area)
+ addr = hugetlb_get_unmapped_area_bottomup(file, addr, len,
+ pgoff, flags);
+ else
+ addr = hugetlb_get_unmapped_area_topdown(file, addr, len,
+ pgoff, flags);
+ if (addr & ~PAGE_MASK)
+ return addr;
+
+check_asce_limit:
+ if (addr + len > current->mm->context.asce_limit &&
+ addr + len <= TASK_SIZE) {
+ rc = crst_table_upgrade(mm, addr + len);
+ if (rc)
+ return (unsigned long) rc;
+ }
+ return addr;
+}
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 818deeb1ebc3..b1cba90ce49c 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -69,7 +69,7 @@ void notrace s390_kernel_write(void *dst, const void *src, size_t size)
}
}
-static int __memcpy_real(void *dest, void *src, size_t count)
+static int __no_sanitize_address __memcpy_real(void *dest, void *src, size_t count)
{
register unsigned long _dest asm("2") = (unsigned long) dest;
register unsigned long _len1 asm("3") = (unsigned long) count;
@@ -90,19 +90,23 @@ static int __memcpy_real(void *dest, void *src, size_t count)
return rc;
}
-static unsigned long _memcpy_real(unsigned long dest, unsigned long src,
- unsigned long count)
+static unsigned long __no_sanitize_address _memcpy_real(unsigned long dest,
+ unsigned long src,
+ unsigned long count)
{
int irqs_disabled, rc;
unsigned long flags;
if (!count)
return 0;
- flags = __arch_local_irq_stnsm(0xf8UL);
+ flags = arch_local_irq_save();
irqs_disabled = arch_irqs_disabled_flags(flags);
if (!irqs_disabled)
trace_hardirqs_off();
+ __arch_local_irq_stnsm(0xf8); // disable DAT
rc = __memcpy_real((void *) dest, (void *) src, (size_t) count);
+ if (flags & PSW_MASK_DAT)
+ __arch_local_irq_stosm(0x04); // enable DAT
if (!irqs_disabled)
trace_hardirqs_on();
__arch_local_irq_ssm(flags);
@@ -114,9 +118,15 @@ static unsigned long _memcpy_real(unsigned long dest, unsigned long src,
*/
int memcpy_real(void *dest, void *src, size_t count)
{
- if (S390_lowcore.nodat_stack != 0)
- return CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack,
- 3, dest, src, count);
+ int rc;
+
+ if (S390_lowcore.nodat_stack != 0) {
+ preempt_disable();
+ rc = CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack, 3,
+ dest, src, count);
+ preempt_enable();
+ return rc;
+ }
/*
* This is a really early memcpy_real call, the stacks are
* not set up yet. Just call _memcpy_real on the early boot
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 99e06213a22b..2b366abca8c1 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -72,8 +72,20 @@ static void __crst_table_upgrade(void *arg)
{
struct mm_struct *mm = arg;
- if (current->active_mm == mm)
- set_user_asce(mm);
+ /* we must change all active ASCEs to avoid the creation of new TLBs */
+ if (current->active_mm == mm) {
+ S390_lowcore.user_asce = mm->context.asce;
+ if (current->thread.mm_segment == USER_DS) {
+ __ctl_load(S390_lowcore.user_asce, 1, 1);
+ /* Mark user-ASCE present in CR1 */
+ clear_cpu_flag(CIF_ASCE_PRIMARY);
+ }
+ if (current->thread.mm_segment == USER_DS_SACF) {
+ __ctl_load(S390_lowcore.user_asce, 7, 7);
+ /* enable_sacf_uaccess does all or nothing */
+ WARN_ON(!test_cpu_flag(CIF_ASCE_SECONDARY));
+ }
+ }
__tlb_flush_local();
}
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index b403fa14847d..f810930aff42 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -415,6 +415,10 @@ void __init vmem_map_init(void)
SET_MEMORY_RO | SET_MEMORY_X);
__set_memory(__stext_dma, (__etext_dma - __stext_dma) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
+
+ /* we need lowcore executable for our LPSWE instructions */
+ set_memory_x(0, 1);
+
pr_info("Write protected kernel read-only data: %luk\n",
(unsigned long)(__end_rodata - _stext) >> 10);
}
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 86ca7f88fb22..42f360fa8833 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -423,7 +423,7 @@ static void zpci_map_resources(struct pci_dev *pdev)
if (static_branch_likely(&have_mio))
pdev->resource[i].start =
- (resource_size_t __force) zdev->bars[i].mio_wb;
+ (resource_size_t __force) zdev->bars[i].mio_wt;
else
pdev->resource[i].start =
(resource_size_t __force) pci_iomap(pdev, i, 0);
@@ -529,7 +529,7 @@ static int zpci_setup_bus_resources(struct zpci_dev *zdev,
flags |= IORESOURCE_MEM_64;
if (static_branch_likely(&have_mio))
- addr = (unsigned long) zdev->bars[i].mio_wb;
+ addr = (unsigned long) zdev->bars[i].mio_wt;
else
addr = ZPCI_ADDR(entry);
size = 1UL << zdev->bars[i].size;
@@ -889,8 +889,10 @@ static int __init pci_base_init(void)
if (!test_facility(69) || !test_facility(71))
return 0;
- if (test_facility(153) && !s390_pci_no_mio)
+ if (test_facility(153) && !s390_pci_no_mio) {
static_branch_enable(&have_mio);
+ ctl_set_bit(2, 5);
+ }
rc = zpci_debug_init();
if (rc)
@@ -931,5 +933,5 @@ subsys_initcall_sync(pci_base_init);
void zpci_rescan(void)
{
if (zpci_is_enabled())
- clp_rescan_pci_devices_simple();
+ clp_rescan_pci_devices_simple(NULL);
}
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index d03631dba7c2..64674b49d951 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -240,12 +240,14 @@ error:
}
/*
- * Enable/Disable a given PCI function defined by its function handle.
+ * Enable/Disable a given PCI function and update its function handle if
+ * necessary
*/
-static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
+static int clp_set_pci_fn(struct zpci_dev *zdev, u8 nr_dma_as, u8 command)
{
struct clp_req_rsp_set_pci *rrb;
int rc, retries = 100;
+ u32 fid = zdev->fid;
rrb = clp_alloc_block(GFP_KERNEL);
if (!rrb)
@@ -256,7 +258,7 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
rrb->request.hdr.len = sizeof(rrb->request);
rrb->request.hdr.cmd = CLP_SET_PCI_FN;
rrb->response.hdr.len = sizeof(rrb->response);
- rrb->request.fh = *fh;
+ rrb->request.fh = zdev->fh;
rrb->request.oc = command;
rrb->request.ndas = nr_dma_as;
@@ -269,12 +271,17 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
}
} while (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY);
- if (!rc && rrb->response.hdr.rsp == CLP_RC_OK)
- *fh = rrb->response.fh;
- else {
+ if (rc || rrb->response.hdr.rsp != CLP_RC_OK) {
zpci_err("Set PCI FN:\n");
zpci_err_clp(rrb->response.hdr.rsp, rc);
- rc = -EIO;
+ }
+
+ if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) {
+ zdev->fh = rrb->response.fh;
+ } else if (!rc && rrb->response.hdr.rsp == CLP_RC_SETPCIFN_ALRDY &&
+ rrb->response.fh == 0) {
+ /* Function is already in desired state - update handle */
+ rc = clp_rescan_pci_devices_simple(&fid);
}
clp_free_block(rrb);
return rc;
@@ -282,18 +289,17 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command)
int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as)
{
- u32 fh = zdev->fh;
int rc;
- rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_PCI_FN);
- zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc);
+ rc = clp_set_pci_fn(zdev, nr_dma_as, CLP_SET_ENABLE_PCI_FN);
+ zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc);
if (rc)
goto out;
- zdev->fh = fh;
if (zdev->mio_capable) {
- rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_MIO);
- zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc);
+ rc = clp_set_pci_fn(zdev, nr_dma_as, CLP_SET_ENABLE_MIO);
+ zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n",
+ zdev->fid, zdev->fh, rc);
if (rc)
clp_disable_fh(zdev);
}
@@ -303,17 +309,13 @@ out:
int clp_disable_fh(struct zpci_dev *zdev)
{
- u32 fh = zdev->fh;
int rc;
if (!zdev_enabled(zdev))
return 0;
- rc = clp_set_pci_fn(&fh, 0, CLP_SET_DISABLE_PCI_FN);
- zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc);
- if (!rc)
- zdev->fh = fh;
-
+ rc = clp_set_pci_fn(zdev, 0, CLP_SET_DISABLE_PCI_FN);
+ zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc);
return rc;
}
@@ -370,10 +372,14 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data)
static void __clp_update(struct clp_fh_list_entry *entry, void *data)
{
struct zpci_dev *zdev;
+ u32 *fid = data;
if (!entry->vendor_id)
return;
+ if (fid && *fid != entry->fid)
+ return;
+
zdev = get_zdev_by_fid(entry->fid);
if (!zdev)
return;
@@ -413,7 +419,10 @@ int clp_rescan_pci_devices(void)
return rc;
}
-int clp_rescan_pci_devices_simple(void)
+/* Rescan PCI functions and refresh function handles. If fid is non-NULL only
+ * refresh the handle of the function matching @fid
+ */
+int clp_rescan_pci_devices_simple(u32 *fid)
{
struct clp_req_rsp_list_pci *rrb;
int rc;
@@ -422,7 +431,7 @@ int clp_rescan_pci_devices_simple(void)
if (!rrb)
return -ENOMEM;
- rc = clp_list_pci(rrb, NULL, __clp_update);
+ rc = clp_list_pci(rrb, fid, __clp_update);
clp_free_block(rrb);
return rc;
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
index d80616ae8dd8..743f257cf2cb 100644
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -115,7 +115,6 @@ static struct irq_chip zpci_irq_chip = {
.name = "PCI-MSI",
.irq_unmask = pci_msi_unmask_irq,
.irq_mask = pci_msi_mask_irq,
- .irq_set_affinity = zpci_set_irq_affinity,
};
static void zpci_handle_cpu_local_irq(bool rescan)
@@ -276,7 +275,9 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
rc = -EIO;
if (hwirq - bit >= msi_vecs)
break;
- irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE, msi->affinity);
+ irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE,
+ (irq_delivery == DIRECTED) ?
+ msi->affinity : NULL);
if (irq < 0)
return -ENOMEM;
rc = irq_set_msi_desc(irq, msi);
@@ -284,7 +285,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
return rc;
irq_set_chip_and_handler(irq, &zpci_irq_chip,
handle_percpu_irq);
- msg.data = hwirq;
+ msg.data = hwirq - bit;
if (irq_delivery == DIRECTED) {
msg.address_lo = zdev->msi_addr & 0xff0000ff;
msg.address_lo |= msi->affinity ?
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
index 7d42a8794f10..020a2c514d96 100644
--- a/arch/s390/pci/pci_mmio.c
+++ b/arch/s390/pci/pci_mmio.c
@@ -11,6 +11,113 @@
#include <linux/mm.h>
#include <linux/errno.h>
#include <linux/pci.h>
+#include <asm/pci_io.h>
+#include <asm/pci_debug.h>
+
+static inline void zpci_err_mmio(u8 cc, u8 status, u64 offset)
+{
+ struct {
+ u64 offset;
+ u8 cc;
+ u8 status;
+ } data = {offset, cc, status};
+
+ zpci_err_hex(&data, sizeof(data));
+}
+
+static inline int __pcistb_mio_inuser(
+ void __iomem *ioaddr, const void __user *src,
+ u64 len, u8 *status)
+{
+ int cc = -ENXIO;
+
+ asm volatile (
+ " sacf 256\n"
+ "0: .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[src]\n"
+ "1: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ "2: sacf 768\n"
+ EX_TABLE(0b, 2b) EX_TABLE(1b, 2b)
+ : [cc] "+d" (cc), [len] "+d" (len)
+ : [ioaddr] "a" (ioaddr), [src] "Q" (*((u8 __force *)src))
+ : "cc", "memory");
+ *status = len >> 24 & 0xff;
+ return cc;
+}
+
+static inline int __pcistg_mio_inuser(
+ void __iomem *ioaddr, const void __user *src,
+ u64 ulen, u8 *status)
+{
+ register u64 addr asm("2") = (u64 __force) ioaddr;
+ register u64 len asm("3") = ulen;
+ int cc = -ENXIO;
+ u64 val = 0;
+ u64 cnt = ulen;
+ u8 tmp;
+
+ /*
+ * copy 0 < @len <= 8 bytes from @src into the right most bytes of
+ * a register, then store it to PCI at @ioaddr while in secondary
+ * address space. pcistg then uses the user mappings.
+ */
+ asm volatile (
+ " sacf 256\n"
+ "0: llgc %[tmp],0(%[src])\n"
+ " sllg %[val],%[val],8\n"
+ " aghi %[src],1\n"
+ " ogr %[val],%[tmp]\n"
+ " brctg %[cnt],0b\n"
+ "1: .insn rre,0xb9d40000,%[val],%[ioaddr]\n"
+ "2: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ "3: sacf 768\n"
+ EX_TABLE(0b, 3b) EX_TABLE(1b, 3b) EX_TABLE(2b, 3b)
+ :
+ [src] "+a" (src), [cnt] "+d" (cnt),
+ [val] "+d" (val), [tmp] "=d" (tmp),
+ [len] "+d" (len), [cc] "+d" (cc),
+ [ioaddr] "+a" (addr)
+ :: "cc", "memory");
+ *status = len >> 24 & 0xff;
+
+ /* did we read everything from user memory? */
+ if (!cc && cnt != 0)
+ cc = -EFAULT;
+
+ return cc;
+}
+
+static inline int __memcpy_toio_inuser(void __iomem *dst,
+ const void __user *src, size_t n)
+{
+ int size, rc = 0;
+ u8 status = 0;
+ mm_segment_t old_fs;
+
+ if (!src)
+ return -EINVAL;
+
+ old_fs = enable_sacf_uaccess();
+ while (n > 0) {
+ size = zpci_get_max_write_size((u64 __force) dst,
+ (u64 __force) src, n,
+ ZPCI_MAX_WRITE_SIZE);
+ if (size > 8) /* main path */
+ rc = __pcistb_mio_inuser(dst, src, size, &status);
+ else
+ rc = __pcistg_mio_inuser(dst, src, size, &status);
+ if (rc)
+ break;
+ src += size;
+ dst += size;
+ n -= size;
+ }
+ disable_sacf_uaccess(old_fs);
+ if (rc)
+ zpci_err_mmio(rc, status, (__force u64) dst);
+ return rc;
+}
static long get_pfn(unsigned long user_addr, unsigned long access,
unsigned long *pfn)
@@ -46,6 +153,20 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
if (length <= 0 || PAGE_SIZE - (mmio_addr & ~PAGE_MASK) < length)
return -EINVAL;
+
+ /*
+ * Only support read access to MIO capable devices on a MIO enabled
+ * system. Otherwise we would have to check for every address if it is
+ * a special ZPCI_ADDR and we would have to do a get_pfn() which we
+ * don't need for MIO capable devices.
+ */
+ if (static_branch_likely(&have_mio)) {
+ ret = __memcpy_toio_inuser((void __iomem *) mmio_addr,
+ user_buffer,
+ length);
+ return ret;
+ }
+
if (length > 64) {
buf = kmalloc(length, GFP_KERNEL);
if (!buf)
@@ -56,7 +177,8 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
ret = get_pfn(mmio_addr, VM_WRITE, &pfn);
if (ret)
goto out;
- io_addr = (void __iomem *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK));
+ io_addr = (void __iomem *)((pfn << PAGE_SHIFT) |
+ (mmio_addr & ~PAGE_MASK));
ret = -EFAULT;
if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE)
@@ -72,6 +194,78 @@ out:
return ret;
}
+static inline int __pcilg_mio_inuser(
+ void __user *dst, const void __iomem *ioaddr,
+ u64 ulen, u8 *status)
+{
+ register u64 addr asm("2") = (u64 __force) ioaddr;
+ register u64 len asm("3") = ulen;
+ u64 cnt = ulen;
+ int shift = ulen * 8;
+ int cc = -ENXIO;
+ u64 val, tmp;
+
+ /*
+ * read 0 < @len <= 8 bytes from the PCI memory mapped at @ioaddr (in
+ * user space) into a register using pcilg then store these bytes at
+ * user address @dst
+ */
+ asm volatile (
+ " sacf 256\n"
+ "0: .insn rre,0xb9d60000,%[val],%[ioaddr]\n"
+ "1: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ " ltr %[cc],%[cc]\n"
+ " jne 4f\n"
+ "2: ahi %[shift],-8\n"
+ " srlg %[tmp],%[val],0(%[shift])\n"
+ "3: stc %[tmp],0(%[dst])\n"
+ " aghi %[dst],1\n"
+ " brctg %[cnt],2b\n"
+ "4: sacf 768\n"
+ EX_TABLE(0b, 4b) EX_TABLE(1b, 4b) EX_TABLE(3b, 4b)
+ :
+ [cc] "+d" (cc), [val] "=d" (val), [len] "+d" (len),
+ [dst] "+a" (dst), [cnt] "+d" (cnt), [tmp] "=d" (tmp),
+ [shift] "+d" (shift)
+ :
+ [ioaddr] "a" (addr)
+ : "cc", "memory");
+
+ /* did we write everything to the user space buffer? */
+ if (!cc && cnt != 0)
+ cc = -EFAULT;
+
+ *status = len >> 24 & 0xff;
+ return cc;
+}
+
+static inline int __memcpy_fromio_inuser(void __user *dst,
+ const void __iomem *src,
+ unsigned long n)
+{
+ int size, rc = 0;
+ u8 status;
+ mm_segment_t old_fs;
+
+ old_fs = enable_sacf_uaccess();
+ while (n > 0) {
+ size = zpci_get_max_write_size((u64 __force) src,
+ (u64 __force) dst, n,
+ ZPCI_MAX_READ_SIZE);
+ rc = __pcilg_mio_inuser(dst, src, size, &status);
+ if (rc)
+ break;
+ src += size;
+ dst += size;
+ n -= size;
+ }
+ disable_sacf_uaccess(old_fs);
+ if (rc)
+ zpci_err_mmio(rc, status, (__force u64) dst);
+ return rc;
+}
+
SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
void __user *, user_buffer, size_t, length)
{
@@ -86,12 +280,27 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
if (length <= 0 || PAGE_SIZE - (mmio_addr & ~PAGE_MASK) < length)
return -EINVAL;
+
+ /*
+ * Only support write access to MIO capable devices on a MIO enabled
+ * system. Otherwise we would have to check for every address if it is
+ * a special ZPCI_ADDR and we would have to do a get_pfn() which we
+ * don't need for MIO capable devices.
+ */
+ if (static_branch_likely(&have_mio)) {
+ ret = __memcpy_fromio_inuser(
+ user_buffer, (const void __iomem *)mmio_addr,
+ length);
+ return ret;
+ }
+
if (length > 64) {
buf = kmalloc(length, GFP_KERNEL);
if (!buf)
return -ENOMEM;
- } else
+ } else {
buf = local_buf;
+ }
ret = get_pfn(mmio_addr, VM_READ, &pfn);
if (ret)
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index 430c14b006d1..0e11fc023fe7 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -13,6 +13,8 @@
#include <linux/stat.h>
#include <linux/pci.h>
+#include "../../../drivers/pci/pci.h"
+
#include <asm/sclp.h>
#define zpci_attr(name, fmt, member) \
@@ -40,31 +42,50 @@ zpci_attr(segment3, "0x%02x\n", pfip[3]);
static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
+ struct kernfs_node *kn;
struct pci_dev *pdev = to_pci_dev(dev);
struct zpci_dev *zdev = to_zpci(pdev);
- int ret;
-
- if (!device_remove_file_self(dev, attr))
- return count;
-
+ int ret = 0;
+
+ /* Can't use device_remove_self() here as that would lead us to lock
+ * the pci_rescan_remove_lock while holding the device' kernfs lock.
+ * This would create a possible deadlock with disable_slot() which is
+ * not directly protected by the device' kernfs lock but takes it
+ * during the device removal which happens under
+ * pci_rescan_remove_lock.
+ *
+ * This is analogous to sdev_store_delete() in
+ * drivers/scsi/scsi_sysfs.c
+ */
+ kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
+ WARN_ON_ONCE(!kn);
+ /* device_remove_file() serializes concurrent calls ignoring all but
+ * the first
+ */
+ device_remove_file(dev, attr);
+
+ /* A concurrent call to recover_store() may slip between
+ * sysfs_break_active_protection() and the sysfs file removal.
+ * Once it unblocks from pci_lock_rescan_remove() the original pdev
+ * will already be removed.
+ */
pci_lock_rescan_remove();
- pci_stop_and_remove_bus_device(pdev);
- ret = zpci_disable_device(zdev);
- if (ret)
- goto error;
-
- ret = zpci_enable_device(zdev);
- if (ret)
- goto error;
-
- pci_rescan_bus(zdev->bus);
+ if (pci_dev_is_added(pdev)) {
+ pci_stop_and_remove_bus_device(pdev);
+ ret = zpci_disable_device(zdev);
+ if (ret)
+ goto out;
+
+ ret = zpci_enable_device(zdev);
+ if (ret)
+ goto out;
+ pci_rescan_bus(zdev->bus);
+ }
+out:
pci_unlock_rescan_remove();
-
- return count;
-
-error:
- pci_unlock_rescan_remove();
- return ret;
+ if (kn)
+ sysfs_unbreak_active_protection(kn);
+ return ret ? ret : count;
}
static DEVICE_ATTR_WO(recover);
diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
index dc1ae4ff79d7..ff318acb3d77 100644
--- a/arch/s390/purgatory/Makefile
+++ b/arch/s390/purgatory/Makefile
@@ -13,8 +13,10 @@ $(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE
$(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE
$(call if_changed_rule,as_o_S)
-$(obj)/string.o: $(srctree)/arch/s390/lib/string.c FORCE
- $(call if_changed_rule,cc_o_c)
+KCOV_INSTRUMENT := n
+GCOV_PROFILE := n
+UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes
KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare
diff --git a/arch/s390/purgatory/string.c b/arch/s390/purgatory/string.c
new file mode 100644
index 000000000000..c98c22a72db7
--- /dev/null
+++ b/arch/s390/purgatory/string.c
@@ -0,0 +1,3 @@
+// SPDX-License-Identifier: GPL-2.0
+#define __HAVE_ARCH_MEMCMP /* arch function */
+#include "../lib/string.c"