summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/Makefile6
-rw-r--r--arch/c6x/include/asm/Kbuild1
-rw-r--r--arch/c6x/include/uapi/asm/Kbuild1
-rw-r--r--arch/h8300/include/asm/Kbuild1
-rw-r--r--arch/h8300/include/uapi/asm/Kbuild1
-rw-r--r--arch/hexagon/include/asm/Kbuild1
-rw-r--r--arch/hexagon/include/uapi/asm/Kbuild1
-rw-r--r--arch/m68k/include/asm/Kbuild1
-rw-r--r--arch/m68k/include/uapi/asm/Kbuild1
-rw-r--r--arch/microblaze/include/asm/Kbuild1
-rw-r--r--arch/microblaze/include/uapi/asm/Kbuild1
-rw-r--r--arch/openrisc/include/asm/Kbuild1
-rw-r--r--arch/openrisc/include/uapi/asm/Kbuild1
-rw-r--r--arch/riscv/Kconfig2
-rw-r--r--arch/riscv/configs/defconfig8
-rw-r--r--arch/riscv/include/asm/page.h2
-rw-r--r--arch/riscv/include/asm/processor.h2
-rw-r--r--arch/riscv/kernel/asm-offsets.c1
-rw-r--r--arch/riscv/kernel/entry.S18
-rw-r--r--arch/riscv/kernel/setup.c2
-rw-r--r--arch/riscv/kernel/smpboot.c6
-rw-r--r--arch/riscv/mm/init.c3
-rw-r--r--arch/unicore32/include/asm/Kbuild1
-rw-r--r--arch/unicore32/include/uapi/asm/Kbuild1
-rw-r--r--arch/x86/include/asm/page_64_types.h4
-rw-r--r--block/blk-core.c6
-rw-r--r--block/blk-flush.c2
-rw-r--r--drivers/ide/ide-atapi.c9
-rw-r--r--drivers/ide/ide-io.c61
-rw-r--r--drivers/ide/ide-park.c2
-rw-r--r--drivers/ide/ide-probe.c23
-rw-r--r--drivers/md/raid5-cache.c33
-rw-r--r--drivers/md/raid5.c8
-rw-r--r--drivers/s390/scsi/zfcp_aux.c1
-rw-r--r--drivers/s390/scsi/zfcp_scsi.c2
-rw-r--r--drivers/scsi/53c700.c2
-rw-r--r--drivers/scsi/bnx2fc/bnx2fc_io.c4
-rw-r--r--drivers/scsi/libfc/fc_lport.c6
-rw-r--r--drivers/scsi/scsi_debug.c41
-rw-r--r--drivers/tty/serial/earlycon-riscv-sbi.c13
-rw-r--r--fs/autofs/expire.c3
-rw-r--r--fs/autofs/inode.c4
-rw-r--r--fs/btrfs/ctree.c78
-rw-r--r--fs/btrfs/super.c3
-rw-r--r--fs/btrfs/transaction.c24
-rw-r--r--fs/btrfs/volumes.c4
-rw-r--r--fs/drop_caches.c8
-rw-r--r--fs/proc/generic.c4
-rw-r--r--fs/proc/internal.h1
-rw-r--r--fs/proc/proc_net.c20
-rw-r--r--include/linux/ide.h2
-rw-r--r--include/linux/memory_hotplug.h18
-rw-r--r--include/linux/sched/coredump.h1
-rw-r--r--init/Kconfig13
-rw-r--r--kernel/exit.c12
-rw-r--r--kernel/sched/psi.c21
-rw-r--r--kernel/workqueue.c23
-rw-r--r--kernel/workqueue_internal.h6
-rw-r--r--lib/test_kmod.c2
-rw-r--r--mm/hugetlb.c3
-rw-r--r--mm/kasan/Makefile1
-rw-r--r--mm/memory-failure.c3
-rw-r--r--mm/memory_hotplug.c62
-rw-r--r--mm/migrate.c12
-rw-r--r--mm/oom_kill.c12
-rw-r--r--tools/testing/selftests/proc/.gitignore1
-rw-r--r--tools/testing/selftests/proc/Makefile1
-rw-r--r--tools/testing/selftests/proc/setns-dcache.c129
68 files changed, 540 insertions, 212 deletions
diff --git a/Documentation/devicetree/bindings/Makefile b/Documentation/devicetree/bindings/Makefile
index 6e5cef0ed6fb..50daa0b3b032 100644
--- a/Documentation/devicetree/bindings/Makefile
+++ b/Documentation/devicetree/bindings/Makefile
@@ -17,7 +17,11 @@ extra-y += $(DT_TMP_SCHEMA)
quiet_cmd_mk_schema = SCHEMA $@
cmd_mk_schema = $(DT_MK_SCHEMA) $(DT_MK_SCHEMA_FLAGS) -o $@ $(filter-out FORCE, $^)
-DT_DOCS = $(shell cd $(srctree)/$(src) && find * -name '*.yaml')
+DT_DOCS = $(shell \
+ cd $(srctree)/$(src) && \
+ find * \( -name '*.yaml' ! -name $(DT_TMP_SCHEMA) \) \
+ )
+
DT_SCHEMA_FILES ?= $(addprefix $(src)/,$(DT_DOCS))
extra-y += $(patsubst $(src)/%.yaml,%.example.dts, $(DT_SCHEMA_FILES))
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index 33a2c94fed0d..63b4a1705182 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -30,6 +30,7 @@ generic-y += pgalloc.h
generic-y += preempt.h
generic-y += segment.h
generic-y += serial.h
+generic-y += shmparam.h
generic-y += tlbflush.h
generic-y += topology.h
generic-y += trace_clock.h
diff --git a/arch/c6x/include/uapi/asm/Kbuild b/arch/c6x/include/uapi/asm/Kbuild
index 6c6f6301012e..0febf1a07c30 100644
--- a/arch/c6x/include/uapi/asm/Kbuild
+++ b/arch/c6x/include/uapi/asm/Kbuild
@@ -1,5 +1,4 @@
include include/uapi/asm-generic/Kbuild.asm
generic-y += kvm_para.h
-generic-y += shmparam.h
generic-y += ucontext.h
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index cd400d353d18..961c1dc064e1 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -40,6 +40,7 @@ generic-y += preempt.h
generic-y += scatterlist.h
generic-y += sections.h
generic-y += serial.h
+generic-y += shmparam.h
generic-y += sizes.h
generic-y += spinlock.h
generic-y += timex.h
diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild
index 6c6f6301012e..0febf1a07c30 100644
--- a/arch/h8300/include/uapi/asm/Kbuild
+++ b/arch/h8300/include/uapi/asm/Kbuild
@@ -1,5 +1,4 @@
include include/uapi/asm-generic/Kbuild.asm
generic-y += kvm_para.h
-generic-y += shmparam.h
generic-y += ucontext.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index 47c4da3d64a4..b25fd42aa0f4 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -30,6 +30,7 @@ generic-y += rwsem.h
generic-y += sections.h
generic-y += segment.h
generic-y += serial.h
+generic-y += shmparam.h
generic-y += sizes.h
generic-y += topology.h
generic-y += trace_clock.h
diff --git a/arch/hexagon/include/uapi/asm/Kbuild b/arch/hexagon/include/uapi/asm/Kbuild
index 61d955c1747a..c1b06dcf6cf8 100644
--- a/arch/hexagon/include/uapi/asm/Kbuild
+++ b/arch/hexagon/include/uapi/asm/Kbuild
@@ -1,4 +1,3 @@
include include/uapi/asm-generic/Kbuild.asm
-generic-y += shmparam.h
generic-y += ucontext.h
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index 9f1dd26903e3..95f8f631c4df 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -20,6 +20,7 @@ generic-y += mm-arch-hooks.h
generic-y += percpu.h
generic-y += preempt.h
generic-y += sections.h
+generic-y += shmparam.h
generic-y += spinlock.h
generic-y += topology.h
generic-y += trace_clock.h
diff --git a/arch/m68k/include/uapi/asm/Kbuild b/arch/m68k/include/uapi/asm/Kbuild
index b8b3525271fa..960bf1e4be53 100644
--- a/arch/m68k/include/uapi/asm/Kbuild
+++ b/arch/m68k/include/uapi/asm/Kbuild
@@ -2,4 +2,3 @@ include include/uapi/asm-generic/Kbuild.asm
generated-y += unistd_32.h
generic-y += kvm_para.h
-generic-y += shmparam.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 9c7d1d25bf3d..791cc8d54d0a 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -26,6 +26,7 @@ generic-y += parport.h
generic-y += percpu.h
generic-y += preempt.h
generic-y += serial.h
+generic-y += shmparam.h
generic-y += syscalls.h
generic-y += topology.h
generic-y += trace_clock.h
diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild
index 28823e3db825..97823ec46e97 100644
--- a/arch/microblaze/include/uapi/asm/Kbuild
+++ b/arch/microblaze/include/uapi/asm/Kbuild
@@ -2,5 +2,4 @@ include include/uapi/asm-generic/Kbuild.asm
generated-y += unistd_32.h
generic-y += kvm_para.h
-generic-y += shmparam.h
generic-y += ucontext.h
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index eb87cd8327c8..1f04844b6b82 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -34,6 +34,7 @@ generic-y += qrwlock_types.h
generic-y += qrwlock.h
generic-y += sections.h
generic-y += segment.h
+generic-y += shmparam.h
generic-y += string.h
generic-y += switch_to.h
generic-y += topology.h
diff --git a/arch/openrisc/include/uapi/asm/Kbuild b/arch/openrisc/include/uapi/asm/Kbuild
index 6c6f6301012e..0febf1a07c30 100644
--- a/arch/openrisc/include/uapi/asm/Kbuild
+++ b/arch/openrisc/include/uapi/asm/Kbuild
@@ -1,5 +1,4 @@
include include/uapi/asm-generic/Kbuild.asm
generic-y += kvm_para.h
-generic-y += shmparam.h
generic-y += ucontext.h
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index feeeaa60697c..515fc3cc9687 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -103,7 +103,7 @@ choice
prompt "Base ISA"
default ARCH_RV64I
help
- This selects the base ISA that this kernel will traget and must match
+ This selects the base ISA that this kernel will target and must match
the target platform.
config ARCH_RV32I
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index f399659d3b8d..2fd3461e50ab 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -13,8 +13,6 @@ CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
CONFIG_BPF_SYSCALL=y
CONFIG_SMP=y
-CONFIG_PCI=y
-CONFIG_PCIE_XILINX=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_NET=y
@@ -28,6 +26,10 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_IP_PNP_RARP=y
CONFIG_NETLINK_DIAG=y
+CONFIG_PCI=y
+CONFIG_PCIEPORTBUS=y
+CONFIG_PCI_HOST_GENERIC=y
+CONFIG_PCIE_XILINX=y
CONFIG_DEVTMPFS=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_VIRTIO_BLK=y
@@ -63,7 +65,6 @@ CONFIG_USB_STORAGE=y
CONFIG_USB_UAS=y
CONFIG_VIRTIO_MMIO=y
CONFIG_SIFIVE_PLIC=y
-CONFIG_RAS=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_AUTOFS4_FS=y
@@ -77,5 +78,6 @@ CONFIG_NFS_V4_1=y
CONFIG_NFS_V4_2=y
CONFIG_ROOT_NFS=y
CONFIG_CRYPTO_USER_API_HASH=y
+CONFIG_CRYPTO_DEV_VIRTIO=y
CONFIG_PRINTK_TIME=y
# CONFIG_RCU_TRACE is not set
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 06cfbb3aacbb..2a546a52f02a 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -80,7 +80,7 @@ typedef struct page *pgtable_t;
#define __pgd(x) ((pgd_t) { (x) })
#define __pgprot(x) ((pgprot_t) { (x) })
-#ifdef CONFIG_64BITS
+#ifdef CONFIG_64BIT
#define PTE_FMT "%016lx"
#else
#define PTE_FMT "%08lx"
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index 0531f49af5c3..ce70bceb8872 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -22,7 +22,7 @@
* This decides where the kernel will search for a free chunk of vm
* space during mmap's.
*/
-#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE >> 1)
+#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3)
#define STACK_TOP TASK_SIZE
#define STACK_TOP_MAX STACK_TOP
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index 6a92a2fe198e..dac98348c6a3 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -39,6 +39,7 @@ void asm_offsets(void)
OFFSET(TASK_STACK, task_struct, stack);
OFFSET(TASK_TI, task_struct, thread_info);
OFFSET(TASK_TI_FLAGS, task_struct, thread_info.flags);
+ OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count);
OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp);
OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp);
OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu);
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 355166f57205..fd9b57c8b4ce 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -144,6 +144,10 @@ _save_context:
REG_L x2, PT_SP(sp)
.endm
+#if !IS_ENABLED(CONFIG_PREEMPT)
+.set resume_kernel, restore_all
+#endif
+
ENTRY(handle_exception)
SAVE_ALL
@@ -228,7 +232,7 @@ ret_from_exception:
REG_L s0, PT_SSTATUS(sp)
csrc sstatus, SR_SIE
andi s0, s0, SR_SPP
- bnez s0, restore_all
+ bnez s0, resume_kernel
resume_userspace:
/* Interrupts must be disabled here so flags are checked atomically */
@@ -250,6 +254,18 @@ restore_all:
RESTORE_ALL
sret
+#if IS_ENABLED(CONFIG_PREEMPT)
+resume_kernel:
+ REG_L s0, TASK_TI_PREEMPT_COUNT(tp)
+ bnez s0, restore_all
+need_resched:
+ REG_L s0, TASK_TI_FLAGS(tp)
+ andi s0, s0, _TIF_NEED_RESCHED
+ beqz s0, restore_all
+ call preempt_schedule_irq
+ j need_resched
+#endif
+
work_pending:
/* Enter slow path for supplementary processing */
la ra, ret_from_exception
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 6e079e94b638..77564310235f 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -181,7 +181,7 @@ static void __init setup_bootmem(void)
BUG_ON(mem_size == 0);
set_max_mapnr(PFN_DOWN(mem_size));
- max_low_pfn = memblock_end_of_DRAM();
+ max_low_pfn = PFN_DOWN(memblock_end_of_DRAM());
#ifdef CONFIG_BLK_DEV_INITRD
setup_initrd();
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index fc185ecabb0a..18cda0e8cf94 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -57,15 +57,12 @@ void __init setup_smp(void)
while ((dn = of_find_node_by_type(dn, "cpu"))) {
hart = riscv_of_processor_hartid(dn);
- if (hart < 0) {
- of_node_put(dn);
+ if (hart < 0)
continue;
- }
if (hart == cpuid_to_hartid_map(0)) {
BUG_ON(found_boot_cpu);
found_boot_cpu = 1;
- of_node_put(dn);
continue;
}
@@ -73,7 +70,6 @@ void __init setup_smp(void)
set_cpu_possible(cpuid, true);
set_cpu_present(cpuid, true);
cpuid++;
- of_node_put(dn);
}
BUG_ON(!found_boot_cpu);
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 1d9bfaff60bc..658ebf645f42 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -28,7 +28,8 @@ static void __init zone_sizes_init(void)
unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, };
#ifdef CONFIG_ZONE_DMA32
- max_zone_pfns[ZONE_DMA32] = PFN_DOWN(min(4UL * SZ_1G, max_low_pfn));
+ max_zone_pfns[ZONE_DMA32] = PFN_DOWN(min(4UL * SZ_1G,
+ (unsigned long) PFN_PHYS(max_low_pfn)));
#endif
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index 1372553dc0a9..1d1544b6ca74 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -28,6 +28,7 @@ generic-y += preempt.h
generic-y += sections.h
generic-y += segment.h
generic-y += serial.h
+generic-y += shmparam.h
generic-y += sizes.h
generic-y += syscalls.h
generic-y += topology.h
diff --git a/arch/unicore32/include/uapi/asm/Kbuild b/arch/unicore32/include/uapi/asm/Kbuild
index 6c6f6301012e..0febf1a07c30 100644
--- a/arch/unicore32/include/uapi/asm/Kbuild
+++ b/arch/unicore32/include/uapi/asm/Kbuild
@@ -1,5 +1,4 @@
include include/uapi/asm-generic/Kbuild.asm
generic-y += kvm_para.h
-generic-y += shmparam.h
generic-y += ucontext.h
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 8f657286d599..0ce558a8150d 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -7,7 +7,11 @@
#endif
#ifdef CONFIG_KASAN
+#ifdef CONFIG_KASAN_EXTRA
+#define KASAN_STACK_ORDER 2
+#else
#define KASAN_STACK_ORDER 1
+#endif
#else
#define KASAN_STACK_ORDER 0
#endif
diff --git a/block/blk-core.c b/block/blk-core.c
index 3c5f61ceeb67..6b78ec56a4f2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -462,6 +462,10 @@ static void blk_rq_timed_out_timer(struct timer_list *t)
kblockd_schedule_work(&q->timeout_work);
}
+static void blk_timeout_work(struct work_struct *work)
+{
+}
+
/**
* blk_alloc_queue_node - allocate a request queue
* @gfp_mask: memory allocation flags
@@ -505,7 +509,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
timer_setup(&q->backing_dev_info->laptop_mode_wb_timer,
laptop_mode_timer_fn, 0);
timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
- INIT_WORK(&q->timeout_work, NULL);
+ INIT_WORK(&q->timeout_work, blk_timeout_work);
INIT_LIST_HEAD(&q->icq_list);
#ifdef CONFIG_BLK_CGROUP
INIT_LIST_HEAD(&q->blkg_list);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index a3fc7191c694..6e0f2d97fc6d 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -335,7 +335,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error);
spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
- blk_mq_run_hw_queue(hctx, true);
+ blk_mq_sched_restart(hctx);
}
/**
diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index da58020a144e..33a28cde126c 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c
@@ -235,21 +235,28 @@ EXPORT_SYMBOL_GPL(ide_prep_sense);
int ide_queue_sense_rq(ide_drive_t *drive, void *special)
{
- struct request *sense_rq = drive->sense_rq;
+ ide_hwif_t *hwif = drive->hwif;
+ struct request *sense_rq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&hwif->lock, flags);
/* deferred failure from ide_prep_sense() */
if (!drive->sense_rq_armed) {
printk(KERN_WARNING PFX "%s: error queuing a sense request\n",
drive->name);
+ spin_unlock_irqrestore(&hwif->lock, flags);
return -ENOMEM;
}
+ sense_rq = drive->sense_rq;
ide_req(sense_rq)->special = special;
drive->sense_rq_armed = false;
drive->hwif->rq = NULL;
ide_insert_request_head(drive, sense_rq);
+ spin_unlock_irqrestore(&hwif->lock, flags);
return 0;
}
EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 8445b484ae69..b137f27a34d5 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -68,8 +68,10 @@ int ide_end_rq(ide_drive_t *drive, struct request *rq, blk_status_t error,
}
if (!blk_update_request(rq, error, nr_bytes)) {
- if (rq == drive->sense_rq)
+ if (rq == drive->sense_rq) {
drive->sense_rq = NULL;
+ drive->sense_rq_active = false;
+ }
__blk_mq_end_request(rq, error);
return 0;
@@ -451,16 +453,11 @@ void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq)
blk_mq_delay_run_hw_queue(q->queue_hw_ctx[0], 3);
}
-/*
- * Issue a new request to a device.
- */
-blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *hctx,
- const struct blk_mq_queue_data *bd)
+blk_status_t ide_issue_rq(ide_drive_t *drive, struct request *rq,
+ bool local_requeue)
{
- ide_drive_t *drive = hctx->queue->queuedata;
- ide_hwif_t *hwif = drive->hwif;
+ ide_hwif_t *hwif = drive->hwif;
struct ide_host *host = hwif->host;
- struct request *rq = bd->rq;
ide_startstop_t startstop;
if (!blk_rq_is_passthrough(rq) && !(rq->rq_flags & RQF_DONTPREP)) {
@@ -474,8 +471,6 @@ blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *hctx,
if (ide_lock_host(host, hwif))
return BLK_STS_DEV_RESOURCE;
- blk_mq_start_request(rq);
-
spin_lock_irq(&hwif->lock);
if (!ide_lock_port(hwif)) {
@@ -511,18 +506,6 @@ repeat:
drive->dev_flags &= ~(IDE_DFLAG_SLEEPING | IDE_DFLAG_PARKED);
/*
- * we know that the queue isn't empty, but this can happen
- * if ->prep_rq() decides to kill a request
- */
- if (!rq) {
- rq = bd->rq;
- if (!rq) {
- ide_unlock_port(hwif);
- goto out;
- }
- }
-
- /*
* Sanity: don't accept a request that isn't a PM request
* if we are currently power managed. This is very important as
* blk_stop_queue() doesn't prevent the blk_fetch_request()
@@ -560,9 +543,12 @@ repeat:
}
} else {
plug_device:
+ if (local_requeue)
+ list_add(&rq->queuelist, &drive->rq_list);
spin_unlock_irq(&hwif->lock);
ide_unlock_host(host);
- ide_requeue_and_plug(drive, rq);
+ if (!local_requeue)
+ ide_requeue_and_plug(drive, rq);
return BLK_STS_OK;
}
@@ -573,6 +559,26 @@ out:
return BLK_STS_OK;
}
+/*
+ * Issue a new request to a device.
+ */
+blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd)
+{
+ ide_drive_t *drive = hctx->queue->queuedata;
+ ide_hwif_t *hwif = drive->hwif;
+
+ spin_lock_irq(&hwif->lock);
+ if (drive->sense_rq_active) {
+ spin_unlock_irq(&hwif->lock);
+ return BLK_STS_DEV_RESOURCE;
+ }
+ spin_unlock_irq(&hwif->lock);
+
+ blk_mq_start_request(bd->rq);
+ return ide_issue_rq(drive, bd->rq, false);
+}
+
static int drive_is_ready(ide_drive_t *drive)
{
ide_hwif_t *hwif = drive->hwif;
@@ -893,13 +899,8 @@ EXPORT_SYMBOL_GPL(ide_pad_transfer);
void ide_insert_request_head(ide_drive_t *drive, struct request *rq)
{
- ide_hwif_t *hwif = drive->hwif;
- unsigned long flags;
-
- spin_lock_irqsave(&hwif->lock, flags);
+ drive->sense_rq_active = true;
list_add_tail(&rq->queuelist, &drive->rq_list);
- spin_unlock_irqrestore(&hwif->lock, flags);
-
kblockd_schedule_work(&drive->rq_work);
}
EXPORT_SYMBOL_GPL(ide_insert_request_head);
diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c
index 102aa3bc3e7f..8af7af6001eb 100644
--- a/drivers/ide/ide-park.c
+++ b/drivers/ide/ide-park.c
@@ -54,7 +54,9 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
scsi_req(rq)->cmd[0] = REQ_UNPARK_HEADS;
scsi_req(rq)->cmd_len = 1;
ide_req(rq)->type = ATA_PRIV_MISC;
+ spin_lock_irq(&hwif->lock);
ide_insert_request_head(drive, rq);
+ spin_unlock_irq(&hwif->lock);
out:
return;
diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
index 63627be0811a..5aeaca24a28f 100644
--- a/drivers/ide/ide-probe.c
+++ b/drivers/ide/ide-probe.c
@@ -1159,18 +1159,27 @@ static void drive_rq_insert_work(struct work_struct *work)
ide_drive_t *drive = container_of(work, ide_drive_t, rq_work);
ide_hwif_t *hwif = drive->hwif;
struct request *rq;
+ blk_status_t ret;
LIST_HEAD(list);
- spin_lock_irq(&hwif->lock);
- if (!list_empty(&drive->rq_list))
- list_splice_init(&drive->rq_list, &list);
- spin_unlock_irq(&hwif->lock);
+ blk_mq_quiesce_queue(drive->queue);
- while (!list_empty(&list)) {
- rq = list_first_entry(&list, struct request, queuelist);
+ ret = BLK_STS_OK;
+ spin_lock_irq(&hwif->lock);
+ while (!list_empty(&drive->rq_list)) {
+ rq = list_first_entry(&drive->rq_list, struct request, queuelist);
list_del_init(&rq->queuelist);
- blk_execute_rq_nowait(drive->queue, rq->rq_disk, rq, true, NULL);
+
+ spin_unlock_irq(&hwif->lock);
+ ret = ide_issue_rq(drive, rq, true);
+ spin_lock_irq(&hwif->lock);
}
+ spin_unlock_irq(&hwif->lock);
+
+ blk_mq_unquiesce_queue(drive->queue);
+
+ if (ret != BLK_STS_OK)
+ kblockd_schedule_work(&drive->rq_work);
}
static const u8 ide_hwif_to_major[] =
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index ec3a5ef7fee0..cbbe6b6535be 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1935,12 +1935,14 @@ out:
}
static struct stripe_head *
-r5c_recovery_alloc_stripe(struct r5conf *conf,
- sector_t stripe_sect)
+r5c_recovery_alloc_stripe(
+ struct r5conf *conf,
+ sector_t stripe_sect,
+ int noblock)
{
struct stripe_head *sh;
- sh = raid5_get_active_stripe(conf, stripe_sect, 0, 1, 0);
+ sh = raid5_get_active_stripe(conf, stripe_sect, 0, noblock, 0);
if (!sh)
return NULL; /* no more stripe available */
@@ -2150,7 +2152,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
stripe_sect);
if (!sh) {
- sh = r5c_recovery_alloc_stripe(conf, stripe_sect);
+ sh = r5c_recovery_alloc_stripe(conf, stripe_sect, 1);
/*
* cannot get stripe from raid5_get_active_stripe
* try replay some stripes
@@ -2159,20 +2161,29 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
r5c_recovery_replay_stripes(
cached_stripe_list, ctx);
sh = r5c_recovery_alloc_stripe(
- conf, stripe_sect);
+ conf, stripe_sect, 1);
}
if (!sh) {
+ int new_size = conf->min_nr_stripes * 2;
pr_debug("md/raid:%s: Increasing stripe cache size to %d to recovery data on journal.\n",
mdname(mddev),
- conf->min_nr_stripes * 2);
- raid5_set_cache_size(mddev,
- conf->min_nr_stripes * 2);
- sh = r5c_recovery_alloc_stripe(conf,
- stripe_sect);
+ new_size);
+ ret = raid5_set_cache_size(mddev, new_size);
+ if (conf->min_nr_stripes <= new_size / 2) {
+ pr_err("md/raid:%s: Cannot increase cache size, ret=%d, new_size=%d, min_nr_stripes=%d, max_nr_stripes=%d\n",
+ mdname(mddev),
+ ret,
+ new_size,
+ conf->min_nr_stripes,
+ conf->max_nr_stripes);
+ return -ENOMEM;
+ }
+ sh = r5c_recovery_alloc_stripe(
+ conf, stripe_sect, 0);
}
if (!sh) {
pr_err("md/raid:%s: Cannot get enough stripes due to memory pressure. Recovery failed.\n",
- mdname(mddev));
+ mdname(mddev));
return -ENOMEM;
}
list_add_tail(&sh->lru, cached_stripe_list);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 4990f0319f6c..cecea901ab8c 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -6369,6 +6369,7 @@ raid5_show_stripe_cache_size(struct mddev *mddev, char *page)
int
raid5_set_cache_size(struct mddev *mddev, int size)
{
+ int result = 0;
struct r5conf *conf = mddev->private;
if (size <= 16 || size > 32768)
@@ -6385,11 +6386,14 @@ raid5_set_cache_size(struct mddev *mddev, int size)
mutex_lock(&conf->cache_size_mutex);
while (size > conf->max_nr_stripes)
- if (!grow_one_stripe(conf, GFP_KERNEL))
+ if (!grow_one_stripe(conf, GFP_KERNEL)) {
+ conf->min_nr_stripes = conf->max_nr_stripes;
+ result = -ENOMEM;
break;
+ }
mutex_unlock(&conf->cache_size_mutex);
- return 0;
+ return result;
}
EXPORT_SYMBOL(raid5_set_cache_size);
diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c
index 9cf30d124b9e..e390f8c6d5f3 100644
--- a/drivers/s390/scsi/zfcp_aux.c
+++ b/drivers/s390/scsi/zfcp_aux.c
@@ -403,7 +403,6 @@ struct zfcp_adapter *zfcp_adapter_enqueue(struct ccw_device *ccw_device)
goto failed;
/* report size limit per scatter-gather segment */
- adapter->dma_parms.max_segment_size = ZFCP_QDIO_SBALE_LEN;
adapter->ccw_device->dev.dma_parms = &adapter->dma_parms;
adapter->stat_read_buf_num = FSF_STATUS_READS_RECOM;
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c
index 00acc7144bbc..f4f6a07c5222 100644
--- a/drivers/s390/scsi/zfcp_scsi.c
+++ b/drivers/s390/scsi/zfcp_scsi.c
@@ -428,6 +428,8 @@ static struct scsi_host_template zfcp_scsi_host_template = {
.max_sectors = (((QDIO_MAX_ELEMENTS_PER_BUFFER - 1)
* ZFCP_QDIO_MAX_SBALS_PER_REQ) - 2) * 8,
/* GCD, adjusted later */
+ /* report size limit per scatter-gather segment */
+ .max_segment_size = ZFCP_QDIO_SBALE_LEN,
.dma_boundary = ZFCP_QDIO_SBALE_LEN - 1,
.shost_attrs = zfcp_sysfs_shost_attrs,
.sdev_attrs = zfcp_sysfs_sdev_attrs,
diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c
index 128d658d472a..16957d7ac414 100644
--- a/drivers/scsi/53c700.c
+++ b/drivers/scsi/53c700.c
@@ -295,7 +295,7 @@ NCR_700_detect(struct scsi_host_template *tpnt,
if(tpnt->sdev_attrs == NULL)
tpnt->sdev_attrs = NCR_700_dev_attrs;
- memory = dma_alloc_attrs(hostdata->dev, TOTAL_MEM_SIZE, &pScript,
+ memory = dma_alloc_attrs(dev, TOTAL_MEM_SIZE, &pScript,
GFP_KERNEL, DMA_ATTR_NON_CONSISTENT);
if(memory == NULL) {
printk(KERN_ERR "53c700: Failed to allocate memory for driver, detaching\n");
diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c
index 350257c13a5b..bc9f2a2365f4 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_io.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_io.c
@@ -240,6 +240,7 @@ struct bnx2fc_cmd_mgr *bnx2fc_cmd_mgr_alloc(struct bnx2fc_hba *hba)
return NULL;
}
+ cmgr->hba = hba;
cmgr->free_list = kcalloc(arr_sz, sizeof(*cmgr->free_list),
GFP_KERNEL);
if (!cmgr->free_list) {
@@ -256,7 +257,6 @@ struct bnx2fc_cmd_mgr *bnx2fc_cmd_mgr_alloc(struct bnx2fc_hba *hba)
goto mem_err;
}
- cmgr->hba = hba;
cmgr->cmds = (struct bnx2fc_cmd **)(cmgr + 1);
for (i = 0; i < arr_sz; i++) {
@@ -295,7 +295,7 @@ struct bnx2fc_cmd_mgr *bnx2fc_cmd_mgr_alloc(struct bnx2fc_hba *hba)
/* Allocate pool of io_bdts - one for each bnx2fc_cmd */
mem_size = num_ios * sizeof(struct io_bdt *);
- cmgr->io_bdt_pool = kmalloc(mem_size, GFP_KERNEL);
+ cmgr->io_bdt_pool = kzalloc(mem_size, GFP_KERNEL);
if (!cmgr->io_bdt_pool) {
printk(KERN_ERR PFX "failed to alloc io_bdt_pool\n");
goto mem_err;
diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c
index be83590ed955..ff943f477d6f 100644
--- a/drivers/scsi/libfc/fc_lport.c
+++ b/drivers/scsi/libfc/fc_lport.c
@@ -1726,14 +1726,14 @@ void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp,
fc_frame_payload_op(fp) != ELS_LS_ACC) {
FC_LPORT_DBG(lport, "FLOGI not accepted or bad response\n");
fc_lport_error(lport, fp);
- goto err;
+ goto out;
}
flp = fc_frame_payload_get(fp, sizeof(*flp));
if (!flp) {
FC_LPORT_DBG(lport, "FLOGI bad response\n");
fc_lport_error(lport, fp);
- goto err;
+ goto out;
}
mfs = ntohs(flp->fl_csp.sp_bb_data) &
@@ -1743,7 +1743,7 @@ void fc_lport_flogi_resp(struct fc_seq *sp, struct fc_frame *fp,
FC_LPORT_DBG(lport, "FLOGI bad mfs:%hu response, "
"lport->mfs:%hu\n", mfs, lport->mfs);
fc_lport_error(lport, fp);
- goto err;
+ goto out;
}
if (mfs <= lport->mfs) {
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 661512bec3ac..e27f4df24021 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -62,7 +62,7 @@
/* make sure inq_product_rev string corresponds to this version */
#define SDEBUG_VERSION "0188" /* format to fit INQUIRY revision field */
-static const char *sdebug_version_date = "20180128";
+static const char *sdebug_version_date = "20190125";
#define MY_NAME "scsi_debug"
@@ -735,7 +735,7 @@ static inline bool scsi_debug_lbp(void)
(sdebug_lbpu || sdebug_lbpws || sdebug_lbpws10);
}
-static void *fake_store(unsigned long long lba)
+static void *lba2fake_store(unsigned long long lba)
{
lba = do_div(lba, sdebug_store_sectors);
@@ -2514,8 +2514,8 @@ static int do_device_access(struct scsi_cmnd *scmd, u32 sg_skip, u64 lba,
return ret;
}
-/* If fake_store(lba,num) compares equal to arr(num), then copy top half of
- * arr into fake_store(lba,num) and return true. If comparison fails then
+/* If lba2fake_store(lba,num) compares equal to arr(num), then copy top half of
+ * arr into lba2fake_store(lba,num) and return true. If comparison fails then
* return false. */
static bool comp_write_worker(u64 lba, u32 num, const u8 *arr)
{
@@ -2643,7 +2643,7 @@ static int prot_verify_read(struct scsi_cmnd *SCpnt, sector_t start_sec,
if (sdt->app_tag == cpu_to_be16(0xffff))
continue;
- ret = dif_verify(sdt, fake_store(sector), sector, ei_lba);
+ ret = dif_verify(sdt, lba2fake_store(sector), sector, ei_lba);
if (ret) {
dif_errors++;
return ret;
@@ -3261,10 +3261,12 @@ err_out:
static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num,
u32 ei_lba, bool unmap, bool ndob)
{
+ int ret;
unsigned long iflags;
unsigned long long i;
- int ret;
- u64 lba_off;
+ u32 lb_size = sdebug_sector_size;
+ u64 block, lbaa;
+ u8 *fs1p;
ret = check_device_access_params(scp, lba, num);
if (ret)
@@ -3276,31 +3278,30 @@ static int resp_write_same(struct scsi_cmnd *scp, u64 lba, u32 num,
unmap_region(lba, num);
goto out;
}
-
- lba_off = lba * sdebug_sector_size;
+ lbaa = lba;
+ block = do_div(lbaa, sdebug_store_sectors);
/* if ndob then zero 1 logical block, else fetch 1 logical block */
+ fs1p = fake_storep + (block * lb_size);
if (ndob) {
- memset(fake_storep + lba_off, 0, sdebug_sector_size);
+ memset(fs1p, 0, lb_size);
ret = 0;
} else
- ret = fetch_to_dev_buffer(scp, fake_storep + lba_off,
- sdebug_sector_size);
+ ret = fetch_to_dev_buffer(scp, fs1p, lb_size);
if (-1 == ret) {
write_unlock_irqrestore(&atomic_rw, iflags);
return DID_ERROR << 16;
- } else if (sdebug_verbose && !ndob && (ret < sdebug_sector_size))
+ } else if (sdebug_verbose && !ndob && (ret < lb_size))
sdev_printk(KERN_INFO, scp->device,
"%s: %s: lb size=%u, IO sent=%d bytes\n",
- my_name, "write same",
- sdebug_sector_size, ret);
+ my_name, "write same", lb_size, ret);
/* Copy first sector to remaining blocks */
- for (i = 1 ; i < num ; i++)
- memcpy(fake_storep + ((lba + i) * sdebug_sector_size),
- fake_storep + lba_off,
- sdebug_sector_size);
-
+ for (i = 1 ; i < num ; i++) {
+ lbaa = lba + i;
+ block = do_div(lbaa, sdebug_store_sectors);
+ memmove(fake_storep + (block * lb_size), fs1p, lb_size);
+ }
if (scsi_debug_lbp())
map_region(lba, num);
out:
diff --git a/drivers/tty/serial/earlycon-riscv-sbi.c b/drivers/tty/serial/earlycon-riscv-sbi.c
index e1a551aae336..ce81523c3113 100644
--- a/drivers/tty/serial/earlycon-riscv-sbi.c
+++ b/drivers/tty/serial/earlycon-riscv-sbi.c
@@ -10,13 +10,16 @@
#include <linux/serial_core.h>
#include <asm/sbi.h>
-static void sbi_console_write(struct console *con,
- const char *s, unsigned int n)
+static void sbi_putc(struct uart_port *port, int c)
{
- int i;
+ sbi_console_putchar(c);
+}
- for (i = 0; i < n; ++i)
- sbi_console_putchar(s[i]);
+static void sbi_console_write(struct console *con,
+ const char *s, unsigned n)
+{
+ struct earlycon_device *dev = con->data;
+ uart_console_write(&dev->port, s, n, sbi_putc);
}
static int __init early_sbi_setup(struct earlycon_device *device,
diff --git a/fs/autofs/expire.c b/fs/autofs/expire.c
index d441244b79df..28d9c2b1b3bb 100644
--- a/fs/autofs/expire.c
+++ b/fs/autofs/expire.c
@@ -596,7 +596,6 @@ int autofs_expire_run(struct super_block *sb,
pkt.len = dentry->d_name.len;
memcpy(pkt.name, dentry->d_name.name, pkt.len);
pkt.name[pkt.len] = '\0';
- dput(dentry);
if (copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)))
ret = -EFAULT;
@@ -609,6 +608,8 @@ int autofs_expire_run(struct super_block *sb,
complete_all(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
+ dput(dentry);
+
return ret;
}
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index 0e8ea2d9a2bb..078992eee299 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -266,8 +266,10 @@ int autofs_fill_super(struct super_block *s, void *data, int silent)
}
root_inode = autofs_get_inode(s, S_IFDIR | 0755);
root = d_make_root(root_inode);
- if (!root)
+ if (!root) {
+ ret = -ENOMEM;
goto fail_ino;
+ }
pipe = NULL;
root->d_fsdata = ino;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index f64aad613727..5a6c39b44c84 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -968,6 +968,48 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
return 0;
}
+static struct extent_buffer *alloc_tree_block_no_bg_flush(
+ struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 parent_start,
+ const struct btrfs_disk_key *disk_key,
+ int level,
+ u64 hint,
+ u64 empty_size)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct extent_buffer *ret;
+
+ /*
+ * If we are COWing a node/leaf from the extent, chunk, device or free
+ * space trees, make sure that we do not finish block group creation of
+ * pending block groups. We do this to avoid a deadlock.
+ * COWing can result in allocation of a new chunk, and flushing pending
+ * block groups (btrfs_create_pending_block_groups()) can be triggered
+ * when finishing allocation of a new chunk. Creation of a pending block
+ * group modifies the extent, chunk, device and free space trees,
+ * therefore we could deadlock with ourselves since we are holding a
+ * lock on an extent buffer that btrfs_create_pending_block_groups() may
+ * try to COW later.
+ * For similar reasons, we also need to delay flushing pending block
+ * groups when splitting a leaf or node, from one of those trees, since
+ * we are holding a write lock on it and its parent or when inserting a
+ * new root node for one of those trees.
+ */
+ if (root == fs_info->extent_root ||
+ root == fs_info->chunk_root ||
+ root == fs_info->dev_root ||
+ root == fs_info->free_space_root)
+ trans->can_flush_pending_bgs = false;
+
+ ret = btrfs_alloc_tree_block(trans, root, parent_start,
+ root->root_key.objectid, disk_key, level,
+ hint, empty_size);
+ trans->can_flush_pending_bgs = true;
+
+ return ret;
+}
+
/*
* does the dirty work in cow of a single block. The parent block (if
* supplied) is updated to point to the new cow copy. The new buffer is marked
@@ -1015,28 +1057,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
parent_start = parent->start;
- /*
- * If we are COWing a node/leaf from the extent, chunk, device or free
- * space trees, make sure that we do not finish block group creation of
- * pending block groups. We do this to avoid a deadlock.
- * COWing can result in allocation of a new chunk, and flushing pending
- * block groups (btrfs_create_pending_block_groups()) can be triggered
- * when finishing allocation of a new chunk. Creation of a pending block
- * group modifies the extent, chunk, device and free space trees,
- * therefore we could deadlock with ourselves since we are holding a
- * lock on an extent buffer that btrfs_create_pending_block_groups() may
- * try to COW later.
- */
- if (root == fs_info->extent_root ||
- root == fs_info->chunk_root ||
- root == fs_info->dev_root ||
- root == fs_info->free_space_root)
- trans->can_flush_pending_bgs = false;
-
- cow = btrfs_alloc_tree_block(trans, root, parent_start,
- root->root_key.objectid, &disk_key, level,
- search_start, empty_size);
- trans->can_flush_pending_bgs = true;
+ cow = alloc_tree_block_no_bg_flush(trans, root, parent_start, &disk_key,
+ level, search_start, empty_size);
if (IS_ERR(cow))
return PTR_ERR(cow);
@@ -3345,8 +3367,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
else
btrfs_node_key(lower, &lower_key, 0);
- c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
- &lower_key, level, root->node->start, 0);
+ c = alloc_tree_block_no_bg_flush(trans, root, 0, &lower_key, level,
+ root->node->start, 0);
if (IS_ERR(c))
return PTR_ERR(c);
@@ -3475,8 +3497,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
mid = (c_nritems + 1) / 2;
btrfs_node_key(c, &disk_key, mid);
- split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
- &disk_key, level, c->start, 0);
+ split = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, level,
+ c->start, 0);
if (IS_ERR(split))
return PTR_ERR(split);
@@ -4260,8 +4282,8 @@ again:
else
btrfs_item_key(l, &disk_key, mid);
- right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
- &disk_key, 0, l->start, 0);
+ right = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, 0,
+ l->start, 0);
if (IS_ERR(right))
return PTR_ERR(right);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index c5586ffd1426..0a3f122dd61f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1621,6 +1621,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
flags | SB_RDONLY, device_name, data);
if (IS_ERR(mnt_root)) {
root = ERR_CAST(mnt_root);
+ kfree(subvol_name);
goto out;
}
@@ -1630,12 +1631,14 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
if (error < 0) {
root = ERR_PTR(error);
mntput(mnt_root);
+ kfree(subvol_name);
goto out;
}
}
}
if (IS_ERR(mnt_root)) {
root = ERR_CAST(mnt_root);
+ kfree(subvol_name);
goto out;
}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 127fa1535f58..4ec2b660d014 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -850,14 +850,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
btrfs_trans_release_chunk_metadata(trans);
- if (lock && should_end_transaction(trans) &&
- READ_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
- spin_lock(&info->trans_lock);
- if (cur_trans->state == TRANS_STATE_RUNNING)
- cur_trans->state = TRANS_STATE_BLOCKED;
- spin_unlock(&info->trans_lock);
- }
-
if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
if (throttle)
return btrfs_commit_transaction(trans);
@@ -1879,6 +1871,21 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err)
kmem_cache_free(btrfs_trans_handle_cachep, trans);
}
+/*
+ * Release reserved delayed ref space of all pending block groups of the
+ * transaction and remove them from the list
+ */
+static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
+{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_block_group_cache *block_group, *tmp;
+
+ list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
+ btrfs_delayed_refs_rsv_release(fs_info, 1);
+ list_del_init(&block_group->bg_list);
+ }
+}
+
static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
{
/*
@@ -2270,6 +2277,7 @@ scrub_continue:
btrfs_scrub_continue(fs_info);
cleanup_transaction:
btrfs_trans_release_metadata(trans);
+ btrfs_cleanup_pending_block_groups(trans);
btrfs_trans_release_chunk_metadata(trans);
trans->block_rsv = NULL;
btrfs_warn(fs_info, "Skipping commit of aborted transaction.");
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 3e4f8f88353e..15561926ab32 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -957,11 +957,11 @@ static noinline struct btrfs_device *device_list_add(const char *path,
else
fs_devices = alloc_fs_devices(disk_super->fsid, NULL);
- fs_devices->fsid_change = fsid_change_in_progress;
-
if (IS_ERR(fs_devices))
return ERR_CAST(fs_devices);
+ fs_devices->fsid_change = fsid_change_in_progress;
+
mutex_lock(&fs_devices->device_list_mutex);
list_add(&fs_devices->fs_list, &fs_uuids);
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 82377017130f..d31b6c72b476 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -21,8 +21,13 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
spin_lock(&sb->s_inode_list_lock);
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
spin_lock(&inode->i_lock);
+ /*
+ * We must skip inodes in unusual state. We may also skip
+ * inodes without pages but we deliberately won't in case
+ * we need to reschedule to avoid softlockups.
+ */
if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
- (inode->i_mapping->nrpages == 0)) {
+ (inode->i_mapping->nrpages == 0 && !need_resched())) {
spin_unlock(&inode->i_lock);
continue;
}
@@ -30,6 +35,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
spin_unlock(&inode->i_lock);
spin_unlock(&sb->s_inode_list_lock);
+ cond_resched();
invalidate_mapping_pages(inode->i_mapping, 0, -1);
iput(toput_inode);
toput_inode = inode;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 8ae109429a88..e39bac94dead 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -256,7 +256,7 @@ struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry,
inode = proc_get_inode(dir->i_sb, de);
if (!inode)
return ERR_PTR(-ENOMEM);
- d_set_d_op(dentry, &proc_misc_dentry_ops);
+ d_set_d_op(dentry, de->proc_dops);
return d_splice_alias(inode, dentry);
}
read_unlock(&proc_subdir_lock);
@@ -429,6 +429,8 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
INIT_LIST_HEAD(&ent->pde_openers);
proc_set_user(ent, (*parent)->uid, (*parent)->gid);
+ ent->proc_dops = &proc_misc_dentry_ops;
+
out:
return ent;
}
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 5185d7f6a51e..95b14196f284 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -44,6 +44,7 @@ struct proc_dir_entry {
struct completion *pde_unload_completion;
const struct inode_operations *proc_iops;
const struct file_operations *proc_fops;
+ const struct dentry_operations *proc_dops;
union {
const struct seq_operations *seq_ops;
int (*single_show)(struct seq_file *, void *);
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index d5e0fcb3439e..a7b12435519e 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -38,6 +38,22 @@ static struct net *get_proc_net(const struct inode *inode)
return maybe_get_net(PDE_NET(PDE(inode)));
}
+static int proc_net_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ return 0;
+}
+
+static const struct dentry_operations proc_net_dentry_ops = {
+ .d_revalidate = proc_net_d_revalidate,
+ .d_delete = always_delete_dentry,
+};
+
+static void pde_force_lookup(struct proc_dir_entry *pde)
+{
+ /* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */
+ pde->proc_dops = &proc_net_dentry_ops;
+}
+
static int seq_open_net(struct inode *inode, struct file *file)
{
unsigned int state_size = PDE(inode)->state_size;
@@ -90,6 +106,7 @@ struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode,
p = proc_create_reg(name, mode, &parent, data);
if (!p)
return NULL;
+ pde_force_lookup(p);
p->proc_fops = &proc_net_seq_fops;
p->seq_ops = ops;
p->state_size = state_size;
@@ -133,6 +150,7 @@ struct proc_dir_entry *proc_create_net_data_write(const char *name, umode_t mode
p = proc_create_reg(name, mode, &parent, data);
if (!p)
return NULL;
+ pde_force_lookup(p);
p->proc_fops = &proc_net_seq_fops;
p->seq_ops = ops;
p->state_size = state_size;
@@ -181,6 +199,7 @@ struct proc_dir_entry *proc_create_net_single(const char *name, umode_t mode,
p = proc_create_reg(name, mode, &parent, data);
if (!p)
return NULL;
+ pde_force_lookup(p);
p->proc_fops = &proc_net_single_fops;
p->single_show = show;
return proc_register(parent, p);
@@ -223,6 +242,7 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo
p = proc_create_reg(name, mode, &parent, data);
if (!p)
return NULL;
+ pde_force_lookup(p);
p->proc_fops = &proc_net_single_fops;
p->single_show = show;
p->write = write;
diff --git a/include/linux/ide.h b/include/linux/ide.h
index e7d29ae633cd..971cf76a78a0 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -615,6 +615,7 @@ struct ide_drive_s {
/* current sense rq and buffer */
bool sense_rq_armed;
+ bool sense_rq_active;
struct request *sense_rq;
struct request_sense sense_data;
@@ -1219,6 +1220,7 @@ extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout);
extern void ide_timer_expiry(struct timer_list *t);
extern irqreturn_t ide_intr(int irq, void *dev_id);
extern blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *);
+extern blk_status_t ide_issue_rq(ide_drive_t *, struct request *, bool);
extern void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq);
void ide_init_disk(struct gendisk *, ide_drive_t *);
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 07da5c6c5ba0..368267c1b71b 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -21,14 +21,16 @@ struct vmem_altmap;
* walkers which rely on the fully initialized page->flags and others
* should use this rather than pfn_valid && pfn_to_page
*/
-#define pfn_to_online_page(pfn) \
-({ \
- struct page *___page = NULL; \
- unsigned long ___nr = pfn_to_section_nr(pfn); \
- \
- if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr))\
- ___page = pfn_to_page(pfn); \
- ___page; \
+#define pfn_to_online_page(pfn) \
+({ \
+ struct page *___page = NULL; \
+ unsigned long ___pfn = pfn; \
+ unsigned long ___nr = pfn_to_section_nr(___pfn); \
+ \
+ if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr) && \
+ pfn_valid_within(___pfn)) \
+ ___page = pfn_to_page(___pfn); \
+ ___page; \
})
/*
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index ec912d01126f..ecdc6542070f 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -71,6 +71,7 @@ static inline int get_dumpable(struct mm_struct *mm)
#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */
#define MMF_DISABLE_THP 24 /* disable THP for all VMAs */
#define MMF_OOM_VICTIM 25 /* mm is the oom victim */
+#define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */
#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP)
#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
diff --git a/init/Kconfig b/init/Kconfig
index 513fa544a134..c9386a365eea 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -512,6 +512,17 @@ config PSI_DEFAULT_DISABLED
per default but can be enabled through passing psi=1 on the
kernel commandline during boot.
+ This feature adds some code to the task wakeup and sleep
+ paths of the scheduler. The overhead is too low to affect
+ common scheduling-intense workloads in practice (such as
+ webservers, memcache), but it does show up in artificial
+ scheduler stress tests, such as hackbench.
+
+ If you are paranoid and not sure what the kernel will be
+ used for, say Y.
+
+ Say N if unsure.
+
endmenu # "CPU/Task time and stats accounting"
config CPU_ISOLATION
@@ -825,7 +836,7 @@ config CGROUP_PIDS
PIDs controller is designed to stop this from happening.
It should be noted that organisational operations (such as attaching
- to a cgroup hierarchy will *not* be blocked by the PIDs controller),
+ to a cgroup hierarchy) will *not* be blocked by the PIDs controller,
since the PIDs limit only affects a process's ability to fork, not to
attach to a cgroup.
diff --git a/kernel/exit.c b/kernel/exit.c
index 3fb7be001964..2639a30a8aa5 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -558,12 +558,14 @@ static struct task_struct *find_alive_thread(struct task_struct *p)
return NULL;
}
-static struct task_struct *find_child_reaper(struct task_struct *father)
+static struct task_struct *find_child_reaper(struct task_struct *father,
+ struct list_head *dead)
__releases(&tasklist_lock)
__acquires(&tasklist_lock)
{
struct pid_namespace *pid_ns = task_active_pid_ns(father);
struct task_struct *reaper = pid_ns->child_reaper;
+ struct task_struct *p, *n;
if (likely(reaper != father))
return reaper;
@@ -579,6 +581,12 @@ static struct task_struct *find_child_reaper(struct task_struct *father)
panic("Attempted to kill init! exitcode=0x%08x\n",
father->signal->group_exit_code ?: father->exit_code);
}
+
+ list_for_each_entry_safe(p, n, dead, ptrace_entry) {
+ list_del_init(&p->ptrace_entry);
+ release_task(p);
+ }
+
zap_pid_ns_processes(pid_ns);
write_lock_irq(&tasklist_lock);
@@ -668,7 +676,7 @@ static void forget_original_parent(struct task_struct *father,
exit_ptrace(father, dead);
/* Can drop and reacquire tasklist_lock */
- reaper = find_child_reaper(father);
+ reaper = find_child_reaper(father, dead);
if (list_empty(&father->children))
return;
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index fe24de3fbc93..c3484785b179 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -124,6 +124,7 @@
* sampling of the aggregate task states would be.
*/
+#include "../workqueue_internal.h"
#include <linux/sched/loadavg.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
@@ -480,9 +481,6 @@ static void psi_group_change(struct psi_group *group, int cpu,
groupc->tasks[t]++;
write_seqcount_end(&groupc->seq);
-
- if (!delayed_work_pending(&group->clock_work))
- schedule_delayed_work(&group->clock_work, PSI_FREQ);
}
static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
@@ -513,6 +511,7 @@ void psi_task_change(struct task_struct *task, int clear, int set)
{
int cpu = task_cpu(task);
struct psi_group *group;
+ bool wake_clock = true;
void *iter = NULL;
if (!task->pid)
@@ -530,8 +529,22 @@ void psi_task_change(struct task_struct *task, int clear, int set)
task->psi_flags &= ~clear;
task->psi_flags |= set;
- while ((group = iterate_groups(task, &iter)))
+ /*
+ * Periodic aggregation shuts off if there is a period of no
+ * task changes, so we wake it back up if necessary. However,
+ * don't do this if the task change is the aggregation worker
+ * itself going to sleep, or we'll ping-pong forever.
+ */
+ if (unlikely((clear & TSK_RUNNING) &&
+ (task->flags & PF_WQ_WORKER) &&
+ wq_worker_last_func(task) == psi_update_work))
+ wake_clock = false;
+
+ while ((group = iterate_groups(task, &iter))) {
psi_group_change(group, cpu, clear, set);
+ if (wake_clock && !delayed_work_pending(&group->clock_work))
+ schedule_delayed_work(&group->clock_work, PSI_FREQ);
+ }
}
void psi_memstall_tick(struct task_struct *task, int cpu)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 392be4b252f6..fc5d23d752a5 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -910,6 +910,26 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task)
}
/**
+ * wq_worker_last_func - retrieve worker's last work function
+ *
+ * Determine the last function a worker executed. This is called from
+ * the scheduler to get a worker's last known identity.
+ *
+ * CONTEXT:
+ * spin_lock_irq(rq->lock)
+ *
+ * Return:
+ * The last work function %current executed as a worker, NULL if it
+ * hasn't executed any work yet.
+ */
+work_func_t wq_worker_last_func(struct task_struct *task)
+{
+ struct worker *worker = kthread_data(task);
+
+ return worker->last_func;
+}
+
+/**
* worker_set_flags - set worker flags and adjust nr_running accordingly
* @worker: self
* @flags: flags to set
@@ -2184,6 +2204,9 @@ __acquires(&pool->lock)
if (unlikely(cpu_intensive))
worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
+ /* tag the worker for identification in schedule() */
+ worker->last_func = worker->current_func;
+
/* we're done with it, release */
hash_del(&worker->hentry);
worker->current_work = NULL;
diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
index 66fbb5a9e633..cb68b03ca89a 100644
--- a/kernel/workqueue_internal.h
+++ b/kernel/workqueue_internal.h
@@ -53,6 +53,9 @@ struct worker {
/* used only by rescuers to point to the target workqueue */
struct workqueue_struct *rescue_wq; /* I: the workqueue to rescue */
+
+ /* used by the scheduler to determine a worker's last known identity */
+ work_func_t last_func;
};
/**
@@ -67,9 +70,10 @@ static inline struct worker *current_wq_worker(void)
/*
* Scheduler hooks for concurrency managed workqueue. Only to be used from
- * sched/core.c and workqueue.c.
+ * sched/ and workqueue.c.
*/
void wq_worker_waking_up(struct task_struct *task, int cpu);
struct task_struct *wq_worker_sleeping(struct task_struct *task);
+work_func_t wq_worker_last_func(struct task_struct *task);
#endif /* _KERNEL_WORKQUEUE_INTERNAL_H */
diff --git a/lib/test_kmod.c b/lib/test_kmod.c
index d82d022111e0..9cf77628fc91 100644
--- a/lib/test_kmod.c
+++ b/lib/test_kmod.c
@@ -632,7 +632,7 @@ static void __kmod_config_free(struct test_config *config)
config->test_driver = NULL;
kfree_const(config->test_fs);
- config->test_driver = NULL;
+ config->test_fs = NULL;
}
static void kmod_config_free(struct kmod_test_device *test_dev)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index df2e7dd5ff17..afef61656c1e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4268,7 +4268,8 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
break;
}
if (ret & VM_FAULT_RETRY) {
- if (nonblocking)
+ if (nonblocking &&
+ !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
*nonblocking = 0;
*nr_pages = 0;
/*
diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile
index 0a14fcff70ed..e2bb06c1b45e 100644
--- a/mm/kasan/Makefile
+++ b/mm/kasan/Makefile
@@ -5,6 +5,7 @@ UBSAN_SANITIZE_generic.o := n
UBSAN_SANITIZE_tags.o := n
KCOV_INSTRUMENT := n
+CFLAGS_REMOVE_common.o = -pg
CFLAGS_REMOVE_generic.o = -pg
# Function splitter causes unnecessary splits in __asan_load1/__asan_store1
# see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 7c72f2a95785..831be5ff5f4d 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -372,7 +372,8 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
if (fail || tk->addr_valid == 0) {
pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
pfn, tk->tsk->comm, tk->tsk->pid);
- force_sig(SIGKILL, tk->tsk);
+ do_send_sig_info(SIGKILL, SEND_SIG_PRIV,
+ tk->tsk, PIDTYPE_PID);
}
/*
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b9a667d36c55..124e794867c5 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1233,7 +1233,8 @@ static bool is_pageblock_removable_nolock(struct page *page)
bool is_mem_section_removable(unsigned long start_pfn, unsigned long nr_pages)
{
struct page *page = pfn_to_page(start_pfn);
- struct page *end_page = page + nr_pages;
+ unsigned long end_pfn = min(start_pfn + nr_pages, zone_end_pfn(page_zone(page)));
+ struct page *end_page = pfn_to_page(end_pfn);
/* Check the starting page of each pageblock within the range */
for (; page < end_page; page = next_active_pageblock(page)) {
@@ -1273,6 +1274,9 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
i++;
if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn)
continue;
+ /* Check if we got outside of the zone */
+ if (zone && !zone_spans_pfn(zone, pfn + i))
+ return 0;
page = pfn_to_page(pfn + i);
if (zone && page_zone(page) != zone)
return 0;
@@ -1301,23 +1305,27 @@ int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn,
static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
{
unsigned long pfn;
- struct page *page;
+
for (pfn = start; pfn < end; pfn++) {
- if (pfn_valid(pfn)) {
- page = pfn_to_page(pfn);
- if (PageLRU(page))
- return pfn;
- if (__PageMovable(page))
- return pfn;
- if (PageHuge(page)) {
- if (hugepage_migration_supported(page_hstate(page)) &&
- page_huge_active(page))
- return pfn;
- else
- pfn = round_up(pfn + 1,
- 1 << compound_order(page)) - 1;
- }
- }
+ struct page *page, *head;
+ unsigned long skip;
+
+ if (!pfn_valid(pfn))
+ continue;
+ page = pfn_to_page(pfn);
+ if (PageLRU(page))
+ return pfn;
+ if (__PageMovable(page))
+ return pfn;
+
+ if (!PageHuge(page))
+ continue;
+ head = compound_head(page);
+ if (hugepage_migration_supported(page_hstate(head)) &&
+ page_huge_active(head))
+ return pfn;
+ skip = (1 << compound_order(head)) - (page - head);
+ pfn += skip - 1;
}
return 0;
}
@@ -1344,7 +1352,6 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
{
unsigned long pfn;
struct page *page;
- int not_managed = 0;
int ret = 0;
LIST_HEAD(source);
@@ -1392,7 +1399,6 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
else
ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
if (!ret) { /* Success */
- put_page(page);
list_add_tail(&page->lru, &source);
if (!__PageMovable(page))
inc_node_page_state(page, NR_ISOLATED_ANON +
@@ -1401,22 +1407,10 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
} else {
pr_warn("failed to isolate pfn %lx\n", pfn);
dump_page(page, "isolation failed");
- put_page(page);
- /* Because we don't have big zone->lock. we should
- check this again here. */
- if (page_count(page)) {
- not_managed++;
- ret = -EBUSY;
- break;
- }
}
+ put_page(page);
}
if (!list_empty(&source)) {
- if (not_managed) {
- putback_movable_pages(&source);
- goto out;
- }
-
/* Allocate a new page from the nearest neighbor node */
ret = migrate_pages(&source, new_node_page, NULL, 0,
MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
@@ -1429,7 +1423,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
putback_movable_pages(&source);
}
}
-out:
+
return ret;
}
@@ -1576,7 +1570,6 @@ static int __ref __offline_pages(unsigned long start_pfn,
we assume this for now. .*/
if (!test_pages_in_a_zone(start_pfn, end_pfn, &valid_start,
&valid_end)) {
- mem_hotplug_done();
ret = -EINVAL;
reason = "multizone range";
goto failed_removal;
@@ -1591,7 +1584,6 @@ static int __ref __offline_pages(unsigned long start_pfn,
MIGRATE_MOVABLE,
SKIP_HWPOISON | REPORT_FAILURE);
if (ret) {
- mem_hotplug_done();
reason = "failure to isolate range";
goto failed_removal;
}
diff --git a/mm/migrate.c b/mm/migrate.c
index a16b15090df3..d4fd680be3b0 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -709,7 +709,6 @@ static bool buffer_migrate_lock_buffers(struct buffer_head *head,
/* Simple case, sync compaction */
if (mode != MIGRATE_ASYNC) {
do {
- get_bh(bh);
lock_buffer(bh);
bh = bh->b_this_page;
@@ -720,18 +719,15 @@ static bool buffer_migrate_lock_buffers(struct buffer_head *head,
/* async case, we cannot block on lock_buffer so use trylock_buffer */
do {
- get_bh(bh);
if (!trylock_buffer(bh)) {
/*
* We failed to lock the buffer and cannot stall in
* async migration. Release the taken locks
*/
struct buffer_head *failed_bh = bh;
- put_bh(failed_bh);
bh = head;
while (bh != failed_bh) {
unlock_buffer(bh);
- put_bh(bh);
bh = bh->b_this_page;
}
return false;
@@ -818,7 +814,6 @@ unlock_buffers:
bh = head;
do {
unlock_buffer(bh);
- put_bh(bh);
bh = bh->b_this_page;
} while (bh != head);
@@ -1135,10 +1130,13 @@ out:
* If migration is successful, decrease refcount of the newpage
* which will not free the page because new page owner increased
* refcounter. As well, if it is LRU page, add the page to LRU
- * list in here.
+ * list in here. Use the old state of the isolated source page to
+ * determine if we migrated a LRU page. newpage was already unlocked
+ * and possibly modified by its owner - don't rely on the page
+ * state.
*/
if (rc == MIGRATEPAGE_SUCCESS) {
- if (unlikely(__PageMovable(newpage)))
+ if (unlikely(!is_lru))
put_page(newpage);
else
putback_lru_page(newpage);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index f0e8cd9edb1a..26ea8636758f 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -647,8 +647,8 @@ static int oom_reaper(void *unused)
static void wake_oom_reaper(struct task_struct *tsk)
{
- /* tsk is already queued? */
- if (tsk == oom_reaper_list || tsk->oom_reaper_list)
+ /* mm is already queued? */
+ if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
return;
get_task_struct(tsk);
@@ -975,6 +975,13 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
* still freeing memory.
*/
read_lock(&tasklist_lock);
+
+ /*
+ * The task 'p' might have already exited before reaching here. The
+ * put_task_struct() will free task_struct 'p' while the loop still try
+ * to access the field of 'p', so, get an extra reference.
+ */
+ get_task_struct(p);
for_each_thread(p, t) {
list_for_each_entry(child, &t->children, sibling) {
unsigned int child_points;
@@ -994,6 +1001,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message)
}
}
}
+ put_task_struct(p);
read_unlock(&tasklist_lock);
/*
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
index 82121a81681f..29bac5ef9a93 100644
--- a/tools/testing/selftests/proc/.gitignore
+++ b/tools/testing/selftests/proc/.gitignore
@@ -10,4 +10,5 @@
/proc-uptime-002
/read
/self
+/setns-dcache
/thread-self
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile
index 1c12c34cf85d..434d033ee067 100644
--- a/tools/testing/selftests/proc/Makefile
+++ b/tools/testing/selftests/proc/Makefile
@@ -14,6 +14,7 @@ TEST_GEN_PROGS += proc-uptime-001
TEST_GEN_PROGS += proc-uptime-002
TEST_GEN_PROGS += read
TEST_GEN_PROGS += self
+TEST_GEN_PROGS += setns-dcache
TEST_GEN_PROGS += thread-self
include ../lib.mk
diff --git a/tools/testing/selftests/proc/setns-dcache.c b/tools/testing/selftests/proc/setns-dcache.c
new file mode 100644
index 000000000000..60ab197a73fc
--- /dev/null
+++ b/tools/testing/selftests/proc/setns-dcache.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright © 2019 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/*
+ * Test that setns(CLONE_NEWNET) points to new /proc/net content even
+ * if old one is in dcache.
+ *
+ * FIXME /proc/net/unix is under CONFIG_UNIX which can be disabled.
+ */
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+
+static pid_t pid = -1;
+
+static void f(void)
+{
+ if (pid > 0) {
+ kill(pid, SIGTERM);
+ }
+}
+
+int main(void)
+{
+ int fd[2];
+ char _ = 0;
+ int nsfd;
+
+ atexit(f);
+
+ /* Check for priviledges and syscall availability straight away. */
+ if (unshare(CLONE_NEWNET) == -1) {
+ if (errno == ENOSYS || errno == EPERM) {
+ return 4;
+ }
+ return 1;
+ }
+ /* Distinguisher between two otherwise empty net namespaces. */
+ if (socket(AF_UNIX, SOCK_STREAM, 0) == -1) {
+ return 1;
+ }
+
+ if (pipe(fd) == -1) {
+ return 1;
+ }
+
+ pid = fork();
+ if (pid == -1) {
+ return 1;
+ }
+
+ if (pid == 0) {
+ if (unshare(CLONE_NEWNET) == -1) {
+ return 1;
+ }
+
+ if (write(fd[1], &_, 1) != 1) {
+ return 1;
+ }
+
+ pause();
+
+ return 0;
+ }
+
+ if (read(fd[0], &_, 1) != 1) {
+ return 1;
+ }
+
+ {
+ char buf[64];
+ snprintf(buf, sizeof(buf), "/proc/%u/ns/net", pid);
+ nsfd = open(buf, O_RDONLY);
+ if (nsfd == -1) {
+ return 1;
+ }
+ }
+
+ /* Reliably pin dentry into dcache. */
+ (void)open("/proc/net/unix", O_RDONLY);
+
+ if (setns(nsfd, CLONE_NEWNET) == -1) {
+ return 1;
+ }
+
+ kill(pid, SIGTERM);
+ pid = 0;
+
+ {
+ char buf[4096];
+ ssize_t rv;
+ int fd;
+
+ fd = open("/proc/net/unix", O_RDONLY);
+ if (fd == -1) {
+ return 1;
+ }
+
+#define S "Num RefCount Protocol Flags Type St Inode Path\n"
+ rv = read(fd, buf, sizeof(buf));
+
+ assert(rv == strlen(S));
+ assert(memcmp(buf, S, strlen(S)) == 0);
+ }
+
+ return 0;
+}