aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/bpf
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf')
-rw-r--r--kernel/bpf/percpu_freelist.c101
-rw-r--r--kernel/bpf/percpu_freelist.h1
-rw-r--r--kernel/bpf/verifier.c34
3 files changed, 131 insertions, 5 deletions
diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c
index b367430e611c..3d897de89061 100644
--- a/kernel/bpf/percpu_freelist.c
+++ b/kernel/bpf/percpu_freelist.c
@@ -17,6 +17,8 @@ int pcpu_freelist_init(struct pcpu_freelist *s)
raw_spin_lock_init(&head->lock);
head->first = NULL;
}
+ raw_spin_lock_init(&s->extralist.lock);
+ s->extralist.first = NULL;
return 0;
}
@@ -40,12 +42,50 @@ static inline void ___pcpu_freelist_push(struct pcpu_freelist_head *head,
raw_spin_unlock(&head->lock);
}
+static inline bool pcpu_freelist_try_push_extra(struct pcpu_freelist *s,
+ struct pcpu_freelist_node *node)
+{
+ if (!raw_spin_trylock(&s->extralist.lock))
+ return false;
+
+ pcpu_freelist_push_node(&s->extralist, node);
+ raw_spin_unlock(&s->extralist.lock);
+ return true;
+}
+
+static inline void ___pcpu_freelist_push_nmi(struct pcpu_freelist *s,
+ struct pcpu_freelist_node *node)
+{
+ int cpu, orig_cpu;
+
+ orig_cpu = cpu = raw_smp_processor_id();
+ while (1) {
+ struct pcpu_freelist_head *head;
+
+ head = per_cpu_ptr(s->freelist, cpu);
+ if (raw_spin_trylock(&head->lock)) {
+ pcpu_freelist_push_node(head, node);
+ raw_spin_unlock(&head->lock);
+ return;
+ }
+ cpu = cpumask_next(cpu, cpu_possible_mask);
+ if (cpu >= nr_cpu_ids)
+ cpu = 0;
+
+ /* cannot lock any per cpu lock, try extralist */
+ if (cpu == orig_cpu &&
+ pcpu_freelist_try_push_extra(s, node))
+ return;
+ }
+}
+
void __pcpu_freelist_push(struct pcpu_freelist *s,
struct pcpu_freelist_node *node)
{
- struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist);
-
- ___pcpu_freelist_push(head, node);
+ if (in_nmi())
+ ___pcpu_freelist_push_nmi(s, node);
+ else
+ ___pcpu_freelist_push(this_cpu_ptr(s->freelist), node);
}
void pcpu_freelist_push(struct pcpu_freelist *s,
@@ -81,7 +121,7 @@ again:
}
}
-struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s)
+static struct pcpu_freelist_node *___pcpu_freelist_pop(struct pcpu_freelist *s)
{
struct pcpu_freelist_head *head;
struct pcpu_freelist_node *node;
@@ -102,8 +142,59 @@ struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s)
if (cpu >= nr_cpu_ids)
cpu = 0;
if (cpu == orig_cpu)
- return NULL;
+ break;
+ }
+
+ /* per cpu lists are all empty, try extralist */
+ raw_spin_lock(&s->extralist.lock);
+ node = s->extralist.first;
+ if (node)
+ s->extralist.first = node->next;
+ raw_spin_unlock(&s->extralist.lock);
+ return node;
+}
+
+static struct pcpu_freelist_node *
+___pcpu_freelist_pop_nmi(struct pcpu_freelist *s)
+{
+ struct pcpu_freelist_head *head;
+ struct pcpu_freelist_node *node;
+ int orig_cpu, cpu;
+
+ orig_cpu = cpu = raw_smp_processor_id();
+ while (1) {
+ head = per_cpu_ptr(s->freelist, cpu);
+ if (raw_spin_trylock(&head->lock)) {
+ node = head->first;
+ if (node) {
+ head->first = node->next;
+ raw_spin_unlock(&head->lock);
+ return node;
+ }
+ raw_spin_unlock(&head->lock);
+ }
+ cpu = cpumask_next(cpu, cpu_possible_mask);
+ if (cpu >= nr_cpu_ids)
+ cpu = 0;
+ if (cpu == orig_cpu)
+ break;
}
+
+ /* cannot pop from per cpu lists, try extralist */
+ if (!raw_spin_trylock(&s->extralist.lock))
+ return NULL;
+ node = s->extralist.first;
+ if (node)
+ s->extralist.first = node->next;
+ raw_spin_unlock(&s->extralist.lock);
+ return node;
+}
+
+struct pcpu_freelist_node *__pcpu_freelist_pop(struct pcpu_freelist *s)
+{
+ if (in_nmi())
+ return ___pcpu_freelist_pop_nmi(s);
+ return ___pcpu_freelist_pop(s);
}
struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)
diff --git a/kernel/bpf/percpu_freelist.h b/kernel/bpf/percpu_freelist.h
index fbf8a8a28979..3c76553cfe57 100644
--- a/kernel/bpf/percpu_freelist.h
+++ b/kernel/bpf/percpu_freelist.h
@@ -13,6 +13,7 @@ struct pcpu_freelist_head {
struct pcpu_freelist {
struct pcpu_freelist_head __percpu *freelist;
+ struct pcpu_freelist_head extralist;
};
struct pcpu_freelist_node {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 89b07db14676..12eb9e47d101 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1470,6 +1470,10 @@ static int check_subprogs(struct bpf_verifier_env *env)
for (i = 0; i < insn_cnt; i++) {
u8 code = insn[i].code;
+ if (code == (BPF_JMP | BPF_CALL) &&
+ insn[i].imm == BPF_FUNC_tail_call &&
+ insn[i].src_reg != BPF_PSEUDO_CALL)
+ subprog[cur_subprog].has_tail_call = true;
if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
goto next;
if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
@@ -2951,6 +2955,31 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
int ret_prog[MAX_CALL_FRAMES];
process_func:
+ /* protect against potential stack overflow that might happen when
+ * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
+ * depth for such case down to 256 so that the worst case scenario
+ * would result in 8k stack size (32 which is tailcall limit * 256 =
+ * 8k).
+ *
+ * To get the idea what might happen, see an example:
+ * func1 -> sub rsp, 128
+ * subfunc1 -> sub rsp, 256
+ * tailcall1 -> add rsp, 256
+ * func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
+ * subfunc2 -> sub rsp, 64
+ * subfunc22 -> sub rsp, 128
+ * tailcall2 -> add rsp, 128
+ * func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
+ *
+ * tailcall will unwind the current stack frame but it will not get rid
+ * of caller's stack as shown on the example above.
+ */
+ if (idx && subprog[idx].has_tail_call && depth >= 256) {
+ verbose(env,
+ "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
+ depth);
+ return -EACCES;
+ }
/* round up to 32-bytes, since this is granularity
* of interpreter stack size
*/
@@ -10862,6 +10891,11 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
}
if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
+ if (tgt_prog) {
+ verbose(env, "can't modify return codes of BPF programs\n");
+ ret = -EINVAL;
+ goto out;
+ }
ret = check_attach_modify_return(prog, addr);
if (ret)
verbose(env, "%s() is not modifiable\n",