diff options
Diffstat (limited to 'net/core/sock_map.c')
-rw-r--r-- | net/core/sock_map.c | 69 |
1 files changed, 53 insertions, 16 deletions
diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 3b14de0e36d2..285976f79cba 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -238,20 +238,26 @@ static void sock_map_free(struct bpf_map *map) struct bpf_stab *stab = container_of(map, struct bpf_stab, map); int i; + /* After the sync no updates or deletes will be in-flight so it + * is safe to walk map and remove entries without risking a race + * in EEXIST update case. + */ synchronize_rcu(); - rcu_read_lock(); - raw_spin_lock_bh(&stab->lock); for (i = 0; i < stab->map.max_entries; i++) { struct sock **psk = &stab->sks[i]; struct sock *sk; sk = xchg(psk, NULL); - if (sk) + if (sk) { + lock_sock(sk); + rcu_read_lock(); sock_map_unref(sk, psk); + release_sock(sk); + rcu_read_unlock(); + } } - raw_spin_unlock_bh(&stab->lock); - rcu_read_unlock(); + /* wait for psock readers accessing its map link */ synchronize_rcu(); bpf_map_area_free(stab->sks); @@ -418,14 +424,16 @@ static int sock_map_update_elem(struct bpf_map *map, void *key, ret = -EINVAL; goto out; } - if (!sock_map_sk_is_suitable(sk) || - sk->sk_state != TCP_ESTABLISHED) { + if (!sock_map_sk_is_suitable(sk)) { ret = -EOPNOTSUPP; goto out; } sock_map_sk_acquire(sk); - ret = sock_map_update_common(map, idx, sk, flags); + if (sk->sk_state != TCP_ESTABLISHED) + ret = -EOPNOTSUPP; + else + ret = sock_map_update_common(map, idx, sk, flags); sock_map_sk_release(sk); out: fput(sock->file); @@ -741,14 +749,16 @@ static int sock_hash_update_elem(struct bpf_map *map, void *key, ret = -EINVAL; goto out; } - if (!sock_map_sk_is_suitable(sk) || - sk->sk_state != TCP_ESTABLISHED) { + if (!sock_map_sk_is_suitable(sk)) { ret = -EOPNOTSUPP; goto out; } sock_map_sk_acquire(sk); - ret = sock_hash_update_common(map, key, sk, flags); + if (sk->sk_state != TCP_ESTABLISHED) + ret = -EOPNOTSUPP; + else + ret = sock_hash_update_common(map, key, sk, flags); sock_map_sk_release(sk); out: fput(sock->file); @@ -856,22 +866,49 @@ static void sock_hash_free(struct bpf_map *map) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); struct bpf_htab_bucket *bucket; + struct hlist_head unlink_list; struct bpf_htab_elem *elem; struct hlist_node *node; int i; + /* After the sync no updates or deletes will be in-flight so it + * is safe to walk map and remove entries without risking a race + * in EEXIST update case. + */ synchronize_rcu(); - rcu_read_lock(); for (i = 0; i < htab->buckets_num; i++) { bucket = sock_hash_select_bucket(htab, i); + + /* We are racing with sock_hash_delete_from_link to + * enter the spin-lock critical section. Every socket on + * the list is still linked to sockhash. Since link + * exists, psock exists and holds a ref to socket. That + * lets us to grab a socket ref too. + */ raw_spin_lock_bh(&bucket->lock); - hlist_for_each_entry_safe(elem, node, &bucket->head, node) { - hlist_del_rcu(&elem->node); + hlist_for_each_entry(elem, &bucket->head, node) + sock_hold(elem->sk); + hlist_move_list(&bucket->head, &unlink_list); + raw_spin_unlock_bh(&bucket->lock); + + /* Process removed entries out of atomic context to + * block for socket lock before deleting the psock's + * link to sockhash. + */ + hlist_for_each_entry_safe(elem, node, &unlink_list, node) { + hlist_del(&elem->node); + rcu_read_lock(); + lock_sock(elem->sk); sock_map_unref(elem->sk, elem); + release_sock(elem->sk); + sock_put(elem->sk); + sock_hash_free_elem(htab, elem); + rcu_read_unlock(); } - raw_spin_unlock_bh(&bucket->lock); } - rcu_read_unlock(); + + /* wait for psock readers accessing its map link */ + synchronize_rcu(); bpf_map_area_free(htab->buckets); kfree(htab); |