Index: net/pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.1180
diff -u -p -r1.1180 pf.c
--- net/pf.c	15 May 2023 16:34:56 -0000	1.1180
+++ net/pf.c	2 Jun 2023 04:20:08 -0000
@@ -40,6 +40,7 @@
 #include "pflog.h"
 #include "pfsync.h"
 #include "pflow.h"
+#include "kstat.h"
 
 #include
 #include
@@ -53,6 +54,7 @@
 #include
 #include
 #include
+#include
 #include
@@ -8250,4 +8252,133 @@ pf_pktenqueue_delayed(void *arg)
 
 	m_freem(pdy->m);
 	pool_put(&pf_pktdelay_pl, pdy);
+}
+
+/*
+ * pf locks
+ */
+
+enum pf_state_lock_counters {
+	pfslkc_wlocks,
+	pfslkc_wcontended,
+	pfslkc_rlocks,
+	pfslkc_rcontended,
+
+	pfslkc_ncounters
+};
+
+COUNTERS_BOOT_MEMORY(pf_state_lock_counters_boot, pfslkc_ncounters);
+struct cpumem *pf_state_lock_counters =
+    COUNTERS_BOOT_INITIALIZER(pf_state_lock_counters_boot);
+
+struct pf_lock_kstat_data {
+	struct kstat_kv kd_locks;
+	struct kstat_kv kd_contended;
+};
+
+/* this is protected by PF_LOCK() */
+static struct pf_lock_kstat_data pf_lock_kstats = {
+	KSTAT_KV_INITIALIZER("locks", KSTAT_KV_T_COUNTER64),
+	KSTAT_KV_INITIALIZER("contended", KSTAT_KV_T_COUNTER64),
+};
+
+#if NKSTAT > 0
+static int
+pf_state_lock_kstat_read(struct kstat *ks)
+{
+	uint64_t counters[pfslkc_ncounters];
+	struct kstat_kv *kvs = ks->ks_data;
+	size_t i;
+
+	counters_read(pf_state_lock_counters, counters, nitems(counters));
+	nanouptime(&ks->ks_updated);
+
+	for (i = 0; i < nitems(counters); i++)
+		kstat_kv_u64(&kvs[i]) = counters[i];
+
+	return (0);
+}
+#endif
+
+void
+pf_locks_init(void)
+{
+#if NKSTAT > 0
+	struct kstat *ks;
+	struct kstat_kv *kvs;
+
+	ks = kstat_create("pf", 0, "lock", 0, KSTAT_T_KV, 0);
+	KASSERT(ks != NULL);
+	kstat_set_wlock(ks, &pf_lock);
+	ks->ks_data = &pf_lock_kstats;
+	ks->ks_datalen = sizeof(pf_lock_kstats);
+	kstat_install(ks);
+
+	kvs = mallocarray(pfslkc_ncounters, sizeof(*kvs), M_DEVBUF, M_WAITOK);
+	kstat_kv_init(&kvs[pfslkc_wlocks], "wlocks",
+	    KSTAT_KV_T_COUNTER64);
+	kstat_kv_init(&kvs[pfslkc_wcontended], "wcontended",
+	    KSTAT_KV_T_COUNTER64);
+	kstat_kv_init(&kvs[pfslkc_rlocks], "rlocks",
+	    KSTAT_KV_T_COUNTER64);
+	kstat_kv_init(&kvs[pfslkc_rcontended], "rcontended",
+	    KSTAT_KV_T_COUNTER64);
+
+	ks = kstat_create("pf", 0, "state-lock", 0, KSTAT_T_KV, 0);
+	KASSERT(ks != NULL);
+	ks->ks_data = kvs;
+	ks->ks_datalen = pfslkc_ncounters * sizeof(*kvs);
+	ks->ks_read = pf_state_lock_kstat_read;
+	kstat_install(ks);
+#endif
+
+	pf_state_lock_counters = counters_alloc_ncpus(pf_state_lock_counters,
+	    pfslkc_ncounters);
+}
+
+void
+pf_lock_enter(void)
+{
+	if (rw_enter_write(&pf_lock) != 0)
+		kstat_kv_u64(&pf_lock_kstats.kd_contended)++;
+
+	kstat_kv_u64(&pf_lock_kstats.kd_locks)++;
+}
+
+void
+pf_lock_leave(void)
+{
+	rw_exit_write(&pf_lock);
+}
+
+void
+pf_state_enter_read(void)
+{
+	struct counters_ref r;
+	uint64_t *c;
+	int busy;
+
+	busy = rw_enter_read(&pf_state_lock);
+
+	c = counters_enter(&r, pf_state_lock_counters);
+	c[pfslkc_rlocks]++;
+	if (busy)
+		c[pfslkc_rcontended]++;
+	counters_leave(&r, pf_state_lock_counters);
+}
+
+void
+pf_state_enter_write(void)
+{
+	struct counters_ref r;
+	uint64_t *c;
+	int busy;
+
+	busy = rw_enter_write(&pf_state_lock);
+
+	c = counters_enter(&r, pf_state_lock_counters);
+	c[pfslkc_wlocks]++;
+	if (busy)
+		c[pfslkc_wcontended]++;
+	counters_leave(&r, pf_state_lock_counters);
 }
Index: net/pf_ioctl.c
===================================================================
RCS file: /cvs/src/sys/net/pf_ioctl.c,v
retrieving revision 1.405
diff -u -p -r1.405 pf_ioctl.c
--- net/pf_ioctl.c	26 May 2023 12:13:26 -0000	1.405
+++ net/pf_ioctl.c	2 Jun 2023 04:20:08 -0000
@@ -187,6 +187,8 @@ pfattach(int num)
 	struct pf_anchor_stackframe *sf;
 	struct cpumem_iter cmi;
 
+	pf_locks_init();
+
 	pool_init(&pf_rule_pl, sizeof(struct pf_rule), 0,
 	    IPL_SOFTNET, 0, "pfrule", NULL);
 	pool_init(&pf_src_tree_pl, sizeof(struct pf_src_node), 0,
Index: net/pfvar_priv.h
===================================================================
RCS file: /cvs/src/sys/net/pfvar_priv.h,v
retrieving revision 1.33
diff -u -p -r1.33 pfvar_priv.h
--- net/pfvar_priv.h	10 May 2023 22:42:51 -0000	1.33
+++ net/pfvar_priv.h	2 Jun 2023 04:20:08 -0000
@@ -354,14 +354,13 @@ void	pf_state_unref(struct pf_state *
 extern struct rwlock	pf_lock;
 extern struct rwlock	pf_state_lock;
 
-#define PF_LOCK()		do {			\
-		rw_enter_write(&pf_lock);		\
-	} while (0)
+void	pf_locks_init(void);
 
-#define PF_UNLOCK()		do {			\
-		PF_ASSERT_LOCKED();			\
-		rw_exit_write(&pf_lock);		\
-	} while (0)
+void	pf_lock_enter(void);
+void	pf_lock_leave(void);
+
+#define PF_LOCK()	pf_lock_enter()
+#define PF_UNLOCK()	pf_lock_leave()
 
 #define PF_ASSERT_LOCKED()	do {			\
 	if (rw_status(&pf_lock) != RW_WRITE)		\
@@ -374,22 +373,14 @@ extern struct rwlock	pf_state_lock;
 		splassert_fail(0, rw_status(&pf_lock), __func__);\
 	} while (0)
 
-#define PF_STATE_ENTER_READ()	do {			\
-		rw_enter_read(&pf_state_lock);		\
-	} while (0)
-
-#define PF_STATE_EXIT_READ()	do {			\
-		rw_exit_read(&pf_state_lock);		\
-	} while (0)
+void	pf_state_enter_read(void);
+void	pf_state_enter_write(void);
 
-#define PF_STATE_ENTER_WRITE()	do {			\
-		rw_enter_write(&pf_state_lock);		\
-	} while (0)
+#define PF_STATE_ENTER_READ()	pf_state_enter_read()
+#define PF_STATE_EXIT_READ()	rw_exit_read(&pf_state_lock)
 
-#define PF_STATE_EXIT_WRITE()	do {			\
-		PF_STATE_ASSERT_LOCKED();		\
-		rw_exit_write(&pf_state_lock);		\
-	} while (0)
+#define PF_STATE_ENTER_WRITE()	pf_state_enter_write()
+#define PF_STATE_EXIT_WRITE()	rw_exit_write(&pf_state_lock)
 
 #define PF_STATE_ASSERT_LOCKED()	do {		\
 	if (rw_status(&pf_state_lock) != RW_WRITE)\
Index: sys/rwlock.h
===================================================================
RCS file: /cvs/src/sys/sys/rwlock.h,v
retrieving revision 1.28
diff -u -p -r1.28 rwlock.h
--- sys/rwlock.h	11 Jan 2021 18:49:38 -0000	1.28
+++ sys/rwlock.h	2 Jun 2023 04:20:08 -0000
@@ -32,11 +32,11 @@
  *
  * We provide a simple machine independent implementation:
  *
- * void	rw_enter_read(struct rwlock *)
+ * int	rw_enter_read(struct rwlock *)
  * atomically test for RWLOCK_WRLOCK and if not set, increment the lock
  * by RWLOCK_READ_INCR. While RWLOCK_WRLOCK is set, loop into rw_enter_wait.
  *
- * void	rw_enter_write(struct rwlock *);
+ * int	rw_enter_write(struct rwlock *);
  * atomically test for the lock being 0 (it's not possible to have
  * owner/read count unset and waiter bits set) and if 0 set the owner to
  * the proc and RWLOCK_WRLOCK. While not zero, loop into rw_enter_wait.
@@ -147,8 +147,8 @@ void	_rw_init_flags(struct rwlock *, con
 #define rw_init(rwl, name)	_rw_init_flags(rwl, name, 0, NULL)
 #endif /* WITNESS */
 
-void	rw_enter_read(struct rwlock *);
-void	rw_enter_write(struct rwlock *);
+int	rw_enter_read(struct rwlock *);
+int	rw_enter_write(struct rwlock *);
 void	rw_exit_read(struct rwlock *);
 void	rw_exit_write(struct rwlock *);
 
Index: kern/kern_rwlock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_rwlock.c,v
retrieving revision 1.48
diff -u -p -r1.48 kern_rwlock.c
--- kern/kern_rwlock.c	10 May 2022 16:56:16 -0000	1.48
+++ kern/kern_rwlock.c	2 Jun 2023 04:20:08 -0000
@@ -98,35 +98,43 @@ static const struct rwlock_op {
 	},
 };
 
-void
+int
 rw_enter_read(struct rwlock *rwl)
 {
 	unsigned long owner = rwl->rwl_owner;
+	int rv = 0;
 
 	if (__predict_false((owner & (RWLOCK_WRLOCK | RWLOCK_WRWANT)) ||
-	    rw_cas(&rwl->rwl_owner, owner, owner + RWLOCK_READ_INCR)))
+	    rw_cas(&rwl->rwl_owner, owner, owner + RWLOCK_READ_INCR))) {
 		rw_enter(rwl, RW_READ);
-	else {
+		rv = EBUSY;
+	} else {
 		membar_enter_after_atomic();
 		WITNESS_CHECKORDER(&rwl->rwl_lock_obj, LOP_NEWORDER, NULL);
 		WITNESS_LOCK(&rwl->rwl_lock_obj, 0);
 	}
+
+	return (rv);
 }
 
-void
+int
 rw_enter_write(struct rwlock *rwl)
 {
 	struct proc *p = curproc;
+	int rv = 0;
 
 	if (__predict_false(rw_cas(&rwl->rwl_owner, 0,
-	    RW_PROC(p) | RWLOCK_WRLOCK)))
+	    RW_PROC(p) | RWLOCK_WRLOCK))) {
 		rw_enter(rwl, RW_WRITE);
-	else {
+		rv = EBUSY;
+	} else {
 		membar_enter_after_atomic();
 		WITNESS_CHECKORDER(&rwl->rwl_lock_obj, LOP_EXCLUSIVE |
 		    LOP_NEWORDER, NULL);
 		WITNESS_LOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);
 	}
+
+	return (rv);
 }
 
 void
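Not part of the diff, just to illustrate the interface change it relies on: with rw_enter_read() and rw_enter_write() now returning non-zero (EBUSY) when the uncontended fast path misses, any subsystem can keep its own contention counters the same way the pf wrappers above do. The sketch below shows that pattern in isolation; frob_lock, frob_counters and the frobc_* names are made up for the example and are not in the tree.

#include <sys/param.h>
#include <sys/rwlock.h>
#include <sys/percpu.h>

enum frob_lock_counters {
	frobc_locks,		/* times the lock was taken */
	frobc_contended,	/* times rw_enter_write() reported EBUSY */

	frobc_ncounters
};

struct rwlock frob_lock = RWLOCK_INITIALIZER("froblk");
struct cpumem *frob_counters;		/* allocated in frob_init() */

void
frob_init(void)
{
	frob_counters = counters_alloc(frobc_ncounters);
}

void
frob_lock_enter(void)
{
	struct counters_ref r;
	uint64_t *c;
	int busy;

	/* non-zero means the fast path failed and we had to wait */
	busy = rw_enter_write(&frob_lock);

	c = counters_enter(&r, frob_counters);
	c[frobc_locks]++;
	if (busy)
		c[frobc_contended]++;
	counters_leave(&r, frob_counters);
}

void
frob_lock_leave(void)
{
	rw_exit_write(&frob_lock);
}

The per-CPU counters_enter()/counters_leave() pattern here is the same one pf_state_enter_read() uses in the diff, so frequent callers do not end up serialising on a single shared counter just to record how often the lock was busy.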