Index: dev/kstat.c
===================================================================
RCS file: /cvs/src/sys/dev/kstat.c,v
diff -u -p -r1.5 kstat.c
--- dev/kstat.c	18 Jan 2025 12:31:49 -0000	1.5
+++ dev/kstat.c	8 Jul 2025 05:41:36 -0000
@@ -166,6 +166,9 @@ int
 kstatattach(int num)
 {
 	/* XXX install system stats here */
+#if 1
+	mxq_kstat_attach(&sched_lock, "bsd", 0, "sched-lock", 0);
+#endif
 
 	return (0);
 }
Index: kern/kern_fork.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_fork.c,v
diff -u -p -r1.273 kern_fork.c
--- kern/kern_fork.c	9 Jun 2025 11:11:03 -0000	1.273
+++ kern/kern_fork.c	8 Jul 2025 05:41:36 -0000
@@ -709,7 +709,7 @@ proc_trampoline_mi(void)
 
 	SCHED_ASSERT_LOCKED();
 	clear_resched(curcpu());
-	mtx_leave(&sched_lock);
+	SCHED_UNLOCK();
 	spl0();
 
 	SCHED_ASSERT_UNLOCKED();
Index: kern/kern_lock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_lock.c,v
diff -u -p -r1.81 kern_lock.c
--- kern/kern_lock.c	24 Jun 2025 15:37:43 -0000	1.81
+++ kern/kern_lock.c	8 Jul 2025 05:41:36 -0000
@@ -18,6 +18,8 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
+#include "kstat.h"
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sched.h>
@@ -560,3 +562,256 @@ pc_cons_leave(struct pc_lock *pcl, unsig
 	*genp = gen;
 	return (EBUSY);
 }
+
+/*
+ * mutex queue lock thing
+ */
+
+struct mxq_waiter {
+	struct cpu_info * volatile w_owner;
+	struct mxq_waiter *w_next;
+};
+
+void
+mxq_init(struct mxq *mxq, int wantipl)
+{
+	mxq->mxq_lock = 0;
+	mxq->mxq_head = NULL;
+	mxq->mxq_tail = &mxq->mxq_head;
+	mxq->mxq_owner = NULL;
+	mxq->mxq_wantipl = wantipl;
+	mxq->mxq_oldipl = IPL_NONE;
+
+	mxq->mxq_spins = 0;
+	mxq->mxq_enter = 0;
+	mxq->mxq_enter_fails = 0;
+	mxq->mxq_waits = 0;
+}
+
+static int
+mxq_spin_enter(struct mxq *mxq, struct cpu_info *ci)
+{
+	struct schedstate_percpu *spc = &ci->ci_schedstate;
+
+	if (atomic_cas_uint(&mxq->mxq_lock, 0, 1) == 0) {
+		membar_enter_after_atomic();
+		return (0);
+	}
+
+	spc->spc_spinning++;
+	do {
+		do {
+			CPU_BUSY_CYCLE();
+		} while (mxq->mxq_lock != 0);
+	} while (atomic_cas_uint(&mxq->mxq_lock, 0, 1) != 0);
+	membar_enter_after_atomic();
+	spc->spc_spinning--;
+
+	mxq->mxq_spins++;
+
+	return (1);
+}
+
+static void
+mxq_spin_leave(struct mxq *mxq)
+{
+	membar_exit();
+	mxq->mxq_lock = 0;
+}
+
+int
+mxq_enter_try(struct mxq *mxq)
+{
+	struct cpu_info *self = curcpu();
+	struct cpu_info *owner;
+	int rv = 0;
+	int s;
+
+	s = splraise(mxq->mxq_wantipl);
+
+	mxq_spin_enter(mxq, self);
+	owner = mxq->mxq_owner;
+	if (owner == NULL) {
+		mxq->mxq_enter++;
+		mxq->mxq_owner = self;
+		rv = 1;
+	} else if (__predict_false(owner == self))
+		panic("%s: mxq %p: already owned by this CPU", __func__, mxq);
+	else
+		mxq->mxq_enter_fails++;
+	mxq_spin_leave(mxq);
+
+	if (rv) {
+		membar_enter();
+		mxq->mxq_oldipl = s;
+	} else
+		splx(s);
+
+	return (rv);
+}
+
+void
+mxq_enter(struct mxq *mxq)
+{
+	struct cpu_info *self = curcpu();
+	struct cpu_info *owner;
+	struct mxq_waiter w = { .w_owner = self };
+	int s;
+
+	s = splraise(mxq->mxq_wantipl);
+
+	mxq_spin_enter(mxq, self);
+	mxq->mxq_enter++;
+	owner = mxq->mxq_owner;
+	if (owner == NULL)		/* take ownership */
+		mxq->mxq_owner = self;
+	else if (__predict_false(owner == self))
+		panic("%s: mxq %p: already owned by this CPU", __func__, mxq);
+	else {				/* queue for ownership */
+		mxq->mxq_waits++;
+
+		if (mxq->mxq_tail == NULL)	/* cope with MXQ_INITIALIZER */
+			mxq->mxq_tail = &mxq->mxq_head;
+
+		*mxq->mxq_tail = &w;
+		mxq->mxq_tail = &w.w_next;
+	}
+	mxq_spin_leave(mxq);
+
+	if (owner != NULL) {
+		struct schedstate_percpu *spc = &self->ci_schedstate;
+
+		spc->spc_spinning++;
+		while (w.w_owner != NULL)
+			CPU_BUSY_CYCLE();
+		spc->spc_spinning--;
+	}
+
+	membar_enter();
+	mxq->mxq_oldipl = s;
+}
+
+void
+mxq_leave(struct mxq *mxq)
+{
+	struct mxq_waiter *next;
+	int s;
+
+	s = mxq->mxq_oldipl;
+
+	membar_exit();
+	mxq_spin_enter(mxq, curcpu());
+	if (__predict_false(mxq->mxq_owner != curcpu()))
+		panic("%s: mxq %p: not owned by this CPU", __func__, mxq);
+
+	next = mxq->mxq_head;
+	if (next != NULL) {		/* move ownership */
+		mxq->mxq_owner = next->w_owner;
+
+		mxq->mxq_head = next->w_next;
+		if (mxq->mxq_head == NULL)
+			mxq->mxq_tail = &mxq->mxq_head;
+
+		membar_producer();
+		next->w_owner = NULL;	/* let the next cpu proceed */
+	} else				/* drop ownership */
+		mxq->mxq_owner = NULL;
+	mxq_spin_leave(mxq);
+
+	splx(s);
+}
+
+#if NKSTAT > 0
+#include <sys/kstat.h>
+#include <sys/malloc.h>
+
+struct mxq_kstats {
+	struct kstat_kv		mxq_ks_spins;
+	struct kstat_kv		mxq_ks_enter;
+	struct kstat_kv		mxq_ks_enter_fails;
+	struct kstat_kv		mxq_ks_waits;
+};
+
+static int	mxq_kstat_read(struct kstat *);
+
+struct kstat *
+mxq_kstat_attach(struct mxq *mxq, const char *provider, unsigned int instance,
+    const char *name, unsigned int unit)
+{
+	struct kstat *ks;
+	struct mxq_kstats *kvs;
+
+	kvs = malloc(sizeof(*kvs), M_COUNTERS, M_NOWAIT|M_ZERO);
+	if (kvs == NULL)
+		return (NULL);
+
+	ks = kstat_create(provider, instance, name, unit, KSTAT_T_KV, 0);
+	if (ks == NULL) {
+		free(kvs, M_COUNTERS, sizeof(*kvs));
+		return (NULL);
+	}
+
+	kstat_kv_init(&kvs->mxq_ks_spins, "spins", KSTAT_KV_T_COUNTER64);
+	kstat_kv_init(&kvs->mxq_ks_enter, "enters", KSTAT_KV_T_COUNTER64);
+	kstat_kv_init(&kvs->mxq_ks_enter_fails, "enter_try-fails",
+	    KSTAT_KV_T_COUNTER64);
+	kstat_kv_init(&kvs->mxq_ks_waits, "contended", KSTAT_KV_T_COUNTER64);
+
+	ks->ks_softc = mxq;
+	ks->ks_data = kvs;
+	ks->ks_datalen = sizeof(*kvs);
+	ks->ks_read = mxq_kstat_read;
+
+	kstat_install(ks);
+
+	return (ks);
+}
+
+static int
+mxq_kstat_read(struct kstat *ks)
+{
+	struct mxq *mxq = ks->ks_softc;
+	struct mxq_kstats *kvs = ks->ks_data;
+	int s;
+
+	s = splraise(mxq->mxq_wantipl);
+	mxq_spin_enter(mxq, curcpu());
+	kstat_kv_u64(&kvs->mxq_ks_spins) = mxq->mxq_spins;
+	kstat_kv_u64(&kvs->mxq_ks_enter) = mxq->mxq_enter;
+	kstat_kv_u64(&kvs->mxq_ks_enter_fails) = mxq->mxq_enter_fails;
+	kstat_kv_u64(&kvs->mxq_ks_waits) = mxq->mxq_waits;
+	mxq_spin_leave(mxq);
+	splx(s);
+
+	nanouptime(&ks->ks_updated);
+
+	return (0);
+}
+
+void
+mxq_kstat_detach(struct kstat *ks)
+{
+	struct mxq_kstats *kvs;
+
+	if (ks == NULL)
+		return;
+
+	kstat_remove(ks);
+	kvs = ks->ks_data;
+	kstat_destroy(ks);
+
+	free(kvs, M_COUNTERS, sizeof(*kvs));
+}
+#else /* NKSTAT > 0 */
+struct kstat *
+mxq_kstat_attach(struct mxq *mxq, const char *provider, unsigned int instance,
+    const char *name, unsigned int unit)
+{
+	return (NULL);
+}
+
+void
+mxq_kstat_detach(struct kstat *ks)
+{
+	/* nop */
+}
+#endif /* NKSTAT > 0 */
Index: kern/sched_bsd.c
===================================================================
RCS file: /cvs/src/sys/kern/sched_bsd.c,v
diff -u -p -r1.103 sched_bsd.c
--- kern/sched_bsd.c	16 Jun 2025 09:55:47 -0000	1.103
+++ kern/sched_bsd.c	8 Jul 2025 05:41:36 -0000
@@ -57,7 +57,11 @@
 uint64_t	roundrobin_period;	/* [I] roundrobin period (ns) */
 int	lbolt;			/* once a second sleep address */
 
+#if 0
 struct mutex sched_lock;
+#else
+struct mxq sched_lock __aligned(64);
+#endif
 
 void			update_loadavg(void *);
 void			schedcpu(void *);
@@ -388,7 +392,11 @@ mi_switch(void)
 	nextproc = sched_chooseproc();
 
 	/* preserve old IPL level so we can switch back to that */
+#if 0
 	oldipl = MUTEX_OLDIPL(&sched_lock);
+#else
+	oldipl = MXQ_OLDIPL(&sched_lock);
+#endif
 
 	if (p != nextproc) {
 		uvmexp.swtch++;
@@ -406,7 +414,11 @@ mi_switch(void)
 	SCHED_ASSERT_LOCKED();
 
 	/* Restore proc's IPL. */
+#if 0
 	MUTEX_OLDIPL(&sched_lock) = oldipl;
+#else
+	MXQ_OLDIPL(&sched_lock) = oldipl;
+#endif
 	SCHED_UNLOCK();
 
 	SCHED_ASSERT_UNLOCKED();
Index: sys/mutex.h
===================================================================
RCS file: /cvs/src/sys/sys/mutex.h,v
diff -u -p -r1.23 mutex.h
--- sys/mutex.h	2 Jul 2025 14:36:56 -0000	1.23
+++ sys/mutex.h	8 Jul 2025 05:41:36 -0000
@@ -165,4 +165,52 @@ void	db_mtx_leave(struct db_mutex *);
 
 #endif /* _KERNEL && DDB */
 
-#endif
+/*
+ * mutex with a wait queue
+ */
+struct mxq_waiter;
+
+struct mxq {
+	void			*mxq_owner;
+	struct mxq_waiter	*mxq_head;
+	struct mxq_waiter	**mxq_tail;
+	volatile unsigned int	 mxq_lock;
+	int			 mxq_wantipl;
+	int			 mxq_oldipl;
+
+	uint64_t		 mxq_spins;
+	uint64_t		 mxq_enter;
+	uint64_t		 mxq_enter_fails;
+	uint64_t		 mxq_waits;
+};
+
+#if defined(_KERNEL)
+#define MXQ_INITIALIZER(_ipl) { .mxq_wantipl = (_ipl) }
+
+void	mxq_init(struct mxq *, int);
+int	mxq_enter_try(struct mxq *);
+void	mxq_enter(struct mxq *);
+void	mxq_leave(struct mxq *);
+
+#define MXQ_OLDIPL(_mxq)	(_mxq)->mxq_oldipl
+
+#define MXQ_ASSERT_LOCKED(_mxq) do {					\
+	if (((_mxq)->mxq_owner != curcpu()) &&				\
+	    !(panicstr || db_active))					\
+		panic("mxq %p not held in %s", (_mxq), __func__);	\
+} while (0)
+
+#define MXQ_ASSERT_UNLOCKED(_mxq) do {					\
+	if (((_mxq)->mxq_owner == curcpu()) &&				\
+	    !(panicstr || db_active))					\
+		panic("mxq %p held in %s", (_mxq), __func__);		\
+} while (0)
+
+struct kstat;
+
+struct kstat	*mxq_kstat_attach(struct mxq *,
+		     const char *, unsigned int, const char *, unsigned int);
+void		 mxq_kstat_detach(struct kstat *);
+#endif /* defined(_KERNEL) */
+
+#endif /* _SYS_MUTEX_H_ */
Index: sys/sched.h
===================================================================
RCS file: /cvs/src/sys/sys/sched.h,v
diff -u -p -r1.77 sched.h
--- sys/sched.h	9 Jun 2025 10:57:46 -0000	1.77
+++ sys/sched.h	8 Jul 2025 05:41:36 -0000
@@ -203,6 +203,7 @@ void remrunqueue(struct proc *);
 		func();						\
 } while (0)
 
+#if 0
 extern struct mutex sched_lock;
 
 #define	SCHED_ASSERT_LOCKED()	MUTEX_ASSERT_LOCKED(&sched_lock)
@@ -211,6 +212,16 @@ extern struct mutex sched_lock;
 #define	SCHED_LOCK_INIT()	mtx_init(&sched_lock, IPL_SCHED)
 #define	SCHED_LOCK()		mtx_enter(&sched_lock)
 #define	SCHED_UNLOCK()		mtx_leave(&sched_lock)
+#else
+extern struct mxq sched_lock;
+
+#define	SCHED_ASSERT_LOCKED()	MXQ_ASSERT_LOCKED(&sched_lock)
+#define	SCHED_ASSERT_UNLOCKED()	MXQ_ASSERT_UNLOCKED(&sched_lock)
+
+#define	SCHED_LOCK_INIT()	mxq_init(&sched_lock, IPL_SCHED)
+#define	SCHED_LOCK()		mxq_enter(&sched_lock)
+#define	SCHED_UNLOCK()		mxq_leave(&sched_lock)
+#endif
 
 #endif	/* _KERNEL */
 #endif	/* _SYS_SCHED_H_ */
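
For context, here is a minimal sketch of how another subsystem could use the mxq API above. The example_softc structure, the function names, the IPL choice, and the "example" kstat provider string are hypothetical, invented only for illustration; the rest is the API from the diff (mxq_init()/MXQ_INITIALIZER(), mxq_enter()/mxq_enter_try()/mxq_leave(), plus the optional kstat hooks). With kstat enabled, the per-lock spin/enter/contention counters then show up via kstat(1) under the chosen provider name; with kstat disabled, mxq_kstat_attach() returns NULL and mxq_kstat_detach() copes with that.

/* hypothetical mxq consumer, for illustration only */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mutex.h>		/* struct mxq and the mxq_*() API from this diff */

struct example_softc {
	struct mxq	 sc_lock;	/* serialises sc_count updates */
	struct kstat	*sc_kstat;
	uint64_t	 sc_count;
};

void
example_attach(struct example_softc *sc)
{
	/* static alternative: struct mxq lock = MXQ_INITIALIZER(IPL_VM); */
	mxq_init(&sc->sc_lock, IPL_VM);

	/* export the lock's spin/enter/contention counters */
	sc->sc_kstat = mxq_kstat_attach(&sc->sc_lock, "example", 0,
	    "example-lock", 0);
}

void
example_bump(struct example_softc *sc)
{
	/* spins until this CPU owns the lock, raising to IPL_VM */
	mxq_enter(&sc->sc_lock);
	sc->sc_count++;
	mxq_leave(&sc->sc_lock);
}

int
example_try_bump(struct example_softc *sc)
{
	/* non-blocking variant: returns 0 if another CPU holds the lock */
	if (!mxq_enter_try(&sc->sc_lock))
		return (0);
	sc->sc_count++;
	mxq_leave(&sc->sc_lock);
	return (1);
}

void
example_detach(struct example_softc *sc)
{
	mxq_kstat_detach(sc->sc_kstat);
}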