Index: dev/kstat.c
===================================================================
RCS file: /cvs/src/sys/dev/kstat.c,v
diff -u -p -r1.5 kstat.c
--- dev/kstat.c	18 Jan 2025 12:31:49 -0000	1.5
+++ dev/kstat.c	18 May 2025 02:16:35 -0000
@@ -166,6 +166,9 @@ int
 kstatattach(int num)
 {
 	/* XXX install system stats here */
+#if 1
+	mxq_kstat_attach(&sched_lock, "bsd", 0, "sched-lock", 0);
+#endif
 
 	return (0);
 }
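Not part of the diff: once a kernel with this change is running, the kstat attached above should be readable from userland with kstat(1). Its name is built from the provider/instance/name/unit arguments passed to mxq_kstat_attach(), so a filter along the lines of the one below ought to print the "spins", "enters", "enter_try-fails" and "contended" counters maintained in kern_lock.c (the exact kstat(1) pattern syntax here is from memory and is an assumption, not something this diff adds):

	# kstat bsd:0:sched-lock:0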
Index: kern/kern_fork.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_fork.c,v
diff -u -p -r1.271 kern_fork.c
--- kern/kern_fork.c	7 May 2025 00:39:09 -0000	1.271
+++ kern/kern_fork.c	18 May 2025 02:16:38 -0000
@@ -701,7 +701,7 @@ proc_trampoline_mi(void)
 	SCHED_ASSERT_LOCKED();
 	clear_resched(curcpu());
 
-	mtx_leave(&sched_lock);
+	SCHED_UNLOCK();
 	spl0();
 
 	SCHED_ASSERT_UNLOCKED();
Index: kern/kern_lock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_lock.c,v
diff -u -p -r1.75 kern_lock.c
--- kern/kern_lock.c	3 Jul 2024 01:36:50 -0000	1.75
+++ kern/kern_lock.c	18 May 2025 02:16:38 -0000
@@ -18,12 +18,15 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
+#include "kstat.h"
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/sched.h>
 #include <sys/atomic.h>
 #include <sys/witness.h>
 #include <sys/mutex.h>
+#include <sys/pclock.h>
 
 #include <ddb/db_output.h>
 
@@ -418,3 +421,344 @@ _mtx_init_flags(struct mutex *m, int ipl
 	_mtx_init(m, ipl);
 }
 #endif /* WITNESS */
+
+void
+pc_lock_init(struct pc_lock *pcl)
+{
+	pcl->pcl_gen = 0;
+}
+
+unsigned int
+pc_sprod_enter(struct pc_lock *pcl)
+{
+	unsigned int gen;
+
+	gen = pcl->pcl_gen;
+	pcl->pcl_gen = ++gen;
+	membar_producer();
+
+	return (gen);
+}
+
+void
+pc_sprod_leave(struct pc_lock *pcl, unsigned int gen)
+{
+	membar_producer();
+	pcl->pcl_gen = ++gen;
+}
+
+#ifdef MULTIPROCESSOR
+unsigned int
+pc_mprod_enter(struct pc_lock *pcl)
+{
+	unsigned int gen, ngen, ogen;
+
+	gen = pcl->pcl_gen;
+	for (;;) {
+		while (gen & 1) {
+			CPU_BUSY_CYCLE();
+			gen = pcl->pcl_gen;
+		}
+
+		ngen = 1 + gen;
+		ogen = atomic_cas_uint(&pcl->pcl_gen, gen, ngen);
+		if (gen == ogen)
+			break;
+
+		CPU_BUSY_CYCLE();
+		gen = ogen;
+	}
+
+	membar_enter_after_atomic();
+	return (ngen);
+}
+
+void
+pc_mprod_leave(struct pc_lock *pcl, unsigned int gen)
+{
+	membar_exit();
+	pcl->pcl_gen = ++gen;
+}
+#else /* MULTIPROCESSOR */
+unsigned int pc_mprod_enter(struct pc_lock *)
+	__attribute__((alias("pc_sprod_enter")));
+void pc_mprod_leave(struct pc_lock *, unsigned int)
+	__attribute__((alias("pc_sprod_leave")));
+#endif /* MULTIPROCESSOR */
+
+void
+pc_cons_enter(struct pc_lock *pcl, unsigned int *genp)
+{
+	unsigned int gen;
+
+	gen = pcl->pcl_gen;
+	while (gen & 1) {
+		CPU_BUSY_CYCLE();
+		gen = pcl->pcl_gen;
+	}
+
+	membar_consumer();
+	*genp = gen;
+}
+
+int
+pc_cons_leave(struct pc_lock *pcl, unsigned int *genp)
+{
+	unsigned int gen;
+
+	membar_consumer();
+
+	gen = pcl->pcl_gen;
+	if (gen & 1) {
+		do {
+			CPU_BUSY_CYCLE();
+			gen = pcl->pcl_gen;
+		} while (gen & 1);
+	} else if (gen == *genp)
+		return (0);
+
+	*genp = gen;
+	return (EBUSY);
+}
+
+/*
+ * mutex queue lock thing
+ */
+
+struct mxq_waiter {
+	struct cpu_info * volatile w_owner;
+	struct mxq_waiter *w_next;
+};
+
+void
+mxq_init(struct mxq *mxq, int wantipl)
+{
+	mxq->mxq_lock = 0;
+	mxq->mxq_head = NULL;
+	mxq->mxq_tail = &mxq->mxq_head;
+	mxq->mxq_owner = NULL;
+	mxq->mxq_wantipl = wantipl;
+	mxq->mxq_oldipl = IPL_NONE;
+
+	mxq->mxq_spins = 0;
+	mxq->mxq_enter = 0;
+	mxq->mxq_enter_fails = 0;
+	mxq->mxq_waits = 0;
+}
+
+static int
+mxq_spin_enter(struct mxq *mxq, struct cpu_info *ci)
+{
+	struct schedstate_percpu *spc = &ci->ci_schedstate;
+
+	if (atomic_cas_uint(&mxq->mxq_lock, 0, 1) == 0)
+		return (0);
+
+	spc->spc_spinning++;
+	do {
+		do {
+			CPU_BUSY_CYCLE();
+		} while (mxq->mxq_lock != 0);
+	} while (atomic_cas_uint(&mxq->mxq_lock, 0, 1) != 0);
+	spc->spc_spinning--;
+
+	mxq->mxq_spins++;
+
+	return (1);
+}
+
+static void
+mxq_spin_leave(struct mxq *mxq)
+{
+	mxq->mxq_lock = 0;
+}
+
+int
+mxq_enter_try(struct mxq *mxq)
+{
+	struct cpu_info *self = curcpu();
+	struct cpu_info *owner;
+	int rv = 0;
+	int s;
+
+	s = splraise(mxq->mxq_wantipl);
+
+	mxq_spin_enter(mxq, self);
+	owner = mxq->mxq_owner;
+	if (owner == NULL) {
+		mxq->mxq_enter++;
+		mxq->mxq_owner = self;
+		rv = 1;
+	} else
+		mxq->mxq_enter_fails++;
+	mxq_spin_leave(mxq);
+
+	if (rv) {
+		membar_enter();
+		mxq->mxq_oldipl = s;
+	} else
+		splx(s);
+
+	return (rv);
+}
+
+void
+mxq_enter(struct mxq *mxq)
+{
+	struct cpu_info *self = curcpu();
+	struct cpu_info *owner;
+	struct mxq_waiter w = { .w_owner = self };
+	int s;
+
+	s = splraise(mxq->mxq_wantipl);
+
+	mxq_spin_enter(mxq, self);
+	mxq->mxq_enter++;
+	owner = mxq->mxq_owner;
+	if (owner == NULL)			/* take ownership */
+		mxq->mxq_owner = self;
+	else {					/* queue for ownership */
+		mxq->mxq_waits++;
+
+		if (mxq->mxq_tail == NULL)	/* cope with MXQ_INITIALIZER */
+			mxq->mxq_tail = &mxq->mxq_head;
+
+		*mxq->mxq_tail = &w;
+		mxq->mxq_tail = &w.w_next;
+	}
+	mxq_spin_leave(mxq);
+
+	if (owner != NULL) {
+		struct schedstate_percpu *spc = &self->ci_schedstate;
+
+		spc->spc_spinning++;
+		while (w.w_owner != NULL)
+			CPU_BUSY_CYCLE();
+		spc->spc_spinning--;
+	}
+
+	membar_enter();
+	mxq->mxq_oldipl = s;
+}
+
+void
+mxq_leave(struct mxq *mxq)
+{
+	struct mxq_waiter *next;
+	int s;
+
+	s = mxq->mxq_oldipl;
+	mxq->mxq_oldipl = IPL_NONE;
+
+	membar_exit_before_atomic();
+	mxq_spin_enter(mxq, curcpu());
+	next = mxq->mxq_head;
+	if (next != NULL) {			/* move ownership */
+		mxq->mxq_owner = next->w_owner;
+
+		mxq->mxq_head = next->w_next;
+		if (mxq->mxq_head == NULL)
+			mxq->mxq_tail = &mxq->mxq_head;
+
+		next->w_owner = NULL;		/* let the next cpu proceed */
+	} else					/* drop ownership */
+		mxq->mxq_owner = NULL;
+	mxq_spin_leave(mxq);
+
+	splx(s);
+}
+
+#if NKSTAT > 0
+#include <sys/kstat.h>
+#include <sys/malloc.h>
+
+struct mxq_kstats {
+	struct kstat_kv mxq_ks_spins;
+	struct kstat_kv mxq_ks_enter;
+	struct kstat_kv mxq_ks_enter_fails;
+	struct kstat_kv mxq_ks_waits;
+};
+
+static int	mxq_kstat_read(struct kstat *);
+
+struct kstat *
+mxq_kstat_attach(struct mxq *mxq, const char *provider, unsigned int instance,
+    const char *name, unsigned int unit)
+{
+	struct kstat *ks;
+	struct mxq_kstats *kvs;
+
+	kvs = malloc(sizeof(*kvs), M_COUNTERS, M_NOWAIT|M_ZERO);
+	if (kvs == NULL)
+		return (NULL);
+
+	ks = kstat_create(provider, instance, name, unit, KSTAT_T_KV, 0);
+	if (ks == NULL) {
+		free(kvs, M_COUNTERS, sizeof(*kvs));
+		return (NULL);
+	}
+
+	kstat_kv_init(&kvs->mxq_ks_spins, "spins", KSTAT_KV_T_COUNTER64);
+	kstat_kv_init(&kvs->mxq_ks_enter, "enters", KSTAT_KV_T_COUNTER64);
+	kstat_kv_init(&kvs->mxq_ks_enter_fails, "enter_try-fails",
+	    KSTAT_KV_T_COUNTER64);
+	kstat_kv_init(&kvs->mxq_ks_waits, "contended", KSTAT_KV_T_COUNTER64);
+
+	ks->ks_softc = mxq;
+	ks->ks_data = kvs;
+	ks->ks_datalen = sizeof(*kvs);
+	ks->ks_read = mxq_kstat_read;
+
+	kstat_install(ks);
+
+	return (ks);
+}
+
+static int
+mxq_kstat_read(struct kstat *ks)
+{
+	struct mxq *mxq = ks->ks_softc;
+	struct mxq_kstats *kvs = ks->ks_data;
+	int s;
+
+	s = splraise(mxq->mxq_wantipl);
+	mxq_spin_enter(mxq, curcpu());
+	kstat_kv_u64(&kvs->mxq_ks_spins) = mxq->mxq_spins;
+	kstat_kv_u64(&kvs->mxq_ks_enter) = mxq->mxq_enter;
+	kstat_kv_u64(&kvs->mxq_ks_enter_fails) = mxq->mxq_enter_fails;
+	kstat_kv_u64(&kvs->mxq_ks_waits) = mxq->mxq_waits;
+	mxq_spin_leave(mxq);
+	splx(s);
+
+	nanouptime(&ks->ks_updated);
+
+	return (0);
+}
+
+void
+mxq_kstat_detach(struct kstat *ks)
+{
+	struct mxq_kstats *kvs;
+
+	if (ks == NULL)
+		return;
+
+	kstat_remove(ks);
+	kvs = ks->ks_data;
+	kstat_destroy(ks);
+
+	free(kvs, M_COUNTERS, sizeof(*kvs));
+}
+#else /* NKSTAT > 0 */
+struct kstat *
+mxq_kstat_attach(struct mxq *mxq, const char *provider, unsigned int instance,
+    const char *name, unsigned int unit)
+{
+	return (NULL);
+}
+
+void
+mxq_kstat_detach(struct kstat *ks)
+{
+	/* nop */
+}
+#endif /* NKSTAT > 0 */
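Nothing in the diff uses the pc_lock API yet, so as a point of reference, here is a minimal sketch (not part of the diff) of how it is meant to be paired up, going only by the functions implemented in kern_lock.c above: a producer brackets its update with pc_mprod_enter()/pc_mprod_leave(), and a consumer re-reads until pc_cons_leave() reports a stable generation. struct foo, foo_update() and foo_read() are made-up names used only for illustration.

#include <sys/types.h>
#include <sys/pclock.h>

struct foo {
	struct pc_lock	f_lock;
	uint64_t	f_packets;
	uint64_t	f_bytes;
};

/* writer: multiple cpus may update, interlocked via the generation CAS */
void
foo_update(struct foo *f, uint64_t bytes)
{
	unsigned int gen;

	gen = pc_mprod_enter(&f->f_lock);
	f->f_packets++;
	f->f_bytes += bytes;
	pc_mprod_leave(&f->f_lock, gen);
}

/* reader: retry the snapshot until the generation is unchanged */
void
foo_read(struct foo *f, uint64_t *packets, uint64_t *bytes)
{
	unsigned int gen;

	pc_cons_enter(&f->f_lock, &gen);
	do {
		*packets = f->f_packets;
		*bytes = f->f_bytes;
	} while (pc_cons_leave(&f->f_lock, &gen) != 0);
}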
Index: kern/sched_bsd.c
===================================================================
RCS file: /cvs/src/sys/kern/sched_bsd.c,v
diff -u -p -r1.99 sched_bsd.c
--- kern/sched_bsd.c	10 Mar 2025 09:28:56 -0000	1.99
+++ kern/sched_bsd.c	18 May 2025 02:16:38 -0000
@@ -57,7 +57,11 @@
 uint64_t roundrobin_period;	/* [I] roundrobin period (ns) */
 int	lbolt;			/* once a second sleep address */
 
+#if 0
 struct mutex sched_lock;
+#else
+struct mxq sched_lock __aligned(64);
+#endif
 
 void			update_loadavg(void *);
 void			schedcpu(void *);
@@ -385,7 +389,11 @@ mi_switch(void)
 	nextproc = sched_chooseproc();
 
 	/* preserve old IPL level so we can switch back to that */
+#if 0
 	oldipl = MUTEX_OLDIPL(&sched_lock);
+#else
+	oldipl = MXQ_OLDIPL(&sched_lock);
+#endif
 
 	if (p != nextproc) {
 		uvmexp.swtch++;
@@ -403,7 +411,11 @@ mi_switch(void)
 	SCHED_ASSERT_LOCKED();
 
 	/* Restore proc's IPL. */
+#if 0
 	MUTEX_OLDIPL(&sched_lock) = oldipl;
+#else
+	MXQ_OLDIPL(&sched_lock) = oldipl;
+#endif
 	SCHED_UNLOCK();
 
 	SCHED_ASSERT_UNLOCKED();
Index: sys/mutex.h
===================================================================
RCS file: /cvs/src/sys/sys/mutex.h,v
diff -u -p -r1.22 mutex.h
--- sys/mutex.h	16 May 2024 09:30:03 -0000	1.22
+++ sys/mutex.h	18 May 2025 02:16:39 -0000
@@ -165,4 +165,52 @@ void db_mtx_leave(struct db_mutex *);
 
 #endif /* _KERNEL && DDB */
 
-#endif
+/*
+ * mutex with a wait queue
+ */
+struct mxq_waiter;
+
+struct mxq {
+	void *mxq_owner;
+	struct mxq_waiter *mxq_head;
+	struct mxq_waiter **mxq_tail;
+	volatile unsigned int mxq_lock;
+	int mxq_wantipl;
+	int mxq_oldipl;
+
+	uint64_t mxq_spins;
+	uint64_t mxq_enter;
+	uint64_t mxq_enter_fails;
+	uint64_t mxq_waits;
+};
+
+#if defined(_KERNEL)
+#define MXQ_INITIALIZER(_ipl) { .mxq_wantipl = (_ipl) }
+
+void	mxq_init(struct mxq *, int);
+int	mxq_enter_try(struct mxq *);
+void	mxq_enter(struct mxq *);
+void	mxq_leave(struct mxq *);
+
+#define MXQ_OLDIPL(_mxq)	(_mxq)->mxq_oldipl
+
+#define MXQ_ASSERT_LOCKED(_mxq) do {					\
+	if (((_mxq)->mxq_owner != curcpu()) &&				\
+	    !(panicstr || db_active))					\
+		panic("mxq %p not held in %s", (_mxq), __func__);	\
+} while (0)
+
+#define MXQ_ASSERT_UNLOCKED(_mxq) do {					\
+	if (((_mxq)->mxq_owner == curcpu()) &&				\
+	    !(panicstr || db_active))					\
+		panic("mxq %p held in %s", (_mxq), __func__);		\
+} while (0)
+
+struct kstat;
+
+struct kstat *mxq_kstat_attach(struct mxq *,
+	    const char *, unsigned int, const char *, unsigned int);
+void	mxq_kstat_detach(struct kstat *);
+#endif /* defined(_KERNEL) */
+
+#endif /* _SYS_MUTEX_H_ */
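As a point of reference, a small sketch (not part of the diff) of the mxq API declared above, including the MXQ_INITIALIZER() case whose NULL mxq_tail mxq_enter() fixes up lazily. foo_mxq and foo_work() are made-up names, IPL_VM is an arbitrary choice, and the mxq_enter_try()/mxq_enter() split is only there to show both entry points; a plain mxq_enter() on its own would do.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mutex.h>

struct mxq foo_mxq = MXQ_INITIALIZER(IPL_VM);

void
foo_work(void)
{
	/* opportunistic attempt first, then queue up for ownership */
	if (!mxq_enter_try(&foo_mxq))
		mxq_enter(&foo_mxq);

	MXQ_ASSERT_LOCKED(&foo_mxq);

	/* critical section: runs at IPL_VM with foo_mxq owned by this cpu */

	mxq_leave(&foo_mxq);
}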
Index: sys/pclock.h
===================================================================
RCS file: sys/pclock.h
diff -N sys/pclock.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ sys/pclock.h	18 May 2025 02:16:39 -0000
@@ -0,0 +1,49 @@
+/*	$OpenBSD$ */
+
+/*
+ * Copyright (c) 2023 David Gwynne
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _SYS_PCLOCK_H
+#define _SYS_PCLOCK_H
+
+#include
+
+struct pc_lock {
+	volatile unsigned int pcl_gen;
+};
+
+#ifdef _KERNEL
+
+#define PC_LOCK_INITIALIZER() { .pcl_gen = 0 }
+
+void	pc_lock_init(struct pc_lock *);
+
+/* single (non-interlocking) producer */
+unsigned int	pc_sprod_enter(struct pc_lock *);
+void		pc_sprod_leave(struct pc_lock *, unsigned int);
+
+/* multiple (interlocking) producers */
+unsigned int	pc_mprod_enter(struct pc_lock *);
+void		pc_mprod_leave(struct pc_lock *, unsigned int);
+
+/* consumer */
+void	pc_cons_enter(struct pc_lock *, unsigned int *);
+__warn_unused_result int
+	pc_cons_leave(struct pc_lock *, unsigned int *);
+
+#endif /* _KERNEL */
+
+#endif /* _SYS_PCLOCK_H */
Index: sys/sched.h
===================================================================
RCS file: /cvs/src/sys/sys/sched.h,v
diff -u -p -r1.73 sched.h
--- sys/sched.h	8 Jul 2024 14:46:47 -0000	1.73
+++ sys/sched.h	18 May 2025 02:16:39 -0000
@@ -200,6 +202,7 @@ void remrunqueue(struct proc *);
 		func();						\
 } while (0)
 
+#if 0
 extern struct mutex sched_lock;
 
 #define SCHED_ASSERT_LOCKED()	MUTEX_ASSERT_LOCKED(&sched_lock)
@@ -208,6 +211,16 @@ extern struct mutex sched_lock;
 #define SCHED_LOCK_INIT()	mtx_init(&sched_lock, IPL_SCHED)
 #define SCHED_LOCK()		mtx_enter(&sched_lock)
 #define SCHED_UNLOCK()		mtx_leave(&sched_lock)
+#else
+extern struct mxq sched_lock;
+
+#define SCHED_ASSERT_LOCKED()	MXQ_ASSERT_LOCKED(&sched_lock)
+#define SCHED_ASSERT_UNLOCKED()	MXQ_ASSERT_UNLOCKED(&sched_lock)
+
+#define SCHED_LOCK_INIT()	mxq_init(&sched_lock, IPL_SCHED)
+#define SCHED_LOCK()		mxq_enter(&sched_lock)
+#define SCHED_UNLOCK()		mxq_leave(&sched_lock)
+#endif
 
 #endif	/* _KERNEL */
 #endif	/* _SYS_SCHED_H_ */
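Finally, a sketch (not part of the diff) of pairing mxq_kstat_attach() and mxq_kstat_detach() around the lifetime of a lock, the way a driver could export its own lock counters next to the sched-lock kstat wired up in dev/kstat.c. struct foo_softc, foo_attach() and foo_detach() are made-up names and IPL_NET is an arbitrary choice. mxq_kstat_attach() may return NULL (kstat not configured, or no memory) and mxq_kstat_detach() accepts NULL, so the caller does not have to check.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mutex.h>
#include <sys/kstat.h>

struct foo_softc {
	struct mxq	 sc_mxq;
	struct kstat	*sc_mxq_ks;
};

void
foo_attach(struct foo_softc *sc)
{
	mxq_init(&sc->sc_mxq, IPL_NET);

	/* export the lock's spin/enter/contention counters */
	sc->sc_mxq_ks = mxq_kstat_attach(&sc->sc_mxq, "foo", 0,
	    "foo-lock", 0);
}

void
foo_detach(struct foo_softc *sc)
{
	mxq_kstat_detach(sc->sc_mxq_ks);
	sc->sc_mxq_ks = NULL;
}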