Index: kern/kern_lock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_lock.c,v
diff -u -p -r1.81 kern_lock.c
--- kern/kern_lock.c	24 Jun 2025 15:37:43 -0000	1.81
+++ kern/kern_lock.c	14 Jul 2025 04:18:16 -0000
@@ -24,6 +24,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
@@ -37,7 +38,7 @@
  * CPU-dependent timing, this needs to be settable from ddb.
  * Use a "long" to allow larger thresholds on fast 64 bits machines.
  */
-long __mp_lock_spinout = 1L * INT_MAX;
+long __mp_lock_spinout = 50L * INT_MAX;
 #endif /* MP_LOCKDEBUG */
 
 extern int ncpusfound;
@@ -59,9 +60,12 @@ extern int ncpusfound;
 
 #ifdef MULTIPROCESSOR
 
+#include	/* CACHELINESIZE */
 #include
 struct __mp_lock kernel_lock;
 
+static void mtx_init_parking(void);
+
 /*
  * Functions for manipulating the kernel_lock.  We put them here
  * so that they show up in profiles.
@@ -71,6 +75,7 @@ void
 _kernel_lock_init(void)
 {
 	__mp_lock_init(&kernel_lock);
+	mtx_init_parking();
 }
 
 /*
@@ -239,90 +244,307 @@ __mp_lock_held(struct __mp_lock *mpl, st
 void
 __mtx_init(struct mutex *mtx, int wantipl)
 {
-	mtx->mtx_owner = NULL;
+	mtx->mtx_owner = 0;
 	mtx->mtx_wantipl = wantipl;
 	mtx->mtx_oldipl = IPL_NONE;
 }
 
 #ifdef MULTIPROCESSOR
+struct mtx_waiter {
+	struct mutex *volatile mtx;
+	unsigned long self;
+	unsigned int spins;
+	TAILQ_ENTRY(mtx_waiter) entry;
+};
+
+TAILQ_HEAD(mtx_waitlist, mtx_waiter);
+
+struct mtx_park {
+	struct cpu_info *volatile lock;
+	struct mtx_waitlist waiters;
+} __aligned(CACHELINESIZE);
+
+#define MTX_PARKING_BITS	7
+#define MTX_PARKING_LOTS	(1 << MTX_PARKING_BITS)
+#define MTX_PARKING_MASK	(MTX_PARKING_LOTS - 1)
+
+static struct mtx_park mtx_parking[MTX_PARKING_LOTS];
+
+static void
+mtx_init_parking(void)
+{
+	size_t i;
+
+	for (i = 0; i < nitems(mtx_parking); i++) {
+		struct mtx_park *p = &mtx_parking[i];
+
+		p->lock = NULL;
+		TAILQ_INIT(&p->waiters);
+	}
+}
+
+void
+mtx_print_parks(void)
+{
+	size_t i;
+
+	for (i = 0; i < nitems(mtx_parking); i++) {
+		struct mtx_park *p = &mtx_parking[i];
+		struct mtx_waiter *w;
+
+		db_printf("park %zu @ %p lock %p\n", i, p, p->lock);
+		TAILQ_FOREACH(w, &p->waiters, entry) {
+			db_printf("\twaiter mtx %p self 0x%lx spins %u\n",
+			    w->mtx, w->self, w->spins);
+		}
+	}
+}
+
+static struct mtx_park *
+mtx_park(struct mutex *mtx)
+{
+	unsigned long addr = (unsigned long)mtx;
+	addr >>= 6;
+	addr ^= addr >> MTX_PARKING_BITS;
+	addr &= MTX_PARKING_MASK;
+
+	return &mtx_parking[addr];
+}
+
+static unsigned long
+mtx_enter_park(struct mtx_park *p)
+{
+	struct cpu_info *ci = curcpu();
+	struct cpu_info *owner;
+	unsigned long m;
+
+	m = intr_disable();
+	while ((owner = atomic_cas_ptr(&p->lock, NULL, ci)) != NULL)
+		CPU_BUSY_CYCLE();
+	membar_enter_after_atomic();
+
+	return (m);
+}
+
+static void
+mtx_leave_park(struct mtx_park *p, unsigned long m)
+{
+	membar_exit();
+	p->lock = NULL;
+	intr_restore(m);
+}
+
+static inline unsigned long
+mtx_cas(struct mutex *mtx, unsigned long e, unsigned long v)
+{
+	return atomic_cas_ulong(&mtx->mtx_owner, e, v);
+}
+
+int
+mtx_enter_try(struct mutex *mtx)
+{
+	struct cpu_info *ci = curcpu();
+	unsigned long owner, self = (unsigned long)ci;
+	int s;
+
+	/* Avoid deadlocks after panic or in DDB */
+	if (panicstr || db_active)
+		return (1);
+
+	if (mtx->mtx_wantipl != IPL_NONE)
+		s = splraise(mtx->mtx_wantipl);
+
+	owner = mtx_cas(mtx, 0, self);
+	if (owner == 0) {
+		membar_enter_after_atomic();
+		if (mtx->mtx_wantipl != IPL_NONE)
+			mtx->mtx_oldipl = s;
+#ifdef DIAGNOSTIC
+		ci->ci_mutex_level++;
+#endif
+		WITNESS_LOCK(MUTEX_LOCK_OBJECT(mtx), LOP_EXCLUSIVE);
+		return (1);
+	}
+
+	if (mtx->mtx_wantipl != IPL_NONE)
+		splx(s);
+
+#ifdef DIAGNOSTIC
+	if (__predict_false((owner & ~1UL) == self))
+		panic("mtx %p: locking against myself", mtx);
+#endif
+
+	return (0);
+}
+
 void
 mtx_enter(struct mutex *mtx)
 {
-	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
-	unsigned int i, ncycle = CPU_MIN_BUSY_CYCLES;
+	struct cpu_info *ci = curcpu();
+	struct schedstate_percpu *spc = &ci->ci_schedstate;
+	unsigned long owner, self = (unsigned long)ci;
+	struct mtx_park *p;
+	struct mtx_waiter w;
+	unsigned long m;
+	int spins = 0;
+	int s;
 #ifdef MP_LOCKDEBUG
 	long nticks = __mp_lock_spinout;
 #endif
 
+	/* Avoid deadlocks after panic or in DDB */
+	if (panicstr || db_active)
+		return;
+
 	WITNESS_CHECKORDER(MUTEX_LOCK_OBJECT(mtx),
 	    LOP_EXCLUSIVE | LOP_NEWORDER, NULL);
 
+	if (mtx->mtx_wantipl != IPL_NONE)
+		s = splraise(mtx->mtx_wantipl);
+
+	owner = mtx_cas(mtx, 0, self);
+	if (owner == 0) {
+		/* we got the lock first go. this is the fast path */
+		goto locked;
+	}
+
+#ifdef DIAGNOSTIC
+	if (__predict_false((owner & ~1ULL) == self))
+		panic("mtx %p: locking against myself", mtx);
+#endif
+
+	/* we're going to have to spin for it now */
 	spc->spc_spinning++;
-	while (mtx_enter_try(mtx) == 0) {
-		do {
-			/* Busy loop with exponential backoff. */
-			for (i = ncycle; i > 0; i--)
+
+	for (spins = 0; spins < 40; spins++) {
+		if (ISSET(owner, 1)) {
+			/* don't spin if cpus are already parked */
+			break;
+		}
+		CPU_BUSY_CYCLE();
+		owner = mtx->mtx_owner;
+		if (owner == 0) {
+			owner = mtx_cas(mtx, 0, self);
+			if (owner == 0)
+				goto spinlocked;
+		}
+	}
+
+	/* take the really slow path */
+	p = mtx_park(mtx);
+
+	w.self = self;
+	w.spins = 0;
+
+	/* publish our existence in the parking lot */
+	w.mtx = mtx;
+	m = mtx_enter_park(p);
+	TAILQ_INSERT_TAIL(&p->waiters, &w, entry);
+	mtx_leave_park(p, m);
+
+	do {
+		unsigned long o;
+
+		KASSERT(owner != 0);
+
+		w.mtx = mtx;
+		membar_producer();
+		o = mtx_cas(mtx, owner, owner | 1);
+		if (o == owner || ISSET(o, 1)) {
+			while (w.mtx != NULL) {
 				CPU_BUSY_CYCLE();
 #ifdef MP_LOCKDEBUG
-			if ((nticks -= ncycle) <= 0) {
-				db_printf("%s: %p lock spun out\n", __func__, mtx);
-				db_enter();
-				nticks = __mp_lock_spinout;
+				if (--nticks <= 0) {
+					db_printf("%s: %p lock spun out\n",
+					    __func__, mtx);
+					db_enter();
+					nticks = __mp_lock_spinout;
+				}
+#endif
 			}
+			membar_consumer();
+			w.spins++;
+		} else if (o != 0) {
+			owner = o;
+			continue;
+		}
+
+		owner = mtx_cas(mtx, 0, self | 1);
+	} while (owner != 0);
+
+	m = mtx_enter_park(p);
+	TAILQ_REMOVE(&p->waiters, &w, entry);
+	mtx_leave_park(p, m);
+spinlocked:
+	spc->spc_spinning--;
+locked:
+	membar_enter_after_atomic();
+	if (mtx->mtx_wantipl != IPL_NONE)
+		mtx->mtx_oldipl = s;
+#ifdef DIAGNOSTIC
+	ci->ci_mutex_level++;
 #endif
-			if (ncycle < CPU_MAX_BUSY_CYCLES)
-				ncycle += ncycle;
-		} while (mtx->mtx_owner != NULL);
+	WITNESS_LOCK(MUTEX_LOCK_OBJECT(mtx), LOP_EXCLUSIVE);
+}
+
+static inline void
+mtx_wakeup(struct mutex *mtx, struct mtx_park *p)
+{
+	struct mtx_waiter *w;
+
+	mtx->mtx_owner = 0;
+	membar_producer();
+	TAILQ_FOREACH(w, &p->waiters, entry) {
+		if (w->mtx == mtx) {
+			w->mtx = NULL;
+			break;
+		}
 	}
-	spc->spc_spinning--;
 }
 
-int
-mtx_enter_try(struct mutex *mtx)
+void
+mtx_leave(struct mutex *mtx)
 {
-	struct cpu_info *owner, *ci = curcpu();
+	struct cpu_info *ci = curcpu();
+	unsigned long owner, self = (unsigned long)ci;
 	int s;
 
 	/* Avoid deadlocks after panic or in DDB */
 	if (panicstr || db_active)
-		return (1);
+		return;
 
-	if (mtx->mtx_wantipl != IPL_NONE)
-		s = splraise(mtx->mtx_wantipl);
+	WITNESS_UNLOCK(MUTEX_LOCK_OBJECT(mtx), LOP_EXCLUSIVE);
 
-	/*
-	 * Avoid unconditional atomic operation to prevent cache line
-	 * contention.
-	 */
-	owner = mtx->mtx_owner;
 #ifdef DIAGNOSTIC
-	if (__predict_false(owner == ci))
-		panic("mtx %p: locking against myself", mtx);
+	curcpu()->ci_mutex_level--;
 #endif
-	if (owner == NULL) {
-		owner = atomic_cas_ptr(&mtx->mtx_owner, NULL, ci);
-		if (owner == NULL) {
-			membar_enter_after_atomic();
-			if (mtx->mtx_wantipl != IPL_NONE)
-				mtx->mtx_oldipl = s;
+
+	s = mtx->mtx_oldipl;
+	membar_exit_before_atomic();
+	owner = atomic_cas_ulong(&mtx->mtx_owner, self, 0);
+	if (owner != self) {
+		struct mtx_park *p;
+		unsigned long m;
+
 #ifdef DIAGNOSTIC
-			ci->ci_mutex_level++;
+		if (__predict_false((owner & ~1ULL) != self))
+			panic("mtx %p: not held", mtx);
 #endif
-			WITNESS_LOCK(MUTEX_LOCK_OBJECT(mtx), LOP_EXCLUSIVE);
-			return (1);
-		}
+
+		p = mtx_park(mtx);
+		m = mtx_enter_park(p);
+		mtx_wakeup(mtx, p);
+		mtx_leave_park(p, m);
 	}
 
 	if (mtx->mtx_wantipl != IPL_NONE)
 		splx(s);
-
-	return (0);
 }
 #else
 void
 mtx_enter(struct mutex *mtx)
 {
-	struct cpu_info *ci = curcpu();
+	unsigned long self = mtx_curcpu();
 
 	/* Avoid deadlocks after panic or in DDB */
 	if (panicstr || db_active)
@@ -332,14 +554,14 @@ mtx_enter(struct mutex *mtx)
 	    LOP_EXCLUSIVE | LOP_NEWORDER, NULL);
 
 #ifdef DIAGNOSTIC
-	if (__predict_false(mtx->mtx_owner == ci))
+	if (__predict_false(mtx_owner(mtx) == self))
 		panic("mtx %p: locking against myself", mtx);
 #endif
 
 	if (mtx->mtx_wantipl != IPL_NONE)
 		mtx->mtx_oldipl = splraise(mtx->mtx_wantipl);
 
-	mtx->mtx_owner = ci;
+	mtx->mtx_owner = self;
 
 #ifdef DIAGNOSTIC
 	ci->ci_mutex_level++;
@@ -353,7 +575,6 @@ mtx_enter_try(struct mutex *mtx)
 	mtx_enter(mtx);
 	return (1);
 }
-#endif
 
 void
 mtx_leave(struct mutex *mtx)
@@ -372,13 +593,11 @@ mtx_leave(struct mutex *mtx)
 #endif
 
 	s = mtx->mtx_oldipl;
-#ifdef MULTIPROCESSOR
-	membar_exit();
-#endif
-	mtx->mtx_owner = NULL;
+	mtx->mtx_owner = 0;
 	if (mtx->mtx_wantipl != IPL_NONE)
 		splx(s);
 }
+#endif
 
 #ifdef DDB
 void
Index: kern/subr_witness.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_witness.c,v
diff -u -p -r1.55 subr_witness.c
--- kern/subr_witness.c	14 Apr 2025 09:14:51 -0000	1.55
+++ kern/subr_witness.c	14 Jul 2025 04:18:16 -0000
@@ -1677,7 +1677,7 @@ _isitmyx(struct witness *w1, struct witn
 	if (!((WITNESS_ATOD(r1) == r2 && WITNESS_DTOA(r2) == r1) ||
 	    (WITNESS_DTOA(r1) == r2 && WITNESS_ATOD(r2) == r1))) {
 		/* Don't squawk if we're potentially racing with an update. */
-	if (w_mtx.mtx_owner != curcpu())
+	if (mtx_owner(&w_mtx) != mtx_curcpu())
 		return (0);
 	printf("witness: %s: rmatrix mismatch between %s (index %d) "
 	    "and %s (index %d): w_rmatrix[%d][%d] == %x but "
Index: sys/mutex.h
===================================================================
RCS file: /cvs/src/sys/sys/mutex.h,v
diff -u -p -r1.23 mutex.h
--- sys/mutex.h	2 Jul 2025 14:36:56 -0000	1.23
+++ sys/mutex.h	14 Jul 2025 04:18:16 -0000
@@ -54,7 +54,7 @@
 #include
 
 struct mutex {
-	void *volatile mtx_owner;
+	volatile unsigned long mtx_owner;
 	int mtx_wantipl;
 	int mtx_oldipl;
 #ifdef WITNESS
@@ -64,23 +64,26 @@ struct mutex {
 
 #ifdef WITNESS
 #define MUTEX_INITIALIZER_FLAGS(ipl, name, flags) \
-	{ NULL, __MUTEX_IPL((ipl)), IPL_NONE, MTX_LO_INITIALIZER(name, flags) }
+	{ 0, __MUTEX_IPL((ipl)), IPL_NONE, MTX_LO_INITIALIZER(name, flags) }
 #else
 #define MUTEX_INITIALIZER_FLAGS(ipl, name, flags) \
-	{ NULL, __MUTEX_IPL((ipl)), IPL_NONE }
+	{ 0, __MUTEX_IPL((ipl)), IPL_NONE }
 #endif
 
 void __mtx_init(struct mutex *, int);
 #define _mtx_init(mtx, ipl) __mtx_init((mtx), __MUTEX_IPL((ipl)))
 
+#define mtx_curcpu()	(unsigned long)curcpu()
+#define mtx_owner(mtx)	((mtx)->mtx_owner & ~1UL)
+
 #ifdef DIAGNOSTIC
 #define MUTEX_ASSERT_LOCKED(mtx) do {					\
-	if (((mtx)->mtx_owner != curcpu()) && !(panicstr || db_active))	\
+	if (mtx_owner(mtx) != mtx_curcpu() && !(panicstr || db_active))	\
 		panic("mutex %p not held in %s", (mtx), __func__);	\
 } while (0)
 
 #define MUTEX_ASSERT_UNLOCKED(mtx) do {					\
-	if (((mtx)->mtx_owner == curcpu()) && !(panicstr || db_active))	\
+	if (mtx_owner(mtx) == mtx_curcpu() && !(panicstr || db_active))	\
 		panic("mutex %p held in %s", (mtx), __func__);		\
 } while (0)
 #else
@@ -128,7 +131,7 @@ void mtx_leave(struct mutex *);
 #define mtx_init(m, ipl)	mtx_init_flags(m, ipl, NULL, 0)
 
 #define mtx_owned(mtx) \
-	(((mtx)->mtx_owner == curcpu()) || panicstr || db_active)
+	((mtx_owner(mtx) == mtx_curcpu()) || panicstr || db_active)
 
 #ifdef WITNESS
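
For readers following the diff, here is a small standalone userland sketch (not part of the patch, and not the kernel code) of the two ideas the mutex changes are built on: a lock word that packs the owner identity into an unsigned long with bit 0 acting as a "waiters are parked" flag, and the hash that maps a mutex address to one of the fixed parking lots. The MTX_PARKING_* values mirror the diff; the toy_* names, the C11 atomics, and main() are invented for illustration only.

/*
 * Toy illustration of the parking-lot mutex idea, built on C11
 * atomics instead of the kernel primitives.  Assumptions are marked.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define MTX_PARKING_BITS	7
#define MTX_PARKING_LOTS	(1 << MTX_PARKING_BITS)
#define MTX_PARKING_MASK	(MTX_PARKING_LOTS - 1)
#define MTX_CONTENDED		1UL	/* bit 0: somebody is parked */

/* same address mixing as mtx_park() in the diff */
static unsigned int
park_index(const void *mtx)
{
	unsigned long addr = (unsigned long)mtx;

	addr >>= 6;			/* drop the cache-line-aligned low bits */
	addr ^= addr >> MTX_PARKING_BITS;
	addr &= MTX_PARKING_MASK;
	return (unsigned int)addr;
}

/* the lock word: owner identity in the upper bits, bit 0 = contended */
struct toy_mtx {
	_Atomic unsigned long owner;
};

/* uncontended fast path: CAS 0 -> self, analogous to mtx_enter_try() */
static bool
toy_try_enter(struct toy_mtx *m, unsigned long self)
{
	unsigned long expect = 0;

	return atomic_compare_exchange_strong(&m->owner, &expect, self);
}

/*
 * Unlock: CAS self -> 0.  If that fails, a waiter has set the
 * contended bit; the diff then takes the parking lot lock and wakes
 * one parked waiter (mtx_enter_park/mtx_wakeup/mtx_leave_park),
 * which is omitted here.
 */
static bool
toy_leave(struct toy_mtx *m, unsigned long self)
{
	unsigned long expect = self;

	if (atomic_compare_exchange_strong(&m->owner, &expect, 0))
		return false;		/* nobody was parked */
	/* expect is now self | MTX_CONTENDED: a wakeup is needed */
	return true;
}

int
main(void)
{
	struct toy_mtx m = { 0 };
	unsigned long self = 0x1000;	/* stand-in for (unsigned long)curcpu() */

	printf("toy mutex %p hashes to parking lot %u\n",
	    (void *)&m, park_index(&m));
	printf("fast-path acquire: %d\n", toy_try_enter(&m, self));
	printf("second acquire fails: %d\n", toy_try_enter(&m, self));
	printf("unlock needs wakeup: %d\n", toy_leave(&m, self));
	return 0;
}

The point of hashing into a fixed array of parking lots, as the diff does, is that waiters need no per-mutex queue storage: struct mutex stays three words, and the waiter records live on the spinning CPUs' stacks, found again at wakeup time via the same hash.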