Index: sys/mutex.h
===================================================================
RCS file: /cvs/src/sys/sys/mutex.h,v
diff -u -p -r1.22 mutex.h
--- sys/mutex.h	16 May 2024 09:30:03 -0000	1.22
+++ sys/mutex.h	19 Jun 2025 11:58:45 -0000
@@ -54,7 +54,7 @@
 #include
 
 struct mutex {
-    void *volatile mtx_owner;
+    volatile unsigned long mtx_owner;
     int mtx_wantipl;
     int mtx_oldipl;
 #ifdef WITNESS
@@ -64,23 +64,27 @@ struct mutex {
 
 #ifdef WITNESS
 #define MUTEX_INITIALIZER_FLAGS(ipl, name, flags) \
-    { NULL, __MUTEX_IPL((ipl)), IPL_NONE, MTX_LO_INITIALIZER(name, flags) }
+    { 0, __MUTEX_IPL((ipl)), IPL_NONE, MTX_LO_INITIALIZER(name, flags) }
 #else
 #define MUTEX_INITIALIZER_FLAGS(ipl, name, flags) \
-    { NULL, __MUTEX_IPL((ipl)), IPL_NONE }
+    { 0, __MUTEX_IPL((ipl)), IPL_NONE }
 #endif
 
 void __mtx_init(struct mutex *, int);
 #define _mtx_init(mtx, ipl) __mtx_init((mtx), __MUTEX_IPL((ipl)))
 
+#define mtx_curcpu()	(unsigned long)curcpu()
+#define mtx_owner(mtx)	((mtx)->mtx_owner & ~1UL)
+
 #ifdef DIAGNOSTIC
+
 #define MUTEX_ASSERT_LOCKED(mtx) do { \
-    if (((mtx)->mtx_owner != curcpu()) && !(panicstr || db_active)) \
+    if (mtx_owner(mtx) != mtx_curcpu() && !(panicstr || db_active)) \
        panic("mutex %p not held in %s", (mtx), __func__); \
 } while (0)
 
 #define MUTEX_ASSERT_UNLOCKED(mtx) do { \
-    if (((mtx)->mtx_owner == curcpu()) && !(panicstr || db_active)) \
+    if (mtx_owner(mtx) == mtx_curcpu() && !(panicstr || db_active)) \
        panic("mutex %p held in %s", (mtx), __func__); \
 } while (0)
 
 #else
@@ -128,7 +132,7 @@ void mtx_leave(struct mutex *);
 #define mtx_init(m, ipl)	mtx_init_flags(m, ipl, NULL, 0)
 
 #define mtx_owned(mtx) \
-    (((mtx)->mtx_owner == curcpu()) || panicstr || db_active)
+    ((mtx_owner(mtx) == mtx_curcpu()) || panicstr || db_active)
 
 #ifdef WITNESS
Index: kern/kern_lock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_lock.c,v
diff -u -p -r1.75 kern_lock.c
--- kern/kern_lock.c	3 Jul 2024 01:36:50 -0000	1.75
+++ kern/kern_lock.c	19 Jun 2025 11:58:45 -0000
@@ -24,6 +24,7 @@
 #include
 #include
 #include
+#include
 #include
 
@@ -38,9 +39,12 @@ int __mp_lock_spinout = INT_MAX;
 #ifdef MULTIPROCESSOR
+#include	/* CACHELINESIZE */
 #include
 
 struct __mp_lock kernel_lock;
 
+static void mtx_init_parking(void);
+
 /*
  * Functions for manipulating the kernel_lock.  We put them here
  * so that they show up in profiles.
  */
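The sys/mutex.h hunks above widen mtx_owner to an unsigned long so the aligned owner pointer and a low-order contention bit can share one word; mtx_owner() masks the bit off again. A minimal userland sketch of that encoding follows. The names here (fake_cpu, OWNER_WAITERS, owner_of) are illustrative stand-ins, not identifiers from the diff.

#include <assert.h>
#include <stdio.h>

/* stand-in for struct cpu_info; only its (aligned) address matters */
struct fake_cpu {
    long pad;
};

#define OWNER_WAITERS	1UL	/* low bit: somebody is parked/spinning */

/* same masking idea as the mtx_owner() macro in the diff */
static unsigned long
owner_of(unsigned long word)
{
    return (word & ~OWNER_WAITERS);
}

int
main(void)
{
    static struct fake_cpu cpu0;	/* word-aligned, so bit 0 is free */
    unsigned long word;

    word = (unsigned long)&cpu0;		/* lock owned, no waiters */
    assert(owner_of(word) == (unsigned long)&cpu0);

    word |= OWNER_WAITERS;			/* a waiter announced itself */
    assert(owner_of(word) == (unsigned long)&cpu0);
    assert(word & OWNER_WAITERS);

    printf("owner %#lx, waiters bit %lu\n",
        owner_of(word), word & OWNER_WAITERS);
    return (0);
}

Because struct cpu_info is at least word-aligned, bit 0 of its address is always clear, which is what makes it safe to fold the flag into the same word as the owner.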
@@ -50,6 +54,7 @@ void
 _kernel_lock_init(void)
 {
     __mp_lock_init(&kernel_lock);
+    mtx_init_parking();
 }
 
 /*
@@ -87,9 +92,7 @@ _kernel_lock_held(void)
 void
 ___mp_lock_init(struct __mp_lock *mpl, const struct lock_type *type)
 {
-    memset(mpl->mpl_cpus, 0, sizeof(mpl->mpl_cpus));
-    mpl->mpl_users = 0;
-    mpl->mpl_ticket = 1;
+    SIMPLEQ_INIT(&mpl->mpl_cpuq);
 
 #ifdef WITNESS
     mpl->mpl_lock_obj.lo_name = type->lt_name;
@@ -101,56 +104,101 @@ ___mp_lock_init(struct __mp_lock *mpl, c
 #endif
 }
 
-static __inline void
-__mp_lock_spin(struct __mp_lock *mpl, u_int me)
+static void
+__mp_lock_spin_enter(struct __mp_lock *mpl)
 {
-    struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
-#ifdef MP_LOCKDEBUG
-    int nticks = __mp_lock_spinout;
-#endif
+    if (atomic_cas_uint(&mpl->mpl_crit, 0, 1) != 0) {
+//      struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
 
-    spc->spc_spinning++;
-    while (mpl->mpl_ticket != me) {
-        CPU_BUSY_CYCLE();
+//      spc->spc_spinning++;
+        do {
+            do {
+                membar_consumer();
+            } while (mpl->mpl_crit != 0);
 
-#ifdef MP_LOCKDEBUG
-        if (--nticks <= 0) {
-            db_printf("%s: %p lock spun out\n", __func__, mpl);
-            db_enter();
-            nticks = __mp_lock_spinout;
-        }
-#endif
+        } while (atomic_cas_uint(&mpl->mpl_crit, 0, 1) != 0);
+//      spc->spc_spinning--;
     }
-    spc->spc_spinning--;
+    membar_enter_after_atomic();
+}
+
+static inline void
+__mp_lock_spin_leave(struct __mp_lock *mpl)
+{
+    membar_exit();
+    mpl->mpl_crit = 0;
+    membar_producer();
 }
 
 void
 __mp_lock(struct __mp_lock *mpl)
 {
-    struct __mp_lock_cpu *cpu = &mpl->mpl_cpus[cpu_number()];
+    struct __mp_lock_cpu *mplc = &mpl->mpl_cpus[cpu_number()];
+    struct cpu_info *self = curcpu(), *owner = NULL;
     unsigned long s;
+    unsigned int depth;
 
 #ifdef WITNESS
-    if (!__mp_lock_held(mpl, curcpu()))
+    if (!__mp_lock_held(mpl, self)) {
         WITNESS_CHECKORDER(&mpl->mpl_lock_obj,
             LOP_EXCLUSIVE | LOP_NEWORDER, NULL);
+    }
 #endif
 
     s = intr_disable();
-    if (cpu->mplc_depth++ == 0)
-        cpu->mplc_ticket = atomic_inc_int_nv(&mpl->mpl_users);
+    depth = mplc->mplc_depth++;
+    if (depth == 0) {
+        LLTRACE(lltrace_lock, mpl, LLTRACE_LK_K, LLTRACE_LK_A_START);
+
+        __mp_lock_spin_enter(mpl);
+        owner = mpl->mpl_owner;
+        if (owner == NULL)
+            mpl->mpl_owner = self;
+        else if (owner != self) {
+            mplc->mplc_self = self;
+            SIMPLEQ_INSERT_TAIL(&mpl->mpl_cpuq, mplc, mplc_entry);
+        }
+        __mp_lock_spin_leave(mpl);
+    }
     intr_restore(s);
 
-    __mp_lock_spin(mpl, cpu->mplc_ticket);
-    membar_enter_after_atomic();
+    if (owner != self && mplc->mplc_self != NULL) {
+        struct schedstate_percpu *spc = &self->ci_schedstate;
+#ifdef MP_LOCKDEBUG
+        int nticks = __mp_lock_spinout;
+#endif
+
+        spc->spc_spinning++;
+        do {
+            //CPU_BUSY_CYCLE();
+
+#ifdef MP_LOCKDEBUG
+            if (--nticks <= 0) {
+                db_printf("%s: %p lock spun out\n",
+                    __func__, mpl);
+                db_enter();
+                nticks = __mp_lock_spinout;
+            }
+#endif
+
+            membar_consumer();
+        } while (mplc->mplc_self != NULL);
+        spc->spc_spinning--;
+
+        membar_enter();
+    }
 
     WITNESS_LOCK(&mpl->mpl_lock_obj, LOP_EXCLUSIVE);
+
+    if (depth == 0)
+        LLTRACE(lltrace_lock, mpl, LLTRACE_LK_K, LLTRACE_LK_A_EXCL);
 }
 
 void
 __mp_unlock(struct __mp_lock *mpl)
 {
-    struct __mp_lock_cpu *cpu = &mpl->mpl_cpus[cpu_number()];
+    struct __mp_lock_cpu *mplc = &mpl->mpl_cpus[cpu_number()];
+    struct __mp_lock_cpu *nmplc = NULL;
     unsigned long s;
 
 #ifdef MP_LOCKDEBUG
@@ -163,50 +211,78 @@ __mp_unlock(struct __mp_lock *mpl)
 
     WITNESS_UNLOCK(&mpl->mpl_lock_obj, LOP_EXCLUSIVE);
 
     s = intr_disable();
-    if (--cpu->mplc_depth == 0) {
-        membar_exit();
-        mpl->mpl_ticket++;
+    if (--mplc->mplc_depth == 0) {
+        LLTRACE(lltrace_lock, mpl, LLTRACE_LK_K, LLTRACE_LK_R_EXCL);
+
+        __mp_lock_spin_enter(mpl);
+        nmplc = SIMPLEQ_FIRST(&mpl->mpl_cpuq);
+        if (nmplc != NULL) {
+            SIMPLEQ_REMOVE_HEAD(&mpl->mpl_cpuq, mplc_entry);
+            mpl->mpl_owner = nmplc->mplc_self;
+        } else
+            mpl->mpl_owner = NULL;
+        __mp_lock_spin_leave(mpl);
     }
     intr_restore(s);
+    /* there's enough membars done already */
+
+    if (nmplc != NULL) {
+        nmplc->mplc_self = NULL;
+        membar_producer();
+    }
 }
 
 int
 __mp_release_all(struct __mp_lock *mpl)
 {
-    struct __mp_lock_cpu *cpu = &mpl->mpl_cpus[cpu_number()];
+    struct __mp_lock_cpu *mplc = &mpl->mpl_cpus[cpu_number()];
+    struct __mp_lock_cpu *nmplc = NULL;
     unsigned long s;
     int rv;
 #ifdef WITNESS
     int i;
 #endif
 
+    LLTRACE(lltrace_lock, mpl, LLTRACE_LK_K, LLTRACE_LK_R_EXCL);
+
     s = intr_disable();
-    rv = cpu->mplc_depth;
+    rv = mplc->mplc_depth;
 #ifdef WITNESS
     for (i = 0; i < rv; i++)
         WITNESS_UNLOCK(&mpl->mpl_lock_obj, LOP_EXCLUSIVE);
 #endif
-    cpu->mplc_depth = 0;
-    membar_exit();
-    mpl->mpl_ticket++;
+    mplc->mplc_depth = 0;
+
+    __mp_lock_spin_enter(mpl);
+    nmplc = SIMPLEQ_FIRST(&mpl->mpl_cpuq);
+    if (nmplc != NULL) {
+        SIMPLEQ_REMOVE_HEAD(&mpl->mpl_cpuq, mplc_entry);
+        mpl->mpl_owner = nmplc->mplc_self;
+    } else
+        mpl->mpl_owner = NULL;
+    __mp_lock_spin_leave(mpl);
     intr_restore(s);
 
+    if (nmplc != NULL) {
+        nmplc->mplc_self = NULL;
+        membar_producer();
+    }
+
     return (rv);
 }
 
 void
 __mp_acquire_count(struct __mp_lock *mpl, int count)
 {
-    while (count--)
+    do {
         __mp_lock(mpl);
+    } while (--count);
 }
 
 int
 __mp_lock_held(struct __mp_lock *mpl, struct cpu_info *ci)
 {
-    struct __mp_lock_cpu *cpu = &mpl->mpl_cpus[CPU_INFO_UNIT(ci)];
-
-    return (cpu->mplc_ticket == mpl->mpl_ticket && cpu->mplc_depth > 0);
+    return (mpl->mpl_owner == ci);
 }
 
 #endif /* __USE_MI_MPLOCK */
 
@@ -218,44 +294,104 @@ __mp_lock_held(struct __mp_lock *mpl, st
 void
 __mtx_init(struct mutex *mtx, int wantipl)
 {
-    mtx->mtx_owner = NULL;
+    mtx->mtx_owner = 0;
     mtx->mtx_wantipl = wantipl;
     mtx->mtx_oldipl = IPL_NONE;
 }
 
 #ifdef MULTIPROCESSOR
-void
-mtx_enter(struct mutex *mtx)
+struct mtx_waiter {
+    struct mutex *mtx;
+    TAILQ_ENTRY(mtx_waiter) entry;
+};
+
+TAILQ_HEAD(mtx_waitlist, mtx_waiter);
+
+struct mtx_park {
+    struct cpu_info *lock;
+    struct mtx_waitlist waiters;
+};
+
+#define MTX_PARKING_BITS	7
+#define MTX_PARKING_LOTS	(1 << MTX_PARKING_BITS)
+#define MTX_PARKING_MASK	(MTX_PARKING_LOTS - 1)
+
+struct mtx_park mtx_parking[MTX_PARKING_LOTS];
+
+static void
+mtx_init_parking(void)
 {
-    struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
-#ifdef MP_LOCKDEBUG
-    int nticks = __mp_lock_spinout;
-#endif
+    size_t i;
 
-    WITNESS_CHECKORDER(MUTEX_LOCK_OBJECT(mtx),
-        LOP_EXCLUSIVE | LOP_NEWORDER, NULL);
+    for (i = 0; i < nitems(mtx_parking); i++) {
+        struct mtx_park *p = &mtx_parking[i];
 
-    spc->spc_spinning++;
-    while (mtx_enter_try(mtx) == 0) {
-        do {
-            CPU_BUSY_CYCLE();
-#ifdef MP_LOCKDEBUG
-            if (--nticks == 0) {
-                db_printf("%s: %p lock spun out\n",
-                    __func__, mtx);
-                db_enter();
-                nticks = __mp_lock_spinout;
-            }
-#endif
-        } while (mtx->mtx_owner != NULL);
+        p->lock = NULL;
+        TAILQ_INIT(&p->waiters);
     }
-    spc->spc_spinning--;
+}
+
+static struct mtx_park *
+mtx_park(struct mutex *mtx)
+{
+    unsigned long addr = (unsigned long)mtx;
+    addr >>= 6;
+    addr ^= addr >> MTX_PARKING_BITS;
+    addr &= MTX_PARKING_MASK;
+
+    return &mtx_parking[addr];
+}
+
+static unsigned long
+mtx_enter_park(struct mtx_park *p)
+{
+    struct cpu_info *ci = curcpu();
+    struct cpu_info *owner;
+    unsigned long m;
+
+    m = intr_disable();
+    for (;;) {
+        owner = p->lock;
+        if (owner == NULL) {
+            owner = atomic_cas_ptr(&p->lock, NULL, ci);
+            if (owner == NULL)
+                break;
+        }
+        CPU_BUSY_CYCLE();
+    }
+    membar_enter_after_atomic();
+
+    return (m);
+}
+
+static void
+mtx_leave_park(struct mtx_park *p, unsigned long m)
+{
+    membar_exit();
+    p->lock = NULL;
+    intr_restore(m);
+}
+
+static unsigned long
+mtx_enter_self(struct mutex *mtx, unsigned long self)
+{
+    unsigned long owner;
+
+    owner = mtx->mtx_owner;
+    if (owner == 0) {
+        owner = atomic_cas_ulong(&mtx->mtx_owner, 0, self);
+        if (owner == 0)
+            membar_enter_after_atomic();
+    }
+
+    return (owner);
 }
 
 int
 mtx_enter_try(struct mutex *mtx)
 {
-    struct cpu_info *owner, *ci = curcpu();
+    struct cpu_info *ci = curcpu();
+    unsigned long owner, self = (unsigned long)ci;
     int s;
 
     /* Avoid deadlocks after panic or in DDB */
@@ -265,32 +401,172 @@ mtx_enter_try(struct mutex *mtx)
     if (mtx->mtx_wantipl != IPL_NONE)
         s = splraise(mtx->mtx_wantipl);
 
-    owner = atomic_cas_ptr(&mtx->mtx_owner, NULL, ci);
-#ifdef DIAGNOSTIC
-    if (__predict_false(owner == ci))
-        panic("mtx %p: locking against myself", mtx);
-#endif
-    if (owner == NULL) {
-        membar_enter_after_atomic();
+    owner = mtx_enter_self(mtx, self);
+    if (owner == 0) {
         if (mtx->mtx_wantipl != IPL_NONE)
             mtx->mtx_oldipl = s;
 #ifdef DIAGNOSTIC
         ci->ci_mutex_level++;
 #endif
         WITNESS_LOCK(MUTEX_LOCK_OBJECT(mtx), LOP_EXCLUSIVE);
+        LLTRACE_CPU(ci, lltrace_lock, mtx, LLTRACE_LK_MTX,
+            LLTRACE_LK_I_EXCL);
         return (1);
     }
+#ifdef DIAGNOSTIC
+    if (__predict_false((owner & ~0x1UL) == self))
+        panic("mtx %p: locking against myself", mtx);
+#endif
 
     if (mtx->mtx_wantipl != IPL_NONE)
         splx(s);
+    LLTRACE_CPU(ci, lltrace_lock, mtx, LLTRACE_LK_MTX, LLTRACE_LK_I_FAIL);
 
     return (0);
 }
-#else
+
 void
 mtx_enter(struct mutex *mtx)
 {
     struct cpu_info *ci = curcpu();
+    unsigned long owner, self = (unsigned long)ci;
+    struct schedstate_percpu *spc = &ci->ci_schedstate;
+    struct mtx_park *p;
+    int s;
+#ifdef MP_LOCKDEBUG
+    int nticks = __mp_lock_spinout;
+#endif
+#if NLLT > 0
+    unsigned int lltev = LLTRACE_LK_I_EXCL;
+#endif
+    unsigned int i;
 
+    WITNESS_CHECKORDER(MUTEX_LOCK_OBJECT(mtx),
+        LOP_EXCLUSIVE | LOP_NEWORDER, NULL);
+
+    if (mtx->mtx_wantipl != IPL_NONE)
+        s = splraise(mtx->mtx_wantipl);
+
+    owner = mtx_enter_self(mtx, self);
+    if (owner == 0) {
+        /* we got the lock first go. this is the fast path */
+        goto locked;
+    }
+
+#ifdef DIAGNOSTIC
+    if (__predict_false((owner & ~0x1UL) == self))
+        panic("mtx %p: locking against myself", mtx);
+#endif
+    LLTRACE_SPC(spc, lltrace_lock, mtx, LLTRACE_LK_MTX,
+        LLTRACE_LK_A_START);
+#if NLLT > 0
+    lltev = LLTRACE_LK_A_EXCL;
+#endif
+
+    /* we're going to have to spin for it now */
+    spc->spc_spinning++;
+
+    /* "adaptive" try */
+    for (i = 0; i < 40; i++) {
+        CPU_BUSY_CYCLE();
+        owner = mtx_enter_self(mtx, self);
+        if (owner == 0)
+            goto spinlocked;
+    }
+
+    /* park us in a wait list and spin on our own stack */
+    p = mtx_park(mtx);
+    do {
+        struct mtx_waiter w;
+        unsigned long nself = self;
+        unsigned long m;
+
+        atomic_cas_ulong(&mtx->mtx_owner, owner, owner | 1);
+
+        m = mtx_enter_park(p);
+        owner = mtx->mtx_owner;
+        if (owner & 1) {
+            w.mtx = mtx;
+            TAILQ_INSERT_TAIL(&p->waiters, &w, entry);
+        }
+        mtx_leave_park(p, m);
+
+        if (owner & 1) {
+            while (w.mtx != NULL)
+                CPU_BUSY_CYCLE();
+
+            m = mtx_enter_park(p);
+            TAILQ_REMOVE(&p->waiters, &w, entry);
+            if (!TAILQ_EMPTY(&p->waiters))	/* XXX */
+                nself |= 1;
+            mtx_leave_park(p, m);
+        }
+
+        owner = mtx_enter_self(mtx, nself);
+    } while (owner != 0);
+
+spinlocked:
+    spc->spc_spinning--;
+locked:
+    membar_enter_after_atomic();
+    if (mtx->mtx_wantipl != IPL_NONE)
+        mtx->mtx_oldipl = s;
+#ifdef DIAGNOSTIC
+    ci->ci_mutex_level++;
+#endif
+    WITNESS_LOCK(MUTEX_LOCK_OBJECT(mtx), LOP_EXCLUSIVE);
+    LLTRACE_SPC(spc, lltrace_lock, mtx, LLTRACE_LK_MTX, lltev);
+}
+
+void
+mtx_leave(struct mutex *mtx)
+{
+    struct cpu_info *ci = curcpu();
+    unsigned long owner, self = (unsigned long)ci;
+    int s;
+
+    /* Avoid deadlocks after panic or in DDB */
+    if (panicstr || db_active)
+        return;
+
+    LLTRACE(lltrace_lock, mtx, LLTRACE_LK_MTX, LLTRACE_LK_R_EXCL);
+    WITNESS_UNLOCK(MUTEX_LOCK_OBJECT(mtx), LOP_EXCLUSIVE);
+
+#ifdef DIAGNOSTIC
+    curcpu()->ci_mutex_level--;
+#endif
+
+    s = mtx->mtx_oldipl;
+    owner = atomic_swap_ulong(&mtx->mtx_owner, 0);
+    if (owner != self) {
+        struct mtx_park *p;
+        unsigned long m;
+        struct mtx_waiter *w;
+
+#ifdef DIAGNOSTIC
+        if (__predict_false((owner & ~0x1UL) != self))
+            panic("mtx %p: not held", mtx);
+#endif
+
+        p = mtx_park(mtx);
+        m = mtx_enter_park(p);
+        TAILQ_FOREACH(w, &p->waiters, entry) {
+            if (w->mtx == mtx) {
+                w->mtx = NULL;
+                break;
+            }
+        }
+        mtx_leave_park(p, m);
+    }
+
+    if (mtx->mtx_wantipl != IPL_NONE)
+        splx(s);
+}
+#else
+void
+mtx_enter(struct mutex *mtx)
+{
+    unsigned long self = mtx_curcpu();
 
     /* Avoid deadlocks after panic or in DDB */
     if (panicstr || db_active)
@@ -300,19 +576,20 @@ mtx_enter(struct mutex *mtx)
         LOP_EXCLUSIVE | LOP_NEWORDER, NULL);
 
 #ifdef DIAGNOSTIC
-    if (__predict_false(mtx->mtx_owner == ci))
+    if (__predict_false(mtx_owner(mtx) == self))
         panic("mtx %p: locking against myself", mtx);
 #endif
 
     if (mtx->mtx_wantipl != IPL_NONE)
         mtx->mtx_oldipl = splraise(mtx->mtx_wantipl);
 
-    mtx->mtx_owner = ci;
+    mtx->mtx_owner = self;
 
 #ifdef DIAGNOSTIC
     ci->ci_mutex_level++;
 #endif
     WITNESS_LOCK(MUTEX_LOCK_OBJECT(mtx), LOP_EXCLUSIVE);
+    LLTRACE(lltrace_lock, mtx, LLTRACE_LK_MTX, LLTRACE_LK_I_EXCL);
 }
 
 int
@@ -321,7 +598,6 @@ mtx_enter_try(struct mutex *mtx)
     mtx_enter(mtx);
     return (1);
 }
-#endif
 
 void
 mtx_leave(struct mutex *mtx)
@@ -333,6 +609,7 @@ mtx_leave(struct mutex *mtx)
         return;
 
     MUTEX_ASSERT_LOCKED(mtx);
+    LLTRACE(lltrace_lock, mtx, LLTRACE_LK_MTX, LLTRACE_LK_R_EXCL);
     WITNESS_UNLOCK(MUTEX_LOCK_OBJECT(mtx), LOP_EXCLUSIVE);
 
 #ifdef DIAGNOSTIC
@@ -340,13 +617,11 @@ mtx_leave(struct mutex *mtx)
 #endif
 
     s = mtx->mtx_oldipl;
-#ifdef MULTIPROCESSOR
-    membar_exit();
-#endif
-    mtx->mtx_owner = NULL;
+    mtx->mtx_owner = 0;
 
     if (mtx->mtx_wantipl != IPL_NONE)
         splx(s);
 }
+#endif
 
 #ifdef DDB
 void
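For reference, a minimal userland sketch of the address-to-bucket hashing done by mtx_park() above: dropping the low 6 bits puts mutexes that share a cache line into the same bucket, and the xor fold reduces the rest to a 7-bit index. Only the shift and mask constants mirror the diff; the main() driver and the sample addresses are made up purely for illustration.

#include <stdio.h>

#define MTX_PARKING_BITS	7
#define MTX_PARKING_LOTS	(1 << MTX_PARKING_BITS)
#define MTX_PARKING_MASK	(MTX_PARKING_LOTS - 1)

/*
 * Same folding as mtx_park(): drop the low 6 bits, then xor-fold the
 * upper bits down into the 7-bit bucket index.
 */
static unsigned long
park_index(unsigned long addr)
{
    addr >>= 6;
    addr ^= addr >> MTX_PARKING_BITS;
    return (addr & MTX_PARKING_MASK);
}

int
main(void)
{
    /* a few made-up kernel-ish addresses, purely for illustration */
    unsigned long addrs[] = {
        0xffffffff81a04000UL,
        0xffffffff81a04040UL,	/* next cache line: different bucket */
        0xffffffff81a04010UL,	/* same cache line: same bucket */
    };
    size_t i;

    for (i = 0; i < sizeof(addrs) / sizeof(addrs[0]); i++)
        printf("%#lx -> bucket %lu\n", addrs[i], park_index(addrs[i]));
    return (0);
}

Keeping the bucket array a power of two (MTX_PARKING_LOTS entries, indexed with MTX_PARKING_MASK) is what lets the fold above stay a couple of shifts and a mask.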