Index: lib/libc/Symbols.list =================================================================== RCS file: /cvs/src/lib/libc/Symbols.list,v diff -u -p -r1.90 Symbols.list --- lib/libc/Symbols.list 12 Aug 2024 20:56:55 -0000 1.90 +++ lib/libc/Symbols.list 2 Oct 2024 21:03:02 -0000 @@ -1705,6 +1705,12 @@ tcsetattr tcsetpgrp /* thread */ +__cmtx_init +__cmtx_enter +__cmtx_leave +__rcmtx_init +__rcmtx_enter +__rcmtx_leave _rthread_debug _spinlock _spinlocktry Index: lib/libc/dlfcn/init.c =================================================================== RCS file: /cvs/src/lib/libc/dlfcn/init.c,v diff -u -p -r1.24 init.c --- lib/libc/dlfcn/init.c 22 Jul 2024 22:06:27 -0000 1.24 +++ lib/libc/dlfcn/init.c 2 Oct 2024 21:03:02 -0000 @@ -21,6 +21,7 @@ #include #include #include /* timekeep */ +#include /* ncpus */ #ifndef PIC #include @@ -51,6 +52,7 @@ int _pagesize = 0; struct timekeep *_timekeep; unsigned long _hwcap, _hwcap2; int _hwcap_avail, _hwcap2_avail; +int _ncpus; /* * In dynamically linked binaries environ and __progname are overridden by @@ -130,6 +132,9 @@ _libc_preinit(int argc, char **argv, cha if (issetugid() == 0 && getenv("LIBC_NOUSERTC")) _timekeep = NULL; break; + case AUX_openbsd_ncpus: + _ncpus = aux->au_v; + break; } } @@ -167,6 +172,14 @@ _libc_preinit(int argc, char **argv, cha } } #endif /* !PIC */ + + if (_ncpus == 0) { + int mib[] = { CTL_HW, HW_NCPU }; + size_t len = sizeof(_ncpus); + + (void)sysctl(mib, sizeof(mib) / sizeof(mib[0]), + &_ncpus, &len, NULL, 0); + } } /* ARM just had to be different... */ Index: lib/libc/gen/sysconf.c =================================================================== RCS file: /cvs/src/lib/libc/gen/sysconf.c,v diff -u -p -r1.28 sysconf.c --- lib/libc/gen/sysconf.c 19 Jul 2022 09:25:44 -0000 1.28 +++ lib/libc/gen/sysconf.c 2 Oct 2024 21:03:02 -0000 @@ -45,6 +45,8 @@ #include #include +extern int _ncpus; /* passed by the kernel as an aux vector value */ + /* * sysconf -- * get configurable system variables. 
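For reference, the _ncpus value wired up above is the same count userland has always been able to get from the hw.ncpu sysctl; the aux vector just saves a syscall on the common path. A minimal standalone sketch of the equivalent lookup (illustrative only, not part of this diff):

	#include <sys/types.h>
	#include <sys/sysctl.h>
	#include <stdio.h>

	int
	main(void)
	{
		int mib[] = { CTL_HW, HW_NCPU };
		int ncpus = 0;
		size_t len = sizeof(ncpus);

		/*
		 * Same fallback _libc_preinit() uses when the kernel
		 * did not supply AUX_openbsd_ncpus.
		 */
		if (sysctl(mib, 2, &ncpus, &len, NULL, 0) == -1)
			return (1);
		printf("hw.ncpu: %d\n", ncpus);
		return (0);
	}

With the sysconf() hunk below, _SC_NPROCESSORS_CONF returns the aux vector value directly and only falls back to this sysctl path when _ncpus is 0.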
@@ -461,6 +463,8 @@ sysconf(int name) } case _SC_NPROCESSORS_CONF: + if (_ncpus != 0) + return (_ncpus); mib[0] = CTL_HW; mib[1] = HW_NCPU; break; Index: lib/libc/include/thread_private.h =================================================================== RCS file: /cvs/src/lib/libc/include/thread_private.h,v diff -u -p -r1.37 thread_private.h --- lib/libc/include/thread_private.h 18 Aug 2024 02:25:51 -0000 1.37 +++ lib/libc/include/thread_private.h 2 Oct 2024 21:03:02 -0000 @@ -6,6 +6,7 @@ #define _THREAD_PRIVATE_H_ extern int __isthreaded; +extern int _ncpus; #define _MALLOC_MUTEXES 32 void _malloc_init(int); @@ -288,33 +289,38 @@ struct __sem { int shared; }; -TAILQ_HEAD(pthread_queue, pthread); - -#ifdef FUTEX +struct __cmtx_node; +SIMPLEQ_HEAD(__cmtx_node_list, __cmtx_node); -struct pthread_mutex { - volatile unsigned int lock; - int type; - pthread_t owner; - int count; - int prioceiling; +struct __cmtx { + _atomic_lock_t spin; + pthread_t owner; + struct __cmtx_node_list list; }; -struct pthread_cond { - volatile unsigned int seq; - clockid_t clock; - struct pthread_mutex *mutex; +struct __rcmtx { + _atomic_lock_t spin; + pthread_t owner; + unsigned int depth; + struct __cmtx_node_list list; }; -struct pthread_rwlock { - volatile unsigned int value; +TAILQ_HEAD(pthread_queue, pthread); + +struct pthread_locker; + +struct pthread_waiter { + volatile uint32_t wait; + pthread_cond_t cv; + pthread_t owner; + TAILQ_ENTRY(pthread_waiter) entry; }; -#else +TAILQ_HEAD(pthread_waiters, pthread_waiter); struct pthread_mutex { _atomic_lock_t lock; - struct pthread_queue lockers; + struct pthread_waiters waiters; int type; pthread_t owner; int count; @@ -323,19 +329,11 @@ struct pthread_mutex { struct pthread_cond { _atomic_lock_t lock; - struct pthread_queue waiters; + struct pthread_waiters waiters; struct pthread_mutex *mutex; clockid_t clock; }; -struct pthread_rwlock { - _atomic_lock_t lock; - pthread_t owner; - struct pthread_queue writers; - int readers; -}; -#endif /* FUTEX */ - struct pthread_mutex_attr { int ma_type; int ma_protocol; @@ -406,6 +404,14 @@ struct pthread { /* * Internal functions exported from libc's thread bits for use by libpthread */ + +#define SPIN_COUNT 128 +#if defined(__i386__) || defined(__amd64__) +#define SPIN_WAIT() asm volatile("pause": : : "memory") +#else +#define SPIN_WAIT() do { } while (0) +#endif + void _spinlock(volatile _atomic_lock_t *); int _spinlocktry(volatile _atomic_lock_t *); void _spinunlock(volatile _atomic_lock_t *); @@ -414,6 +420,34 @@ void _rthread_debug(int, const char *, . 
__attribute__((__format__ (printf, 2, 3))); pid_t _thread_dofork(pid_t (*_sys_fork)(void)); void _thread_finalize(void); + +/* + * simple^Wmutex for libc/libpthread to use internally + */ +void __cmtx_init(struct __cmtx *); +void __cmtx_enter(struct __cmtx *); +void __cmtx_leave(struct __cmtx *); + +#define __CMTX_INITIALIZER(_cm) { \ + .spin = _SPINLOCK_UNLOCKED, \ + .owner = NULL, \ + .list = SIMPLEQ_HEAD_INITIALIZER(_cm.list), \ +} + +/* + * recursive mutex for libc/libpthread to use internally + */ +void __rcmtx_init(struct __rcmtx *); +int __rcmtx_enter_try(struct __rcmtx *); +void __rcmtx_enter(struct __rcmtx *); +void __rcmtx_leave(struct __rcmtx *); + +#define __RCMTX_INITIALIZER(_rcm) { \ + .spin = _SPINLOCK_UNLOCKED, \ + .owner = NULL, \ + .depth = 0, \ + .list = SIMPLEQ_HEAD_INITIALIZER(_rcm.list), \ +} /* * Threading syscalls not declared in system headers Index: lib/libc/thread/Makefile.inc =================================================================== RCS file: /cvs/src/lib/libc/thread/Makefile.inc,v diff -u -p -r1.19 Makefile.inc --- lib/libc/thread/Makefile.inc 6 Feb 2020 03:13:45 -0000 1.19 +++ lib/libc/thread/Makefile.inc 2 Oct 2024 21:03:02 -0000 @@ -12,6 +12,7 @@ SRCS+= rthread.c \ rthread_libc.c \ rthread_once.c \ rthread_tls.c \ + rthread_sync.c notyet= rthread_condattr_clock.c \ rthread_equal.c \ @@ -19,14 +20,6 @@ notyet= rthread_condattr_clock.c \ spinlock.c \ spinlocktry.c -.if ${MACHINE_ARCH} == "hppa" || ${MACHINE_ARCH} == "m88k" || \ - ${MACHINE_ARCH} == "sh" -SRCS+= rthread_sync.c -.else -CFLAGS+= -DFUTEX -SRCS+= rthread_mutex.c \ - rthread_cond.c -.endif .if defined(NOPIC) CFLAGS+=-DNO_PIC Index: lib/libc/thread/rthread.c =================================================================== RCS file: /cvs/src/lib/libc/thread/rthread.c,v diff -u -p -r1.9 rthread.c --- lib/libc/thread/rthread.c 12 Oct 2020 22:06:51 -0000 1.9 +++ lib/libc/thread/rthread.c 2 Oct 2024 21:03:02 -0000 @@ -21,11 +21,14 @@ #include #include +#include +#include #include #include #include #include +#include #include "rthread.h" @@ -43,11 +46,43 @@ struct pthread _initial_thread = { /* * internal support functions */ + +/* + * Wait for the spinlock to become unlocked. + * + * On uniprocessor systems it is pointless to spin waiting for + * another thread to release the lock because this thread occupies + * the only CPU, preventing the thread holding the lock from running + * and leaving the critical section. + * + * On multiprocessor systems we spin, but not forever in case there + * are more threads than CPUs still, and more progress might be made + * if we can get the other thread to run. + */ + +static inline void +_spinlock_wait(volatile _atomic_lock_t *lock) +{ + do { + if (_ncpus > 1) { + unsigned int spin; + + for (spin = 0; spin < SPIN_COUNT; spin++) { + SPIN_WAIT(); + if (*lock == _ATOMIC_LOCK_UNLOCKED) + return; + } + } + + sched_yield(); + } while (*lock != _ATOMIC_LOCK_UNLOCKED); +} + void _spinlock(volatile _atomic_lock_t *lock) { while (_atomic_lock(lock)) - sched_yield(); + _spinlock_wait(lock); membar_enter_after_atomic(); } DEF_STRONG(_spinlock); @@ -69,6 +104,185 @@ _spinunlock(volatile _atomic_lock_t *loc *lock = _ATOMIC_LOCK_UNLOCKED; } DEF_STRONG(_spinunlock); + +/* + * libc internal mutex + * + * the lock is implemented as a list of waiters protected by a spinlock. + * threads waiting for the lock add themselves to the list, and then + * spin on their own wait variable. 
+ * + * this avoids (some) contention on the lock data structure by + * having threads spin on their stack. the thread that "owns" the + * lock is responsible for checking if there are waiting threads and + * updating their wait variable to wake them up. + * + * it also provides ordered access to the critical section by having + * threads only woken up in the order the were queued on the lock. + * + * this in turn (ha) prevents the "thundering herd" in classic locks + * where all threads are woken up so they can try and take ownership. + */ + +struct __cmtx_node { + volatile uint32_t wait; + pthread_t owner; + SIMPLEQ_ENTRY(__cmtx_node) link; +}; + +void +__cmtx_init(struct __cmtx *cm) +{ + cm->spin = _SPINLOCK_UNLOCKED; + cm->owner = 0; + SIMPLEQ_INIT(&cm->list); +} + +void +__cmtx_enter(struct __cmtx *cm) +{ + pthread_t self = pthread_self(); + struct __cmtx_node node = { .wait = 1, .owner = self }; + pthread_t owner; + + _spinlock(&cm->spin); + owner = cm->owner; + if (owner == NULL) + cm->owner = self; + else + SIMPLEQ_INSERT_TAIL(&cm->list, &node, link); + _spinunlock(&cm->spin); + + if (owner == NULL) { + /* the spinlock ops provided enough membars */ + return; + } + + if (_ncpus > 1) { + unsigned int spin; + + for (spin = 0; spin < SPIN_COUNT; spin++) { + SPIN_WAIT(); + if (!node.wait) + goto locked; + } + } + + do { + futex(&node.wait, FUTEX_WAIT_PRIVATE, 1, NULL, NULL); + } while (node.wait); + +locked: + membar_enter(); + __builtin_prefetch(cm, 1); +} + +void +__cmtx_leave(struct __cmtx *cm) +{ + struct __cmtx_node *next; + + _spinlock(&cm->spin); + next = SIMPLEQ_FIRST(&cm->list); + if (next != NULL) { + SIMPLEQ_REMOVE_HEAD(&cm->list, link); + cm->owner = next->owner; + next->wait = 0; + } else + cm->owner = NULL; + _spinunlock(&cm->spin); /* this provides membar_exit() */ + + if (next != NULL) + futex(&next->wait, FUTEX_WAKE_PRIVATE, 1, NULL, NULL); +} + +void +__rcmtx_init(struct __rcmtx *rcm) +{ + rcm->spin = _SPINLOCK_UNLOCKED; + rcm->owner = NULL; + rcm->depth = 0; + SIMPLEQ_INIT(&rcm->list); +} + +int +__rcmtx_enter_try(struct __rcmtx *rcm) +{ + pthread_t self = pthread_self(); + pthread_t owner; + + _spinlock(&rcm->spin); + owner = rcm->owner; + if (owner == NULL) + rcm->owner = owner = self; + _spinunlock(&rcm->spin); + + if (owner != self) + return (0); + + rcm->depth++; + + return (1); +} + +void +__rcmtx_enter(struct __rcmtx *rcm) +{ + pthread_t self = pthread_self(); + struct __cmtx_node node = { .wait = 1, .owner = self }; + pthread_t owner; + + _spinlock(&rcm->spin); + owner = rcm->owner; + if (owner == NULL) + rcm->owner = owner = self; + else if (owner != self) + SIMPLEQ_INSERT_TAIL(&rcm->list, &node, link); + _spinunlock(&rcm->spin); + + if (owner != self) { + if (_ncpus > 1) { + unsigned int spin; + + for (spin = 0; spin < SPIN_COUNT; spin++) { + SPIN_WAIT(); + if (!node.wait) + goto locked; + } + } + + do { + futex(&node.wait, FUTEX_WAIT_PRIVATE, 1, NULL, NULL); + } while (node.wait); + +locked: + membar_enter(); + } + + rcm->depth++; +} + +void +__rcmtx_leave(struct __rcmtx *rcm) +{ + struct __cmtx_node *next; + + if (--rcm->depth > 0) + return; + + _spinlock(&rcm->spin); + next = SIMPLEQ_FIRST(&rcm->list); + if (next != NULL) { + SIMPLEQ_REMOVE_HEAD(&rcm->list, link); + rcm->owner = next->owner; + next->wait = 0; + } else + rcm->owner = NULL; + _spinunlock(&rcm->spin); /* this provides membar_exit() */ + + if (next != NULL) + futex(&next->wait, FUTEX_WAKE_PRIVATE, 1, NULL, NULL); +} static void _rthread_init(void) Index: lib/libc/thread/rthread_file.c 
=================================================================== RCS file: /cvs/src/lib/libc/thread/rthread_file.c,v diff -u -p -r1.3 rthread_file.c --- lib/libc/thread/rthread_file.c 27 Dec 2022 17:10:06 -0000 1.3 +++ lib/libc/thread/rthread_file.c 2 Oct 2024 21:03:02 -0000 @@ -38,6 +38,9 @@ * */ +#include +#include +#include #include #include #include @@ -48,16 +51,15 @@ #include "rthread_cb.h" /* - * The FILE lock structure. The FILE *fp is locked if the owner is - * not NULL. If not locked, the file lock structure can be - * reassigned to a different file by setting fp. + * The FILE lock structure. The FILE *fp locking is handled by the __rcmtx. + * The file lock structure can be reassigned to a different file by setting fp. */ struct file_lock { - LIST_ENTRY(file_lock) entry; /* Entry if file list. */ - FILE *fp; /* The target file. */ - struct pthread_queue lockers; - pthread_t owner; - int count; + LIST_ENTRY(file_lock) entry; /* Entry if file list. */ + FILE *fp; + + unsigned int refs; + struct __rcmtx lock; }; /* @@ -83,223 +85,201 @@ struct file_lock { * allocated statically in the hope that there won't be too many * collisions that require a malloc and an element added to the list. */ -static struct static_file_lock { - LIST_HEAD(file_list_head, file_lock) head; +static struct file_lock_bucket { + volatile unsigned int initted; + struct __cmtx lock; + LIST_HEAD(file_lock_list, file_lock) head; struct file_lock fl; } flh[NUM_HEADS]; -/* Lock for accesses to the hash table: */ +/* Lock for initialisation of the hash table: */ static _atomic_lock_t hash_lock = _SPINLOCK_UNLOCKED; +static struct file_lock_bucket * +file_bucket(FILE *fp) +{ + int idx = file_idx(fp); + struct file_lock_bucket *flb = &flh[idx]; + + if (!flb->initted) { + _spinlock(&hash_lock); + if (!flb->initted) { + __cmtx_init(&flb->lock); + LIST_INIT(&flb->head); + + __rcmtx_init(&flb->fl.lock); + + /* XXX barrier? */ + flb->initted = 1; + } + _spinunlock(&hash_lock); + } + + return (flb); +} + /* * Find a lock structure for a FILE, return NULL if the file is * not locked: */ -static -struct file_lock * -find_lock(int idx, FILE *fp) +static struct file_lock * +find_lock(struct file_lock_bucket *flb, FILE *fp) { struct file_lock *p; /* Check if the file is locked using the static structure: */ - if (flh[idx].fl.fp == fp && flh[idx].fl.owner != NULL) + if (flb->fl.fp == fp) { /* Return a pointer to the static lock: */ - p = &flh[idx].fl; - else { - /* Point to the first dynamic lock: */ - p = LIST_FIRST(&flh[idx].head); - + p = &flb->fl; + } else { /* * Loop through the dynamic locks looking for the * target file: */ - while (p != NULL && (p->fp != fp || p->owner == NULL)) - /* Not this file, try the next: */ - p = LIST_NEXT(p, entry); + LIST_FOREACH(p, &flb->head, entry) { + if (p->fp == fp) + break; + } } - return(p); + + return (p); } /* * Lock a file, assuming that there is no lock structure currently * assigned to it. 
*/ -static -struct file_lock * -do_lock(int idx, FILE *fp) +static struct file_lock * +do_lock(struct file_lock_bucket *flb, FILE *fp) { struct file_lock *p; /* Check if the static structure is not being used: */ - if (flh[idx].fl.owner == NULL) { + if (flb->fl.fp == NULL) { /* Return a pointer to the static lock: */ - p = &flh[idx].fl; - } - else { - /* Point to the first dynamic lock: */ - p = LIST_FIRST(&flh[idx].head); - + p = &flb->fl; + } else { /* * Loop through the dynamic locks looking for a * lock structure that is not being used: */ - while (p != NULL && p->owner != NULL) - /* This one is used, try the next: */ - p = LIST_NEXT(p, entry); + LIST_FOREACH(p, &flb->head, entry) { + if (p->fp == NULL) + break; + } } /* * If an existing lock structure has not been found, * allocate memory for a new one: */ - if (p == NULL && (p = (struct file_lock *) - malloc(sizeof(struct file_lock))) != NULL) { + if (p == NULL) { + p = malloc(sizeof(*p)); + if (p == NULL) + return (NULL); + + __rcmtx_init(&p->lock); + p->refs = 0; + /* Add the new element to the list: */ - LIST_INSERT_HEAD(&flh[idx].head, p, entry); + LIST_INSERT_HEAD(&flb->head, p, entry); } - /* Check if there is a lock structure to acquire: */ - if (p != NULL) { - /* Acquire the lock for the running thread: */ - p->fp = fp; - p->owner = pthread_self(); - p->count = 1; - TAILQ_INIT(&p->lockers); - } - return(p); + p->fp = fp; + + return (p); } void _thread_flockfile(FILE * fp) { - int idx = file_idx(fp); - struct file_lock *p; - pthread_t self = pthread_self(); + struct file_lock_bucket *flb = file_bucket(fp); + struct file_lock *p; - /* Lock the hash table: */ - _spinlock(&hash_lock); + __cmtx_enter(&flb->lock); /* Get a pointer to any existing lock for the file: */ - if ((p = find_lock(idx, fp)) == NULL) { + p = find_lock(flb, fp); + if (p == NULL) { /* * The file is not locked, so this thread can * grab the lock: */ - do_lock(idx, fp); - - /* - * The file is already locked, so check if the - * running thread is the owner: - */ - } else if (p->owner == self) { - /* - * The running thread is already the - * owner, so increment the count of - * the number of times it has locked - * the file: - */ - p->count++; - } else { - /* - * The file is locked for another thread. - * Append this thread to the queue of - * threads waiting on the lock. - */ - TAILQ_INSERT_TAIL(&p->lockers,self,waiting); - while (p->owner != self) { - __thrsleep(self, 0, NULL, &hash_lock, NULL); - _spinlock(&hash_lock); + p = do_lock(flb, fp); + if (p == NULL) { + __cmtx_leave(&flb->lock); + /* XXX unable to allocate dynamic lock! 
*/ + /* abort(); */ + return; } } + p->refs++; + __cmtx_leave(&flb->lock); - /* Unlock the hash table: */ - _spinunlock(&hash_lock); + /* keep ref */ + __rcmtx_enter(&p->lock); } int _thread_ftrylockfile(FILE * fp) { - int ret = -1; - int idx = file_idx(fp); - struct file_lock *p; - - /* Lock the hash table: */ - _spinlock(&hash_lock); + struct file_lock_bucket *flb = file_bucket(fp); + struct file_lock *p; + int rv = -1; + __cmtx_enter(&flb->lock); /* Get a pointer to any existing lock for the file: */ - if ((p = find_lock(idx, fp)) == NULL) { + p = find_lock(flb, fp); + if (p == NULL) { /* * The file is not locked, so this thread can * grab the lock: */ - p = do_lock(idx, fp); - - /* - * The file is already locked, so check if the - * running thread is the owner: - */ - } else if (p->owner == pthread_self()) { - /* - * The running thread is already the - * owner, so increment the count of - * the number of times it has locked - * the file: - */ - p->count++; - } else { - /* - * The file is locked for another thread, - * so this try fails. - */ - p = NULL; + p = do_lock(flb, fp); + if (p == NULL) { + __cmtx_leave(&flb->lock); + /* XXX unable to allocate dynamic lock! */ + /* abort(); */ + return (-1); + } } + if (__rcmtx_enter_try(&p->lock)) { + p->refs++; + rv = 0; + } else if (p->refs == 0) { + /* This gives the entry back to the bucket: */ + p->fp = NULL; + } + __cmtx_leave(&flb->lock); - /* Unlock the hash table: */ - _spinunlock(&hash_lock); - - /* Check if the lock was obtained: */ - if (p != NULL) - /* Return success: */ - ret = 0; - - return (ret); + return (rv); } -void +void _thread_funlockfile(FILE * fp) { - int idx = file_idx(fp); - struct file_lock *p; + struct file_lock_bucket *flb = file_bucket(fp); + struct file_lock *p; - /* Lock the hash table: */ - _spinlock(&hash_lock); + __cmtx_enter(&flb->lock); + /* Get a pointer to the lock for the file: */ + p = find_lock(flb, fp); + if (--p->refs == 0) { + /* This gives the entry back to the bucket: */ + p->fp = NULL; + } + __cmtx_leave(&flb->lock); /* - * Get a pointer to the lock for the file and check that - * the running thread is the one with the lock: + * we can do this after dropping the ref but before releasing + * the lock because file_lock structures are never freed. + * + * the worst that can happen is _thread_flockfile() taking + * over the struct and having __rcmtx_enter wait for for this + * __rcmtx_leave call. + * _thread_ftrylockfile calls __rcmtx_entry_try while holding + * flb->lock, so it cannot see a state where fp is NULL. 
*/ - if ((p = find_lock(idx, fp)) != NULL && p->owner == pthread_self()) { - /* - * Check if this thread has locked the FILE - * more than once: - */ - if (--p->count == 0) { - /* Get the new owner of the lock: */ - if ((p->owner = TAILQ_FIRST(&p->lockers)) != NULL) { - /* Pop the thread off the queue: */ - TAILQ_REMOVE(&p->lockers,p->owner,waiting); - - /* - * This is the first lock for the new - * owner: - */ - p->count = 1; - - __thrwakeup(p->owner, 1); - } - } - } - /* Unlock the hash table: */ - _spinunlock(&hash_lock); + __rcmtx_leave(&p->lock); } Index: lib/libc/thread/rthread_libc.c =================================================================== RCS file: /cvs/src/lib/libc/thread/rthread_libc.c,v diff -u -p -r1.4 rthread_libc.c --- lib/libc/thread/rthread_libc.c 6 Jan 2021 19:54:17 -0000 1.4 +++ lib/libc/thread/rthread_libc.c 2 Oct 2024 21:03:02 -0000 @@ -152,24 +152,9 @@ _thread_mutex_destroy(void **mutex) /* * the malloc lock */ -#ifndef FUTEX -#define MALLOC_LOCK_INITIALIZER(n) { \ - _SPINLOCK_UNLOCKED, \ - TAILQ_HEAD_INITIALIZER(malloc_lock[n].lockers), \ - PTHREAD_MUTEX_DEFAULT, \ - NULL, \ - 0, \ - -1 } -#else -#define MALLOC_LOCK_INITIALIZER(n) { \ - _SPINLOCK_UNLOCKED, \ - PTHREAD_MUTEX_DEFAULT, \ - NULL, \ - 0, \ - -1 } -#endif +#define MALLOC_LOCK_INITIALIZER(n) __CMTX_INITIALIZER(malloc_lock[n]) -static struct pthread_mutex malloc_lock[_MALLOC_MUTEXES] = { +static struct __cmtx malloc_lock[_MALLOC_MUTEXES] = { MALLOC_LOCK_INITIALIZER(0), MALLOC_LOCK_INITIALIZER(1), MALLOC_LOCK_INITIALIZER(2), @@ -204,51 +189,16 @@ static struct pthread_mutex malloc_lock[ MALLOC_LOCK_INITIALIZER(31) }; -static pthread_mutex_t malloc_mutex[_MALLOC_MUTEXES] = { - &malloc_lock[0], - &malloc_lock[1], - &malloc_lock[2], - &malloc_lock[3], - &malloc_lock[4], - &malloc_lock[5], - &malloc_lock[6], - &malloc_lock[7], - &malloc_lock[8], - &malloc_lock[9], - &malloc_lock[10], - &malloc_lock[11], - &malloc_lock[12], - &malloc_lock[13], - &malloc_lock[14], - &malloc_lock[15], - &malloc_lock[16], - &malloc_lock[17], - &malloc_lock[18], - &malloc_lock[19], - &malloc_lock[20], - &malloc_lock[21], - &malloc_lock[22], - &malloc_lock[23], - &malloc_lock[24], - &malloc_lock[25], - &malloc_lock[26], - &malloc_lock[27], - &malloc_lock[28], - &malloc_lock[29], - &malloc_lock[30], - &malloc_lock[31] -}; - void _thread_malloc_lock(int i) { - pthread_mutex_lock(&malloc_mutex[i]); + __cmtx_enter(&malloc_lock[i]); } void _thread_malloc_unlock(int i) { - pthread_mutex_unlock(&malloc_mutex[i]); + __cmtx_leave(&malloc_lock[i]); } static void @@ -256,31 +206,25 @@ _thread_malloc_reinit(void) { int i; - for (i = 0; i < _MALLOC_MUTEXES; i++) { - malloc_lock[i].lock = _SPINLOCK_UNLOCKED; -#ifndef FUTEX - TAILQ_INIT(&malloc_lock[i].lockers); -#endif - malloc_lock[i].owner = NULL; - malloc_lock[i].count = 0; - } + for (i = 0; i < _MALLOC_MUTEXES; i++) + __cmtx_init(&malloc_lock[i]); } /* * atexit lock */ -static _atomic_lock_t atexit_lock = _SPINLOCK_UNLOCKED; +static struct __cmtx atexit_lock = __CMTX_INITIALIZER(atexit_lock); void _thread_atexit_lock(void) { - _spinlock(&atexit_lock); + __cmtx_enter(&atexit_lock); } void _thread_atexit_unlock(void) { - _spinunlock(&atexit_lock); + __cmtx_leave(&atexit_lock); } /* @@ -303,18 +247,18 @@ _thread_atfork_unlock(void) /* * arc4random lock */ -static _atomic_lock_t arc4_lock = _SPINLOCK_UNLOCKED; +static struct __cmtx arc4_lock = __CMTX_INITIALIZER(arc4_lock); void _thread_arc4_lock(void) { - _spinlock(&arc4_lock); + __cmtx_enter(&arc4_lock); } void _thread_arc4_unlock(void) 
{ - _spinunlock(&arc4_lock); + __cmtx_leave(&arc4_lock); } pid_t Index: lib/libc/thread/rthread_mutex.c =================================================================== RCS file: /cvs/src/lib/libc/thread/rthread_mutex.c,v diff -u -p -r1.6 rthread_mutex.c --- lib/libc/thread/rthread_mutex.c 20 Sep 2024 02:00:46 -0000 1.6 +++ lib/libc/thread/rthread_mutex.c 2 Oct 2024 21:03:02 -0000 @@ -36,14 +36,7 @@ enum { CONTENDED = 2, /* threads waiting for this mutex */ }; -#define SPIN_COUNT 128 -#if defined(__i386__) || defined(__amd64__) -#define SPIN_WAIT() asm volatile("pause": : : "memory") -#else -#define SPIN_WAIT() do { } while (0) -#endif - -static _atomic_lock_t static_init_lock = _SPINLOCK_UNLOCKED; +static struct __cmtx static_init_lock = __CMTX_INITIALIZER(static_init_lock); int pthread_mutex_init(pthread_mutex_t *mutexp, const pthread_mutexattr_t *attr) @@ -151,10 +144,10 @@ _rthread_mutex_timedlock(pthread_mutex_t * is NULL. */ if (*mutexp == NULL) { - _spinlock(&static_init_lock); + __cmtx_enter(&static_init_lock); if (*mutexp == NULL) error = pthread_mutex_init(mutexp, NULL); - _spinunlock(&static_init_lock); + __cmtx_leave(&static_init_lock); if (error != 0) return (EINVAL); } Index: lib/libc/thread/rthread_sync.c =================================================================== RCS file: /cvs/src/lib/libc/thread/rthread_sync.c,v diff -u -p -r1.6 rthread_sync.c --- lib/libc/thread/rthread_sync.c 10 Jan 2024 04:28:43 -0000 1.6 +++ lib/libc/thread/rthread_sync.c 2 Oct 2024 21:03:02 -0000 @@ -28,9 +28,10 @@ #include #include "rthread.h" +#include "synch.h" #include "cancel.h" /* in libc/include */ -static _atomic_lock_t static_init_lock = _SPINLOCK_UNLOCKED; +static struct __cmtx static_init_lock = __CMTX_INITIALIZER(static_init_lock); /* * mutexen @@ -44,7 +45,7 @@ pthread_mutex_init(pthread_mutex_t *mute if (!mutex) return (errno); mutex->lock = _SPINLOCK_UNLOCKED; - TAILQ_INIT(&mutex->lockers); + TAILQ_INIT(&mutex->waiters); if (attr == NULL) { mutex->type = PTHREAD_MUTEX_DEFAULT; mutex->prioceiling = -1; @@ -68,7 +69,7 @@ pthread_mutex_destroy(pthread_mutex_t *m mutex = (struct pthread_mutex *)*mutexp; if (mutex) { if (mutex->count || mutex->owner != NULL || - !TAILQ_EMPTY(&mutex->lockers)) { + !TAILQ_EMPTY(&mutex->waiters)) { #define MSG "pthread_mutex_destroy on mutex with waiters!\n" write(2, MSG, sizeof(MSG) - 1); #undef MSG @@ -87,6 +88,9 @@ _rthread_mutex_lock(pthread_mutex_t *mut { struct pthread_mutex *mutex; pthread_t self = pthread_self(); + pthread_t owner; + struct pthread_waiter waiter = { .owner = self, .wait = 1 }; + unsigned int spin; int ret = 0; /* @@ -96,10 +100,10 @@ _rthread_mutex_lock(pthread_mutex_t *mut * is NULL. */ if (*mutexp == NULL) { - _spinlock(&static_init_lock); + __cmtx_enter(&static_init_lock); if (*mutexp == NULL) ret = pthread_mutex_init(mutexp, NULL); - _spinunlock(&static_init_lock); + __cmtx_leave(&static_init_lock); if (ret != 0) return (EINVAL); } @@ -107,62 +111,96 @@ _rthread_mutex_lock(pthread_mutex_t *mut _rthread_debug(5, "%p: mutex_lock %p\n", (void *)self, (void *)mutex); _spinlock(&mutex->lock); - if (mutex->owner == NULL && TAILQ_EMPTY(&mutex->lockers)) { + owner = mutex->owner; + if (owner == NULL) { assert(mutex->count == 0); - mutex->owner = self; - } else if (mutex->owner == self) { + assert(TAILQ_EMPTY(&mutex->waiters)); + mutex->owner = owner = self; + } else if (owner == self) { assert(mutex->count > 0); /* already owner? 
handle recursive behavior */ - if (mutex->type != PTHREAD_MUTEX_RECURSIVE) - { - if (trywait || - mutex->type == PTHREAD_MUTEX_ERRORCHECK) { - _spinunlock(&mutex->lock); - return (trywait ? EBUSY : EDEADLK); + if (mutex->type != PTHREAD_MUTEX_RECURSIVE) { + if (trywait) { + ret = EBUSY; + goto err; } /* self-deadlock is disallowed by strict */ - if (mutex->type == PTHREAD_MUTEX_STRICT_NP && - abstime == NULL) + if (mutex->type == PTHREAD_MUTEX_STRICT_NP) abort(); - /* self-deadlock, possibly until timeout */ - while (__thrsleep(self, CLOCK_REALTIME, abstime, - &mutex->lock, NULL) != EWOULDBLOCK) - _spinlock(&mutex->lock); - return (ETIMEDOUT); - } - if (mutex->count == INT_MAX) { - _spinunlock(&mutex->lock); - return (EAGAIN); + /* + * The pthread_mutex_lock() function may fail if + * a deadlock condition was detected. + */ + ret = EDEADLK; + goto err; } } else if (trywait) { /* try failed */ - _spinunlock(&mutex->lock); - return (EBUSY); + ret = EBUSY; + goto err; } else { - /* add to the wait queue and block until at the head */ - TAILQ_INSERT_TAIL(&mutex->lockers, self, waiting); - while (mutex->owner != self) { - ret = __thrsleep(self, CLOCK_REALTIME, abstime, - &mutex->lock, NULL); - _spinlock(&mutex->lock); - assert(mutex->owner != NULL); - if (ret == EWOULDBLOCK) { - if (mutex->owner == self) - break; - TAILQ_REMOVE(&mutex->lockers, self, waiting); - _spinunlock(&mutex->lock); - return (ETIMEDOUT); + /* add to the wait queue */ + TAILQ_INSERT_TAIL(&mutex->waiters, &waiter, entry); + } + _spinunlock(&mutex->lock); + + if (owner == self) { + int count = mutex->count; + if (count == INT_MAX) + return (EAGAIN); + mutex->count = count + 1; + + /* the spinlock has done enough membars */ + return (0); + } + +#if 0 + if (ncpus > 1) { + unsigned int spin; + + for (spin = 0; spin < SPIN_COUNT; spin++) { + SPIN_WAIT(); + if (!waiter.wait) { + membar_enter(); + return (0); } } } +#endif - mutex->count++; - _spinunlock(&mutex->lock); - + do { + ret = _twait(&waiter.wait, 1, CLOCK_REALTIME, abstime); + if (ret == ETIMEDOUT) + goto tmo; + } while (waiter.wait); + +#if 0 + assert(mutex->owner == self); + assert(mutex->count > 0); +#endif + __builtin_prefetch(mutex, 1); + membar_enter(); return (0); + +tmo: + assert(abstime != NULL); + _spinlock(&mutex->lock); + if (waiter.wait) { + /* take ourself off the wait queue */ + TAILQ_REMOVE(&mutex->waiters, &waiter, entry); + } else { + /* the timeout lost a race with actually getting the lock */ + assert(mutex->owner == self); + assert(mutex->count > 0); + ret = 0; + } + /* FALLTHROUGH */ +err: + _spinunlock(&mutex->lock); + return (ret); } int @@ -189,6 +227,7 @@ pthread_mutex_unlock(pthread_mutex_t *mu { pthread_t self = pthread_self(); struct pthread_mutex *mutex = (struct pthread_mutex *)*mutexp; + int count; _rthread_debug(5, "%p: mutex_unlock %p\n", (void *)self, (void *)mutex); @@ -221,17 +260,29 @@ pthread_mutex_unlock(pthread_mutex_t *mu } } - if (--mutex->count == 0) { - pthread_t next; + count = mutex->count - 1; + if (count == 0) { + struct pthread_waiter *nwaiter = NULL; _spinlock(&mutex->lock); - mutex->owner = next = TAILQ_FIRST(&mutex->lockers); - if (next != NULL) - TAILQ_REMOVE(&mutex->lockers, next, waiting); + nwaiter = TAILQ_FIRST(&mutex->waiters); + if (nwaiter != NULL) { + /* move ownership to the next thread from the list */ + TAILQ_REMOVE(&mutex->waiters, nwaiter, entry); + mutex->owner = nwaiter->owner; + /* leave mutex->count at 1 for the next thread */ + nwaiter->owner = NULL; + nwaiter->wait = 0; /* let them proceed */ + } 
else { + mutex->owner = NULL; + mutex->count = 0; + } _spinunlock(&mutex->lock); - if (next != NULL) - __thrwakeup(next, 1); - } + + if (nwaiter != NULL) + _wake(&nwaiter->wait, 1); + } else + mutex->count = count; return (0); } @@ -282,18 +333,20 @@ pthread_cond_destroy(pthread_cond_t *con } int -pthread_cond_timedwait(pthread_cond_t *condp, pthread_mutex_t *mutexp, +_rthread_cond_timedwait(pthread_cond_t *condp, pthread_mutex_t *mutexp, const struct timespec *abstime) { pthread_cond_t cond; struct pthread_mutex *mutex = (struct pthread_mutex *)*mutexp; struct tib *tib = TIB_GET(); pthread_t self = tib->tib_thread; - pthread_t next; + struct pthread_waiter waiter = { .owner = self, .wait = 1 }; + struct pthread_waiter *nwaiter; + pthread_t owner; int mutex_count; int canceled = 0; - int rv = 0; int error; + int rv = 0; PREP_CANCEL_POINT(tib); if (!*condp) @@ -317,10 +370,6 @@ pthread_cond_timedwait(pthread_cond_t *c abort(); } - if (abstime == NULL || abstime->tv_nsec < 0 || - abstime->tv_nsec >= 1000000000) - return (EINVAL); - ENTER_DELAYED_CANCEL_POINT(tib, self); _spinlock(&cond->lock); @@ -340,35 +389,35 @@ pthread_cond_timedwait(pthread_cond_t *c /* snag the count in case this is a recursive mutex */ mutex_count = mutex->count; + waiter.cv = cond; + TAILQ_INSERT_TAIL(&cond->waiters, &waiter, entry); + /* transfer from the mutex queue to the condvar queue */ _spinlock(&mutex->lock); - self->blocking_cond = cond; - TAILQ_INSERT_TAIL(&cond->waiters, self, waiting); _spinunlock(&cond->lock); - /* wake the next guy blocked on the mutex */ - mutex->count = 0; - mutex->owner = next = TAILQ_FIRST(&mutex->lockers); - if (next != NULL) { - TAILQ_REMOVE(&mutex->lockers, next, waiting); - __thrwakeup(next, 1); + nwaiter = TAILQ_FIRST(&mutex->waiters); + if (nwaiter != NULL) { + /* move ownership to the next thread from the list */ + TAILQ_REMOVE(&mutex->waiters, nwaiter, entry); + mutex->owner = nwaiter->owner; + mutex->count = 1; + nwaiter->wait = 0; /* let them proceed */ + } else { + mutex->owner = NULL; + mutex->count = 0; } + _spinunlock(&mutex->lock); - /* wait until we're the owner of the mutex again */ - while (mutex->owner != self) { - error = __thrsleep(self, cond->clock, abstime, - &mutex->lock, &self->delayed_cancel); + /* wake the next guy blocked on the mutex */ + if (nwaiter != NULL) + _wake(&nwaiter->wait, 1); - /* - * If abstime == NULL, then we're definitely waiting - * on the mutex instead of the condvar, and are - * just waiting for mutex ownership, regardless of - * why we woke up. - */ - if (abstime == NULL) { - _spinlock(&mutex->lock); + /* wait until we're the owner of the mutex again */ + while (waiter.wait) { + error = _twait(&waiter.wait, 1, cond->clock, abstime); + if (error == 0 || error == EAGAIN) continue; - } /* * If we took a normal signal (not from @@ -377,10 +426,8 @@ pthread_cond_timedwait(pthread_cond_t *c */ if ((error == EINTR || error == ECANCELED) && (tib->tib_canceled == 0 || - (tib->tib_cantcancel & CANCEL_DISABLED))) { - _spinlock(&mutex->lock); + (tib->tib_cantcancel & CANCEL_DISABLED))) continue; - } /* * The remaining reasons for waking up (normal @@ -388,50 +435,55 @@ pthread_cond_timedwait(pthread_cond_t *c * we won't be staying in the condvar queue and * we'll no longer time out or be cancelable. 
*/ - abstime = NULL; LEAVE_CANCEL_POINT_INNER(tib, 0); + canceled = 1; + + /* if timeout or canceled, make note of that */ + if (error == ETIMEDOUT) + rv = ETIMEDOUT; + + abstime = NULL; - /* - * If we're no longer in the condvar's queue then - * we're just waiting for mutex ownership. Need - * cond->lock here to prevent race with cond_signal(). - */ _spinlock(&cond->lock); - if (self->blocking_cond == NULL) { + if (!waiter.wait) { + /* we lost a race with a signal and mutex */ _spinunlock(&cond->lock); - _spinlock(&mutex->lock); - continue; + assert(mutex->owner == self); + rv = 0; + break; } - assert(self->blocking_cond == cond); - /* if timeout or canceled, make note of that */ - if (error == EWOULDBLOCK) - rv = ETIMEDOUT; - else if (error == EINTR) - canceled = 1; + /* something has already moved us off the cond wait list */ + if (waiter.cv == NULL) { + _spinunlock(&cond->lock); + continue; + } - /* transfer between the queues */ - TAILQ_REMOVE(&cond->waiters, self, waiting); - assert(mutex == cond->mutex); + /* move to the mutex */ + assert(waiter.cv == cond); + waiter.cv = NULL; + TAILQ_REMOVE(&cond->waiters, &waiter, entry); if (TAILQ_EMPTY(&cond->waiters)) cond->mutex = NULL; - self->blocking_cond = NULL; - _spinunlock(&cond->lock); + _spinlock(&mutex->lock); + _spinunlock(&cond->lock); + owner = mutex->owner; /* mutex unlocked right now? */ - if (mutex->owner == NULL && - TAILQ_EMPTY(&mutex->lockers)) { - assert(mutex->count == 0); + if (owner == NULL) { mutex->owner = self; + _spinunlock(&mutex->lock); break; } - TAILQ_INSERT_TAIL(&mutex->lockers, self, waiting); + assert(owner != self); + + TAILQ_INSERT_TAIL(&mutex->waiters, &waiter, entry); + _spinunlock(&mutex->lock); } /* restore the mutex's count */ mutex->count = mutex_count; - _spinunlock(&mutex->lock); LEAVE_CANCEL_POINT_INNER(tib, canceled); @@ -439,150 +491,29 @@ pthread_cond_timedwait(pthread_cond_t *c } int -pthread_cond_wait(pthread_cond_t *condp, pthread_mutex_t *mutexp) +pthread_cond_timedwait(pthread_cond_t *condp, pthread_mutex_t *mutexp, + const struct timespec *abstime) { - pthread_cond_t cond; - struct pthread_mutex *mutex = (struct pthread_mutex *)*mutexp; - struct tib *tib = TIB_GET(); - pthread_t self = tib->tib_thread; - pthread_t next; - int mutex_count; - int canceled = 0; - int error; - PREP_CANCEL_POINT(tib); - - if (!*condp) - if ((error = pthread_cond_init(condp, NULL))) - return (error); - cond = *condp; - _rthread_debug(5, "%p: cond_wait %p,%p\n", (void *)self, - (void *)cond, (void *)mutex); - - if (mutex == NULL) -#if PTHREAD_MUTEX_DEFAULT == PTHREAD_MUTEX_ERRORCHECK - return (EPERM); -#else - abort(); -#endif - - if (mutex->owner != self) { - if (mutex->type == PTHREAD_MUTEX_ERRORCHECK) - return (EPERM); - else - abort(); - } - - ENTER_DELAYED_CANCEL_POINT(tib, self); - - _spinlock(&cond->lock); - - /* mark the condvar as being associated with this mutex */ - if (cond->mutex == NULL) { - cond->mutex = mutex; - assert(TAILQ_EMPTY(&cond->waiters)); - } else if (cond->mutex != mutex) { - assert(cond->mutex == mutex); - _spinunlock(&cond->lock); - LEAVE_CANCEL_POINT_INNER(tib, 1); + if (abstime == NULL || abstime->tv_nsec < 0 || + abstime->tv_nsec >= 1000000000) return (EINVAL); - } else - assert(! 
TAILQ_EMPTY(&cond->waiters)); - - /* snag the count in case this is a recursive mutex */ - mutex_count = mutex->count; - - /* transfer from the mutex queue to the condvar queue */ - _spinlock(&mutex->lock); - self->blocking_cond = cond; - TAILQ_INSERT_TAIL(&cond->waiters, self, waiting); - _spinunlock(&cond->lock); - /* wake the next guy blocked on the mutex */ - mutex->count = 0; - mutex->owner = next = TAILQ_FIRST(&mutex->lockers); - if (next != NULL) { - TAILQ_REMOVE(&mutex->lockers, next, waiting); - __thrwakeup(next, 1); - } - - /* wait until we're the owner of the mutex again */ - while (mutex->owner != self) { - error = __thrsleep(self, 0, NULL, &mutex->lock, - &self->delayed_cancel); - - /* - * If we took a normal signal (not from - * cancellation) then we should just go back to - * sleep without changing state (timeouts, etc). - */ - if ((error == EINTR || error == ECANCELED) && - (tib->tib_canceled == 0 || - (tib->tib_cantcancel & CANCEL_DISABLED))) { - _spinlock(&mutex->lock); - continue; - } - - /* - * The remaining reasons for waking up (normal - * wakeup and cancellation) all mean that we won't - * be staying in the condvar queue and we'll no - * longer be cancelable. - */ - LEAVE_CANCEL_POINT_INNER(tib, 0); - - /* - * If we're no longer in the condvar's queue then - * we're just waiting for mutex ownership. Need - * cond->lock here to prevent race with cond_signal(). - */ - _spinlock(&cond->lock); - if (self->blocking_cond == NULL) { - _spinunlock(&cond->lock); - _spinlock(&mutex->lock); - continue; - } - assert(self->blocking_cond == cond); - - /* if canceled, make note of that */ - if (error == EINTR) - canceled = 1; - - /* transfer between the queues */ - TAILQ_REMOVE(&cond->waiters, self, waiting); - assert(mutex == cond->mutex); - if (TAILQ_EMPTY(&cond->waiters)) - cond->mutex = NULL; - self->blocking_cond = NULL; - _spinunlock(&cond->lock); - _spinlock(&mutex->lock); - - /* mutex unlocked right now? */ - if (mutex->owner == NULL && - TAILQ_EMPTY(&mutex->lockers)) { - assert(mutex->count == 0); - mutex->owner = self; - break; - } - TAILQ_INSERT_TAIL(&mutex->lockers, self, waiting); - } - - /* restore the mutex's count */ - mutex->count = mutex_count; - _spinunlock(&mutex->lock); - - LEAVE_CANCEL_POINT_INNER(tib, canceled); - - return (0); + return (_rthread_cond_timedwait(condp, mutexp, abstime)); } +int +pthread_cond_wait(pthread_cond_t *condp, pthread_mutex_t *mutexp) +{ + return (_rthread_cond_timedwait(condp, mutexp, NULL)); +} int pthread_cond_signal(pthread_cond_t *condp) { pthread_cond_t cond; struct pthread_mutex *mutex; - pthread_t thread; - int wakeup; + struct pthread_waiter *nwaiter; + pthread_t owner; /* uninitialized? Then there's obviously no one waiting! 
*/ if (!*condp) @@ -591,17 +522,18 @@ pthread_cond_signal(pthread_cond_t *cond cond = *condp; _rthread_debug(5, "%p: cond_signal %p,%p\n", (void *)pthread_self(), (void *)cond, (void *)cond->mutex); + _spinlock(&cond->lock); - thread = TAILQ_FIRST(&cond->waiters); - if (thread == NULL) { + nwaiter = TAILQ_FIRST(&cond->waiters); + if (nwaiter == NULL) { assert(cond->mutex == NULL); _spinunlock(&cond->lock); return (0); } - assert(thread->blocking_cond == cond); - TAILQ_REMOVE(&cond->waiters, thread, waiting); - thread->blocking_cond = NULL; + assert(nwaiter->cv == cond); + nwaiter->cv = NULL; + TAILQ_REMOVE(&cond->waiters, nwaiter, entry); mutex = cond->mutex; assert(mutex != NULL); @@ -612,14 +544,19 @@ pthread_cond_signal(pthread_cond_t *cond _spinlock(&mutex->lock); _spinunlock(&cond->lock); - wakeup = mutex->owner == NULL && TAILQ_EMPTY(&mutex->lockers); - if (wakeup) - mutex->owner = thread; - else - TAILQ_INSERT_TAIL(&mutex->lockers, thread, waiting); + owner = mutex->owner; + if (owner == NULL) { + mutex->owner = nwaiter->owner; + /* mutex->count will be fixed by cond wait tail */ + nwaiter->wait = 0; + } else { + assert(owner != nwaiter->owner); + TAILQ_INSERT_TAIL(&mutex->waiters, nwaiter, entry); + } _spinunlock(&mutex->lock); - if (wakeup) - __thrwakeup(thread, 1); + + if (owner == NULL) + _wake(&nwaiter->wait, 1); return (0); } @@ -629,9 +566,9 @@ pthread_cond_broadcast(pthread_cond_t *c { pthread_cond_t cond; struct pthread_mutex *mutex; - pthread_t thread; - pthread_t p; - int wakeup; + struct pthread_waiter *nwaiter, *nnwaiter; + struct pthread_waiter **lwaiterp; + pthread_t owner; /* uninitialized? Then there's obviously no one waiting! */ if (!*condp) @@ -640,51 +577,64 @@ pthread_cond_broadcast(pthread_cond_t *c cond = *condp; _rthread_debug(5, "%p: cond_broadcast %p,%p\n", (void *)pthread_self(), (void *)cond, (void *)cond->mutex); + _spinlock(&cond->lock); - thread = TAILQ_FIRST(&cond->waiters); - if (thread == NULL) { + nwaiter = TAILQ_FIRST(&cond->waiters); + if (nwaiter == NULL) { assert(cond->mutex == NULL); _spinunlock(&cond->lock); return (0); } + lwaiterp = cond->waiters.tqh_last; mutex = cond->mutex; assert(mutex != NULL); + cond->mutex = NULL; + TAILQ_INIT(&cond->waiters); + /* walk the list, clearing the "blocked on condvar" pointer */ - p = thread; - do - p->blocking_cond = NULL; - while ((p = TAILQ_NEXT(p, waiting)) != NULL); + nnwaiter = nwaiter; + do { + assert(nnwaiter->cv == cond); + nnwaiter->cv = NULL; + + nnwaiter = TAILQ_NEXT(nnwaiter, entry); + } while (nnwaiter != NULL); + + _spinlock(&mutex->lock); + _spinunlock(&cond->lock); + + /* if the mutex is unowned, we can wake up the first waiter now */ + owner = mutex->owner; + if (owner == NULL) { + nnwaiter = TAILQ_NEXT(nwaiter, entry); + + mutex->owner = nwaiter->owner; + /* mutex->count will be fixed by cond wait tail */ + nwaiter->wait = 0; + } else { + /* move the whole list to the mutex waiters */ + nnwaiter = nwaiter; + } /* * We want to transfer all the threads from the condvar's list * to the mutex's list. The TAILQ_* macros don't let us do that * efficiently, so this is direct list surgery. Pay attention! 
*/ + if (nnwaiter != NULL) { + /* 1) attach the first thread to the end of the mutex's list */ + nnwaiter->entry.tqe_prev = mutex->waiters.tqh_last; + *(mutex->waiters.tqh_last) = nnwaiter; - /* 1) attach the first thread to the end of the mutex's list */ - _spinlock(&mutex->lock); - wakeup = mutex->owner == NULL && TAILQ_EMPTY(&mutex->lockers); - thread->waiting.tqe_prev = mutex->lockers.tqh_last; - *(mutex->lockers.tqh_last) = thread; - - /* 2) fix up the end pointer for the mutex's list */ - mutex->lockers.tqh_last = cond->waiters.tqh_last; - - if (wakeup) { - TAILQ_REMOVE(&mutex->lockers, thread, waiting); - mutex->owner = thread; - _spinunlock(&mutex->lock); - __thrwakeup(thread, 1); - } else - _spinunlock(&mutex->lock); + /* 2) fix up the end pointer for the mutex's list */ + mutex->waiters.tqh_last = lwaiterp; + } + _spinunlock(&mutex->lock); - /* 3) reset the condvar's list and mutex pointer */ - TAILQ_INIT(&cond->waiters); - assert(cond->mutex != NULL); - cond->mutex = NULL; - _spinunlock(&cond->lock); + if (owner == NULL) + _wake(&nwaiter->wait, 1); return (0); } Index: lib/libc/thread/rthread_tls.c =================================================================== RCS file: /cvs/src/lib/libc/thread/rthread_tls.c,v diff -u -p -r1.5 rthread_tls.c --- lib/libc/thread/rthread_tls.c 19 Apr 2023 12:30:09 -0000 1.5 +++ lib/libc/thread/rthread_tls.c 2 Oct 2024 21:03:02 -0000 @@ -32,7 +32,7 @@ struct rthread_key { }; static struct rthread_key rkeys[PTHREAD_KEYS_MAX]; -static _atomic_lock_t rkeyslock = _SPINLOCK_UNLOCKED; +static struct __cmtx rkeyslock = __CMTX_INITIALIZER(rkeyslock); int pthread_key_create(pthread_key_t *key, void (*destructor)(void*)) @@ -40,14 +40,14 @@ pthread_key_create(pthread_key_t *key, v static int hint; int i; - _spinlock(&rkeyslock); + __cmtx_enter(&rkeyslock); if (rkeys[hint].used) { for (i = 0; i < PTHREAD_KEYS_MAX; i++) { if (!rkeys[i].used) break; } if (i == PTHREAD_KEYS_MAX) { - _spinunlock(&rkeyslock); + __cmtx_leave(&rkeyslock); return (EAGAIN); } hint = i; @@ -58,7 +58,7 @@ pthread_key_create(pthread_key_t *key, v *key = hint++; if (hint >= PTHREAD_KEYS_MAX) hint = 0; - _spinunlock(&rkeyslock); + __cmtx_leave(&rkeyslock); return (0); } @@ -73,7 +73,7 @@ pthread_key_delete(pthread_key_t key) if (key < 0 || key >= PTHREAD_KEYS_MAX) return (EINVAL); - _spinlock(&rkeyslock); + __cmtx_enter(&rkeyslock); if (!rkeys[key].used) { rv = EINVAL; goto out; @@ -91,7 +91,7 @@ pthread_key_delete(pthread_key_t key) } out: - _spinunlock(&rkeyslock); + __cmtx_leave(&rkeyslock); return (rv); } @@ -165,7 +165,7 @@ _rthread_tls_destructors(pthread_t threa struct rthread_storage *rs; int i; - _spinlock(&rkeyslock); + __cmtx_enter(&rkeyslock); for (i = 0; i < PTHREAD_DESTRUCTOR_ITERATIONS; i++) { for (rs = thread->local_storage; rs; rs = rs->next) { if (!rs->data) @@ -175,9 +175,9 @@ _rthread_tls_destructors(pthread_t threa rkeys[rs->keyid].destructor; void *data = rs->data; rs->data = NULL; - _spinunlock(&rkeyslock); + __cmtx_leave(&rkeyslock); destructor(data); - _spinlock(&rkeyslock); + __cmtx_enter(&rkeyslock); } } } @@ -185,5 +185,5 @@ _rthread_tls_destructors(pthread_t threa thread->local_storage = rs->next; free(rs); } - _spinunlock(&rkeyslock); + __cmtx_leave(&rkeyslock); } Index: lib/librthread/Makefile =================================================================== RCS file: /cvs/src/lib/librthread/Makefile,v diff -u -p -r1.56 Makefile --- lib/librthread/Makefile 6 Feb 2020 03:13:45 -0000 1.56 +++ lib/librthread/Makefile 2 Oct 2024 21:03:02 -0000 @@ -31,17 
+31,21 @@ SRCS= rthread.c \ rthread_sched.c \ rthread_stack.c \ rthread_spin_lock.c \ + rthread_sem_compat.c \ + rthread_rwlock_compat.c \ sched_prio.c +.if 0 # Architectures without atomics .if ${MACHINE_ARCH} == "hppa" || ${MACHINE_ARCH} == "m88k" || \ ${MACHINE_ARCH} == "sh" SRCS+= rthread_sem_compat.c \ - rthread_rwlock_compat.c + #rthread_rwlock_compat.c .else CFLAGS+= -DFUTEX SRCS+= rthread_sem.c \ - rthread_rwlock.c + #rthread_rwlock.c +.endif .endif SRCDIR= ${.CURDIR}/../libpthread Index: lib/librthread/rthread.c =================================================================== RCS file: /cvs/src/lib/librthread/rthread.c,v diff -u -p -r1.100 rthread.c --- lib/librthread/rthread.c 27 Dec 2022 17:10:07 -0000 1.100 +++ lib/librthread/rthread.c 2 Oct 2024 21:03:02 -0000 @@ -566,46 +566,15 @@ _thread_dump_info(void) void _rthread_dl_lock(int what) { - static _atomic_lock_t lock = _SPINLOCK_UNLOCKED; - static pthread_t owner = NULL; - static struct pthread_queue lockers = TAILQ_HEAD_INITIALIZER(lockers); - static int count = 0; + static struct __rcmtx lock = __RCMTX_INITIALIZER(lock); if (what == 0) { - pthread_t self = pthread_self(); - - /* lock, possibly recursive */ - _spinlock(&lock); - if (owner == NULL) { - owner = self; - } else if (owner != self) { - TAILQ_INSERT_TAIL(&lockers, self, waiting); - while (owner != self) { - __thrsleep(self, 0, NULL, &lock, NULL); - _spinlock(&lock); - } - } - count++; - _spinunlock(&lock); + __rcmtx_enter(&lock); } else if (what == 1) { - /* unlock, possibly recursive */ - if (--count == 0) { - pthread_t next; - - _spinlock(&lock); - owner = next = TAILQ_FIRST(&lockers); - if (next != NULL) - TAILQ_REMOVE(&lockers, next, waiting); - _spinunlock(&lock); - if (next != NULL) - __thrwakeup(next, 1); - } + __rcmtx_leave(&lock); } else { /* reinit: used in child after fork to clear the queue */ - lock = _SPINLOCK_UNLOCKED; - if (--count == 0) - owner = NULL; - TAILQ_INIT(&lockers); + __rcmtx_init(&lock); } } #endif Index: lib/librthread/rthread_rwlock.c =================================================================== RCS file: /cvs/src/lib/librthread/rthread_rwlock.c,v diff -u -p -r1.13 rthread_rwlock.c --- lib/librthread/rthread_rwlock.c 3 Mar 2019 18:39:10 -0000 1.13 +++ lib/librthread/rthread_rwlock.c 2 Oct 2024 21:03:02 -0000 @@ -25,18 +25,15 @@ #include "rthread.h" #include "synch.h" +struct pthread_rwlock { + volatile unsigned int value; +}; + #define UNLOCKED 0 #define MAXREADER 0x7ffffffe #define WRITER 0x7fffffff #define WAITING 0x80000000 #define COUNT(v) ((v) & WRITER) - -#define SPIN_COUNT 128 -#if defined(__i386__) || defined(__amd64__) -#define SPIN_WAIT() asm volatile("pause": : : "memory") -#else -#define SPIN_WAIT() do { } while (0) -#endif static _atomic_lock_t rwlock_init_lock = _SPINLOCK_UNLOCKED; Index: lib/librthread/rthread_rwlock_compat.c =================================================================== RCS file: /cvs/src/lib/librthread/rthread_rwlock_compat.c,v diff -u -p -r1.2 rthread_rwlock_compat.c --- lib/librthread/rthread_rwlock_compat.c 14 May 2022 14:52:20 -0000 1.2 +++ lib/librthread/rthread_rwlock_compat.c 2 Oct 2024 21:03:02 -0000 @@ -28,8 +28,23 @@ #include #include "rthread.h" +#include "synch.h" -static _atomic_lock_t rwlock_init_lock = _SPINLOCK_UNLOCKED; +struct pthread_rwlock { + _atomic_lock_t lock; + uint32_t state; + pthread_t owner; + struct pthread_waiters writers; + int readers; +}; + +static struct __cmtx rwlock_init_lock = __CMTX_INITIALIZER(rwlock_init_lock); + +enum pthread_rwlock_states { + 
pthread_rwlock_unlocked = 0, + pthread_rwlock_read = 1, /* the same as waiter.wait */ + pthread_rwlock_write, +}; int pthread_rwlock_init(pthread_rwlock_t *lockp, @@ -57,7 +72,8 @@ pthread_rwlock_destroy(pthread_rwlock_t assert(lockp); lock = *lockp; if (lock) { - if (lock->readers || !TAILQ_EMPTY(&lock->writers)) { + if (lock->state != pthread_rwlock_unlocked || + lock->readers || !TAILQ_EMPTY(&lock->writers)) { #define MSG "pthread_rwlock_destroy on rwlock with waiters!\n" write(2, MSG, sizeof(MSG) - 1); #undef MSG @@ -79,12 +95,11 @@ _rthread_rwlock_ensure_init(pthread_rwlo * If the rwlock is statically initialized, perform the dynamic * initialization. */ - if (*lockp == NULL) - { - _spinlock(&rwlock_init_lock); + if (*lockp == NULL) { + __cmtx_enter(&rwlock_init_lock); if (*lockp == NULL) ret = pthread_rwlock_init(lockp, NULL); - _spinunlock(&rwlock_init_lock); + __cmtx_leave(&rwlock_init_lock); } return (ret); } @@ -95,35 +110,60 @@ _rthread_rwlock_rdlock(pthread_rwlock_t int try) { pthread_rwlock_t lock; - pthread_t thread = pthread_self(); - int error; + pthread_t self = pthread_self(); + pthread_t owner = NULL; + uint32_t state; + int error = 0; if ((error = _rthread_rwlock_ensure_init(lockp))) return (error); lock = *lockp; - _rthread_debug(5, "%p: rwlock_rdlock %p\n", (void *)thread, - (void *)lock); + _rthread_debug(5, "%p: %s %p\n", self, __func__, lock); _spinlock(&lock->lock); - - /* writers have precedence */ - if (lock->owner == NULL && TAILQ_EMPTY(&lock->writers)) - lock->readers++; - else if (try) - error = EBUSY; - else if (lock->owner == thread) - error = EDEADLK; - else { - do { - if (__thrsleep(lock, CLOCK_REALTIME, abstime, - &lock->lock, NULL) == EWOULDBLOCK) - return (ETIMEDOUT); - _spinlock(&lock->lock); - } while (lock->owner != NULL || !TAILQ_EMPTY(&lock->writers)); - lock->readers++; + state = lock->state; + switch (state) { + case pthread_rwlock_unlocked: + lock->state = state = pthread_rwlock_read; + break; + case pthread_rwlock_write: + owner = lock->owner; + assert(owner != NULL); + if (try) { + error = EBUSY; + goto err; + } + if (owner == self) { + error = EDEADLK; + goto err; + } + break; } + lock->readers++; _spinunlock(&lock->lock); + while (state != pthread_rwlock_read) { + error = _twait(&lock->state, state, CLOCK_REALTIME, abstime); + if (error == ETIMEDOUT) { + assert(abstime != NULL); + _spinlock(&lock->lock); + if (lock->state != pthread_rwlock_read) { + assert(lock->readers > 0); + lock->readers--; + } else + error = 0; + _spinunlock(&lock->lock); + return error; + } + + state = lock->state; + } + + membar_enter(); + return (0); + +err: + _spinunlock(&lock->lock); return (error); } @@ -154,42 +194,69 @@ _rthread_rwlock_wrlock(pthread_rwlock_t int try) { pthread_rwlock_t lock; - pthread_t thread = pthread_self(); - int error; + pthread_t self = pthread_self(); + pthread_t owner; + struct pthread_waiter waiter = { .owner = self, .wait = 1 }; + uint32_t state; + int error = 0; + + assert(self != NULL); if ((error = _rthread_rwlock_ensure_init(lockp))) return (error); lock = *lockp; - _rthread_debug(5, "%p: rwlock_timedwrlock %p\n", (void *)thread, - (void *)lock); + _rthread_debug(5, "%p: %s %p\n", self, __func__, lock); + _spinlock(&lock->lock); - if (lock->readers == 0 && lock->owner == NULL) - lock->owner = thread; - else if (try) - error = EBUSY; - else if (lock->owner == thread) - error = EDEADLK; - else { - int do_wait; - - /* gotta block */ - TAILQ_INSERT_TAIL(&lock->writers, thread, waiting); - do { - do_wait = __thrsleep(thread, 
CLOCK_REALTIME, abstime, - &lock->lock, NULL) != EWOULDBLOCK; - _spinlock(&lock->lock); - } while (lock->owner != thread && do_wait); + state = lock->state; + if (state == pthread_rwlock_unlocked) { + lock->state = pthread_rwlock_write; + lock->owner = self; + } else { + owner = lock->owner; /* reader is NULL, writer is pthread */ - if (lock->owner != thread) { - /* timed out, sigh */ - TAILQ_REMOVE(&lock->writers, thread, waiting); - error = ETIMEDOUT; + if (try) { + error = EBUSY; + goto err; + } + if (owner == self) { + error = EDEADLK; + goto err; } + + TAILQ_INSERT_TAIL(&lock->writers, &waiter, entry); } _spinunlock(&lock->lock); + if (state == pthread_rwlock_unlocked) + return (0); + if (error != 0) + return (error); + + do { + error = _twait(&waiter.wait, 1, CLOCK_REALTIME, abstime); + if (error == ETIMEDOUT) { + assert(abstime != NULL); + _spinlock(&lock->lock); + if (waiter.wait) + TAILQ_REMOVE(&lock->writers, &waiter, entry); + else { + assert(lock->state == pthread_rwlock_write); + assert(lock->owner == self); + error = 0; + } + _spinunlock(&lock->lock); + return error; + } + } while (waiter.wait); + + membar_enter(); + return (0); + +err: + _spinunlock(&lock->lock); return (error); } @@ -214,45 +281,67 @@ pthread_rwlock_timedwrlock(pthread_rwloc return (_rthread_rwlock_wrlock(lockp, abstime, 0)); } - int pthread_rwlock_unlock(pthread_rwlock_t *lockp) { pthread_rwlock_t lock; - pthread_t thread = pthread_self(); - pthread_t next; - int was_writer; + pthread_t self = pthread_self(); + struct pthread_waiter *nwaiter; + volatile uint32_t *waitp = NULL; + uint32_t nwake; lock = *lockp; - _rthread_debug(5, "%p: rwlock_unlock %p\n", (void *)thread, - (void *)lock); + _rthread_debug(5, "%p: %s %p\n", self, __func__, lock); + _spinlock(&lock->lock); - if (lock->owner != NULL) { - assert(lock->owner == thread); - was_writer = 1; - } else { + switch (lock->state) { + case pthread_rwlock_write: + assert(lock->owner == self); + break; + case pthread_rwlock_read: + assert(lock->owner == NULL); assert(lock->readers > 0); - lock->readers--; - if (lock->readers > 0) - goto out; - was_writer = 0; + if (--lock->readers > 0) { + _spinunlock(&lock->lock); + return (0); + } + break; + default: + assert(lock->state == pthread_rwlock_write || + lock->state == pthread_rwlock_read); } - lock->owner = next = TAILQ_FIRST(&lock->writers); - if (next != NULL) { - /* dequeue and wake first writer */ - TAILQ_REMOVE(&lock->writers, next, waiting); - _spinunlock(&lock->lock); - __thrwakeup(next, 1); - return (0); - } + nwaiter = TAILQ_FIRST(&lock->writers); + if (nwaiter != NULL) { + /* can move from read or write -> write */ + TAILQ_REMOVE(&lock->writers, nwaiter, entry); + assert(nwaiter->owner != NULL); + + /* move ownership */ + lock->state = pthread_rwlock_write; + lock->owner = nwaiter->owner; + + nwake = 1; + waitp = &nwaiter->wait; + nwaiter->wait = 0; + } else { + lock->owner = NULL; - /* could there have been blocked readers? 
wake them all */ - if (was_writer) - __thrwakeup(lock, 0); -out: + nwake = lock->readers; + if (nwake) { + /* can only move from write -> read */ + assert(lock->state == pthread_rwlock_write); + + waitp = &lock->state; + lock->state = pthread_rwlock_read; + } else + lock->state = pthread_rwlock_unlocked; + } _spinunlock(&lock->lock); + + if (waitp != NULL) + _wake(waitp, nwake); return (0); } Index: lib/librthread/rthread_sem_compat.c =================================================================== RCS file: /cvs/src/lib/librthread/rthread_sem_compat.c,v diff -u -p -r1.2 rthread_sem_compat.c --- lib/librthread/rthread_sem_compat.c 14 May 2022 14:52:20 -0000 1.2 +++ lib/librthread/rthread_sem_compat.c 2 Oct 2024 21:03:02 -0000 @@ -34,8 +34,7 @@ #include "rthread.h" #include "cancel.h" /* in libc/include */ - -#define SHARED_IDENT ((void *)-1) +#include "synch.h" /* SHA256_DIGEST_STRING_LENGTH includes nul */ /* "/tmp/" + sha256 + ".sem" */ @@ -57,53 +56,62 @@ int _sem_wait(sem_t sem, int can_eintr, const struct timespec *abstime, int *delayed_cancel) { - void *ident = (void *)&sem->waitcount; - int r; - - if (sem->shared) - ident = SHARED_IDENT; + int value; + int error; _spinlock(&sem->lock); - if (sem->value) { - sem->value--; - r = 0; - } else { + value = sem->value; + if (value > 0) + sem->value = value - 1; + else sem->waitcount++; - do { - r = __thrsleep(ident, CLOCK_REALTIME, abstime, - &sem->lock, delayed_cancel); - _spinlock(&sem->lock); - /* ignore interruptions other than cancelation */ - if ((r == ECANCELED && *delayed_cancel == 0) || - (r == EINTR && !can_eintr)) - r = 0; - } while (r == 0 && sem->value == 0); - sem->waitcount--; - if (r == 0) - sem->value--; + _spinunlock(&sem->lock); + + if (value > 0) + return (0); + + for (;;) { + error = _twaitf(&sem->value, 0, sem->shared, + CLOCK_REALTIME, abstime); + if ((error == ECANCELED && *delayed_cancel == 0) || + (error == EINTR && !can_eintr) || error == EAGAIN) + error = 0; + + _spinlock(&sem->lock); + value = sem->value; + if (value > 0) { + sem->value = value - 1; + error = 0; + break; + } + if (error != 0) + break; + _spinunlock(&sem->lock); } + + sem->waitcount--; _spinunlock(&sem->lock); - return (r); + + return (error); } /* always increment count */ int _sem_post(sem_t sem) { - void *ident = (void *)&sem->waitcount; - int rv = 0; - - if (sem->shared) - ident = SHARED_IDENT; + int wake; _spinlock(&sem->lock); sem->value++; - if (sem->waitcount) { - __thrwakeup(ident, 1); - rv = 1; - } + wake = sem->waitcount; _spinunlock(&sem->lock); - return (rv); + + if (wake > 0) { + _wakef(&sem->value, 1, sem->shared); + return (1); + } + + return (0); } /* @@ -161,6 +169,7 @@ sem_init(sem_t *semp, int pshared, unsig } sem->lock = _SPINLOCK_UNLOCKED; sem->value = value; + sem->shared = pshared ? 
0 : FUTEX_PRIVATE_FLAG; *semp = sem; return (0); @@ -188,7 +197,7 @@ sem_destroy(sem_t *semp) } *semp = NULL; - if (sem->shared) + if (sem->shared != FUTEX_PRIVATE_FLAG) munmap(sem, SEM_MMAP_SIZE); else free(sem); @@ -234,7 +243,7 @@ sem_wait(sem_t *semp) struct tib *tib = TIB_GET(); pthread_t self; sem_t sem; - int r; + int error; PREP_CANCEL_POINT(tib); if (!_threads_ready) @@ -247,11 +256,11 @@ sem_wait(sem_t *semp) } ENTER_DELAYED_CANCEL_POINT(tib, self); - r = _sem_wait(sem, 1, NULL, &self->delayed_cancel); - LEAVE_CANCEL_POINT_INNER(tib, r); + error = _sem_wait(sem, 1, NULL, &self->delayed_cancel); + LEAVE_CANCEL_POINT_INNER(tib, error); - if (r) { - errno = r; + if (error) { + errno = error; return (-1); } @@ -292,7 +301,7 @@ int sem_trywait(sem_t *semp) { sem_t sem; - int r; + int value; if (!semp || !(sem = *semp)) { errno = EINVAL; @@ -300,19 +309,17 @@ sem_trywait(sem_t *semp) } _spinlock(&sem->lock); - if (sem->value) { - sem->value--; - r = 0; - } else - r = EAGAIN; + value = sem->value; + if (value > 0) + sem->value = value - 1; _spinunlock(&sem->lock); - if (r) { - errno = r; - return (-1); - } + if (value > 0) + return (0); - return (0); + errno = EAGAIN; + _rthread_debug(1, "%s: v=%d errno=%d\n", __func__, value, errno); + return (-1); } @@ -405,7 +412,7 @@ sem_open(const char *name, int oflag, .. if (created) { sem->lock = _SPINLOCK_UNLOCKED; sem->value = value; - sem->shared = 1; + sem->shared = 0; } *semp = sem; @@ -417,7 +424,7 @@ sem_close(sem_t *semp) { sem_t sem; - if (!semp || !(sem = *semp) || !sem->shared) { + if (!semp || !(sem = *semp) || sem->shared != 0) { errno = EINVAL; return (-1); } Index: lib/librthread/synch.h =================================================================== RCS file: /cvs/src/lib/librthread/synch.h,v diff -u -p -r1.10 synch.h --- lib/librthread/synch.h 7 Jan 2024 19:44:28 -0000 1.10 +++ lib/librthread/synch.h 2 Oct 2024 21:03:02 -0000 @@ -1,4 +1,4 @@ -/* $OpenBSD: synch.h,v 1.10 2024/01/07 19:44:28 cheloha Exp $ */ +/* $OpenBSD: synch.h,v 1.9 2024/01/07 19:44:28 cheloha Exp $ */ /* * Copyright (c) 2017 Martin Pieuchot * @@ -20,20 +20,27 @@ #include static inline int +_wakef(volatile uint32_t *p, int n, int flag) +{ + return futex(p, FUTEX_WAKE | flag, n, NULL, NULL); +} + +static inline int _wake(volatile uint32_t *p, int n) { - return futex(p, FUTEX_WAKE, n, NULL, NULL); + return _wakef(p, n, FUTEX_PRIVATE_FLAG); } static inline int -_twait(volatile uint32_t *p, int val, clockid_t clockid, const struct timespec *abs) +_twaitf(volatile uint32_t *p, int val, int flag, + clockid_t clockid, const struct timespec *abs) { struct timespec now, rel; int saved_errno = errno; int error; if (abs == NULL) { - error = futex(p, FUTEX_WAIT, val, NULL, NULL); + error = futex(p, FUTEX_WAIT | flag, val, NULL, NULL); if (error == -1) { error = errno; errno = saved_errno; @@ -48,7 +55,7 @@ _twait(volatile uint32_t *p, int val, cl return ETIMEDOUT; timespecsub(abs, &now, &rel); - error = futex(p, FUTEX_WAIT, val, &rel, NULL); + error = futex(p, FUTEX_WAIT | flag, val, &rel, NULL); if (error == -1) { error = errno; errno = saved_errno; @@ -57,7 +64,14 @@ _twait(volatile uint32_t *p, int val, cl } static inline int +_twait(volatile uint32_t *p, int val, + clockid_t clockid, const struct timespec *abstime) +{ + return _twaitf(p, val, FUTEX_PRIVATE_FLAG, clockid, abstime); +} + +static inline int _requeue(volatile uint32_t *p, int n, int m, volatile uint32_t *q) { - return futex(p, FUTEX_REQUEUE, n, (void *)(long)m, q); + return futex(p, 
FUTEX_REQUEUE_PRIVATE, n, (void *)(long)m, q); } Index: regress/lib/libpthread/pthread_mutex/pthread_mutex.c =================================================================== RCS file: /cvs/src/regress/lib/libpthread/pthread_mutex/pthread_mutex.c,v diff -u -p -r1.10 pthread_mutex.c --- regress/lib/libpthread/pthread_mutex/pthread_mutex.c 24 Mar 2012 21:39:10 -0000 1.10 +++ regress/lib/libpthread/pthread_mutex/pthread_mutex.c 2 Oct 2024 21:03:02 -0000 @@ -216,14 +216,31 @@ test_mutex_recursive(void) static void * thread_deadlock(void *arg) { - pthread_mutex_t *mutex = arg;; + pthread_mutex_t *mutex = arg; /* intentionally deadlock this thread */ CHECKr(pthread_mutex_lock(mutex)); - CHECKr(pthread_mutex_lock(mutex)); - /* never reached */ - abort(); + /* + * POSIX says pthread_mutex_trylock should fail immediately + * if the mutex is currently locked, even if it's locked by + * the current thread. + */ + ASSERTe(pthread_mutex_trylock(mutex), != 0); + + /* + * At least firefox relies on pthread_mutex_trylock returning + * EBUSY if the owning thread tries to lock again. + */ + ASSERTe(pthread_mutex_trylock(mutex), == EBUSY); + + /* + * POSIX says a thread that relocks a mutex will deadlock, but + * it may fail with EDEADLK in that situation. + */ + ASSERTe(pthread_mutex_lock(mutex), == EDEADLK); + + return (NULL); } static void @@ -248,13 +265,15 @@ test_mutex_normal(void) CHECKr(pthread_mutex_lock(&mutex_normal)); CHECKe(clock_gettime(CLOCK_REALTIME, &ts)); ts.tv_sec += 2; - ASSERTe(pthread_mutex_timedlock(&mutex_normal, &ts), == ETIMEDOUT); + ASSERTe(pthread_mutex_timedlock(&mutex_normal, &ts), != 0); CHECKr(pthread_mutex_unlock(&mutex_normal)); + /* verify that it can still be locked and unlocked */ CHECKr(pthread_mutex_lock(&mutex_normal)); CHECKr(pthread_mutex_unlock(&mutex_normal)); + CHECKr(pthread_create(&thread, NULL, thread_deadlock, &mutex_normal)); - sleep(1); + CHECKr(pthread_join(thread, NULL)); } int Index: sys/kern/exec_elf.c =================================================================== RCS file: /cvs/src/sys/kern/exec_elf.c,v diff -u -p -r1.191 exec_elf.c --- sys/kern/exec_elf.c 15 Sep 2024 23:13:19 -0000 1.191 +++ sys/kern/exec_elf.c 2 Oct 2024 21:03:02 -0000 @@ -1023,6 +1023,10 @@ exec_elf_fixup(struct proc *p, struct ex a->au_v = p->p_p->ps_timekeep; a++; + a->au_id = AUX_openbsd_ncpus; + a->au_v = ncpus; + a++; + a->au_id = AUX_null; a->au_v = 0; a++; Index: sys/kern/sys_futex.c =================================================================== RCS file: /cvs/src/sys/kern/sys_futex.c,v diff -u -p -r1.22 sys_futex.c --- sys/kern/sys_futex.c 14 Aug 2023 07:42:34 -0000 1.22 +++ sys/kern/sys_futex.c 2 Oct 2024 21:03:02 -0000 @@ -36,41 +36,56 @@ * Kernel representation of a futex. */ struct futex { - LIST_ENTRY(futex) ft_list; /* list of all futexes */ - TAILQ_HEAD(, proc) ft_threads; /* sleeping queue */ + TAILQ_ENTRY(futex) ft_entry; /* list of all futexes */ + struct process *ft_ps; struct uvm_object *ft_obj; /* UVM object */ struct vm_amap *ft_amap; /* UVM amap */ voff_t ft_off; /* UVM offset */ - unsigned int ft_refcnt; /* # of references */ + + volatile unsigned int ft_wait; }; -/* Syscall helpers. */ -int futex_wait(uint32_t *, uint32_t, const struct timespec *, int); -int futex_wake(uint32_t *, uint32_t, int); -int futex_requeue(uint32_t *, uint32_t, uint32_t *, uint32_t, int); - -/* Flags for futex_get(). */ -#define FT_CREATE 0x1 /* Create a futex if it doesn't exist. */ -#define FT_PRIVATE 0x2 /* Futex is process-private. 
*/ +TAILQ_HEAD(futexen, futex); -struct futex *futex_get(uint32_t *, int); -void futex_put(struct futex *); +struct futex_bucket { + struct futexen fb_list; + struct rwlock fb_lock; + uint32_t fb_id; /* for lock ordering */ +} __aligned(64); -/* - * The global futex lock serializes futex(2) calls so that no wakeup - * event is lost, and protects all futex lists and futex states. - */ -struct rwlock ftlock = RWLOCK_INITIALIZER("futex"); -static struct futex_list ftlist_shared = - LIST_HEAD_INITIALIZER(ftlist_shared); -struct pool ftpool; +/* Syscall helpers. */ +static int futex_wait(struct proc *, uint32_t *, uint32_t, + const struct timespec *, int); +static int futex_wake(struct proc *, uint32_t *, uint32_t, int, + register_t *); +static int futex_requeue(struct proc *, uint32_t *, uint32_t, + uint32_t *, uint32_t, int, register_t *); + +/* Flags for futex_get(). kernel private flags sit in FUTEX_OP_MASK space */ +#define FT_PRIVATE FUTEX_PRIVATE_FLAG /* Futex is process-private. */ + +#define FUTEX_BUCKET_BITS 6 +#define FUTEX_BUCKET_SIZE (1U << FUTEX_BUCKET_BITS) +#define FUTEX_BUCKET_MASK (FUTEX_BUCKET_SIZE - 1) +static struct futex_bucket futex_hash[FUTEX_BUCKET_SIZE]; void futex_init(void) { - pool_init(&ftpool, sizeof(struct futex), 0, IPL_NONE, - PR_WAITOK | PR_RWLOCK, "futexpl", NULL); + struct futex_bucket *fb; + unsigned int i; + + for (i = 0; i < nitems(futex_hash); i++) { + fb = &futex_hash[i]; + + TAILQ_INIT(&fb->fb_list); + rw_init(&fb->fb_lock, "futexlk"); + + fb->fb_id = arc4random(); + fb->fb_id &= ~FUTEX_BUCKET_MASK; + fb->fb_id |= i; + } } int @@ -88,65 +103,51 @@ sys_futex(struct proc *p, void *v, regis uint32_t val = SCARG(uap, val); const struct timespec *timeout = SCARG(uap, timeout); void *g = SCARG(uap, g); - int flags = 0; + int flags = op & FUTEX_FLAG_MASK; int error = 0; - if (op & FUTEX_PRIVATE_FLAG) - flags |= FT_PRIVATE; - - rw_enter_write(&ftlock); - switch (op) { + switch (op & FUTEX_OP_MASK) { case FUTEX_WAIT: - case FUTEX_WAIT_PRIVATE: - error = futex_wait(uaddr, val, timeout, flags); + error = futex_wait(p, uaddr, val, timeout, flags); break; case FUTEX_WAKE: - case FUTEX_WAKE_PRIVATE: - *retval = futex_wake(uaddr, val, flags); + error = futex_wake(p, uaddr, val, flags, retval); break; case FUTEX_REQUEUE: - case FUTEX_REQUEUE_PRIVATE: - *retval = futex_requeue(uaddr, val, g, (u_long)timeout, flags); + error = futex_requeue(p, uaddr, val, g, + (u_long)timeout, flags, retval); break; default: error = ENOSYS; break; } - rw_exit_write(&ftlock); return error; } -/* - * Return an existing futex matching userspace address ``uaddr''. - * - * If such futex does not exist and FT_CREATE is given, create it. 
- */ -struct futex * -futex_get(uint32_t *uaddr, int flags) +static void +futex_addrs(struct proc *p, struct futex *f, uint32_t *uaddr, int flags) { - struct proc *p = curproc; vm_map_t map = &p->p_vmspace->vm_map; vm_map_entry_t entry; struct uvm_object *obj = NULL; struct vm_amap *amap = NULL; voff_t off = (vaddr_t)uaddr; - struct futex *f; - struct futex_list *ftlist = &p->p_p->ps_ftlist; + struct process *ps; - rw_assert_wrlock(&ftlock); + if (ISSET(flags, FT_PRIVATE)) + ps = p->p_p; + else { + ps = NULL; - if (!(flags & FT_PRIVATE)) { vm_map_lock_read(map); if (uvm_map_lookup_entry(map, (vaddr_t)uaddr, &entry) && entry->inheritance == MAP_INHERIT_SHARE) { if (UVM_ET_ISOBJ(entry)) { - ftlist = &ftlist_shared; obj = entry->object.uvm_obj; off = entry->offset + ((vaddr_t)uaddr - entry->start); } else if (entry->aref.ar_amap) { - ftlist = &ftlist_shared; amap = entry->aref.ar_amap; off = ptoa(entry->aref.ar_pageoff) + ((vaddr_t)uaddr - entry->start); @@ -155,47 +156,18 @@ futex_get(uint32_t *uaddr, int flags) vm_map_unlock_read(map); } - LIST_FOREACH(f, ftlist, ft_list) { - if (f->ft_obj == obj && f->ft_amap == amap && - f->ft_off == off) { - f->ft_refcnt++; - break; - } - } - - if ((f == NULL) && (flags & FT_CREATE)) { - /* - * We rely on the rwlock to ensure that no other thread - * create the same futex. - */ - f = pool_get(&ftpool, PR_WAITOK); - TAILQ_INIT(&f->ft_threads); - f->ft_obj = obj; - f->ft_amap = amap; - f->ft_off = off; - f->ft_refcnt = 1; - LIST_INSERT_HEAD(ftlist, f, ft_list); - } - - return f; + f->ft_ps = ps; + f->ft_obj = obj; + f->ft_amap = amap; + f->ft_off = off; } -/* - * Release a given futex. - */ -void -futex_put(struct futex *f) +static inline struct futex_bucket * +futex_get_bucket(struct futex *f) { - rw_assert_wrlock(&ftlock); + uint32_t key = f->ft_off >> 3; /* watevs */ - KASSERT(f->ft_refcnt > 0); - - --f->ft_refcnt; - if (f->ft_refcnt == 0) { - KASSERT(TAILQ_EMPTY(&f->ft_threads)); - LIST_REMOVE(f, ft_list); - pool_put(&ftpool, f); - } + return (&futex_hash[key & FUTEX_BUCKET_MASK]); } /* @@ -203,69 +175,79 @@ futex_put(struct futex *f) * ``uaddr''. Let it sleep for the specified ``timeout'' time, or * indefinitely if the argument is NULL. */ -int -futex_wait(uint32_t *uaddr, uint32_t val, const struct timespec *timeout, - int flags) +static int +futex_wait(struct proc *p, uint32_t *uaddr, uint32_t val, + const struct timespec *timeout, int flags) { - struct proc *p = curproc; - struct futex *f; + struct futex f; + struct futex_bucket *fb; uint64_t nsecs = INFSLP; uint32_t cval; int error; - /* - * After reading the value a race is still possible but - * we deal with it by serializing all futex syscalls. - */ - rw_assert_wrlock(&ftlock); - - /* - * Read user space futex value - */ - if ((error = copyin32(uaddr, &cval))) - return error; - - /* If the value changed, stop here. 
*/ - if (cval != val) - return EAGAIN; - if (timeout != NULL) { struct timespec ts; - if ((error = copyin(timeout, &ts, sizeof(ts)))) - return error; + error = copyin(timeout, &ts, sizeof(ts)); + if (error != 0) + return (error); #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) ktrreltimespec(p, &ts); #endif if (ts.tv_sec < 0 || !timespecisvalid(&ts)) - return EINVAL; + return (EINVAL); + nsecs = MAX(1, MIN(TIMESPEC_TO_NSEC(&ts), MAXTSLP)); } - f = futex_get(uaddr, flags | FT_CREATE); - TAILQ_INSERT_TAIL(&f->ft_threads, p, p_fut_link); - p->p_futex = f; - - error = rwsleep_nsec(p, &ftlock, PWAIT|PCATCH, "fsleep", nsecs); - if (error == ERESTART) - error = ECANCELED; - else if (error == EWOULDBLOCK) { - /* A race occurred between a wakeup and a timeout. */ - if (p->p_futex == NULL) - error = 0; - else - error = ETIMEDOUT; - } + futex_addrs(p, &f, uaddr, flags); + fb = futex_get_bucket(&f); + + /* + * After reading the value a race is still possible but + * we deal with it by serializing futex syscalls. + */ + error = rw_enter(&fb->fb_lock, RW_WRITE|RW_INTR); + if (error != 0) + return (error); - /* Remove ourself if we haven't been awaken. */ - if ((f = p->p_futex) != NULL) { - p->p_futex = NULL; - TAILQ_REMOVE(&f->ft_threads, p, p_fut_link); - futex_put(f); + /* + * Read user space futex value + */ + error = copyin32(uaddr, &cval); + if (error != 0) + goto exit; + + /* If the value changed, stop here. */ + if (cval != val) { + error = EAGAIN; + goto exit; } - return error; + TAILQ_INSERT_TAIL(&fb->fb_list, &f, ft_entry); + f.ft_wait = 1; + do { + error = rwsleep_nsec(&f, &fb->fb_lock, + PWAIT|PCATCH, "fsleep", nsecs); + if (error != 0) { + switch (error) { + case ERESTART: + error = ECANCELED; + break; + case EWOULDBLOCK: + error = f.ft_wait ? ETIMEDOUT : 0; + break; + } + + break; + } + } while (f.ft_wait); + TAILQ_REMOVE(&fb->fb_list, &f, ft_entry); + +exit: + rw_exit_write(&fb->fb_lock); + return (error); } /* @@ -273,46 +255,131 @@ futex_wait(uint32_t *uaddr, uint32_t val * ``uaddr'' and requeue at most ``m'' sibling threads on a futex at * address ``uaddr2''. 
*/ -int -futex_requeue(uint32_t *uaddr, uint32_t n, uint32_t *uaddr2, uint32_t m, - int flags) +static int +futex_requeue(struct proc *p, uint32_t *uaddr, uint32_t n, + uint32_t *uaddr2, uint32_t m, int flags, register_t *retval) { - struct futex *f, *g; - struct proc *p; + struct futex okey, nkey; + struct futex *f, *nf = NULL; + struct futex_bucket *ofb, *nfb; uint32_t count = 0; - rw_assert_wrlock(&ftlock); + if (m > INT_MAX) + return (EINVAL); + if (m == 0) + return (futex_wake(p, uaddr, n, flags, retval)); + + futex_addrs(p, &okey, uaddr, flags); + ofb = futex_get_bucket(&okey); + futex_addrs(p, &nkey, uaddr2, flags); + nfb = futex_get_bucket(&nkey); + + if (ofb->fb_id < nfb->fb_id) { + rw_enter_write(&ofb->fb_lock); + rw_enter_write(&nfb->fb_lock); + } else if (ofb->fb_id > nfb->fb_id) { + rw_enter_write(&nfb->fb_lock); + rw_enter_write(&ofb->fb_lock); + } else + rw_enter_write(&ofb->fb_lock); + + TAILQ_FOREACH(f, &ofb->fb_list, ft_entry) { + if (f->ft_off != okey.ft_off || + f->ft_ps != okey.ft_ps || + f->ft_obj != okey.ft_obj || + f->ft_amap != okey.ft_amap) + continue; - f = futex_get(uaddr, flags); - if (f == NULL) - return 0; - - while ((p = TAILQ_FIRST(&f->ft_threads)) != NULL && (count < (n + m))) { - p->p_futex = NULL; - TAILQ_REMOVE(&f->ft_threads, p, p_fut_link); - futex_put(f); - - if (count < n) { - wakeup_one(p); - } else if (uaddr2 != NULL) { - g = futex_get(uaddr2, FT_CREATE); - TAILQ_INSERT_TAIL(&g->ft_threads, p, p_fut_link); - p->p_futex = g; + f->ft_wait = 0; + wakeup_one(f); + + if (++count == n) { + nf = TAILQ_NEXT(f, ft_entry); + break; } - count++; } - futex_put(f); + /* move matching futexes to the new bucket */ + while (nf != NULL) { + f = nf; + nf = TAILQ_NEXT(f, ft_entry); + + if (f->ft_off != okey.ft_off || + f->ft_ps != okey.ft_ps || + f->ft_obj != okey.ft_obj || + f->ft_amap != okey.ft_amap) + continue; + + TAILQ_REMOVE(&ofb->fb_list, f, ft_entry); + /* it should only be ft_off that changes, but eh */ + f->ft_ps = nkey.ft_ps; + f->ft_obj = nkey.ft_obj; + f->ft_amap = nkey.ft_amap; + f->ft_off = nkey.ft_off; + TAILQ_INSERT_TAIL(&nfb->fb_list, f, ft_entry); - return count; + if (--m == 0) + break; + } + + if (ofb->fb_id < nfb->fb_id) { + rw_exit_write(&nfb->fb_lock); + rw_exit_write(&ofb->fb_lock); + } else if (ofb->fb_id > nfb->fb_id) { + rw_exit_write(&ofb->fb_lock); + rw_exit_write(&nfb->fb_lock); + } else + rw_exit_write(&ofb->fb_lock); + + *retval = count; + + return (0); } /* * Wakeup at most ``n'' sibling threads sleeping on a futex at address * ``uaddr''. 
*/ -int -futex_wake(uint32_t *uaddr, uint32_t n, int flags) +static int +futex_wake(struct proc *p, uint32_t *uaddr, uint32_t n, int flags, + register_t *retval) { - return futex_requeue(uaddr, n, NULL, 0, flags); + struct futex key; + struct futex *f; + struct futex_bucket *fb; + int count = 0; + int error; + + if (n > INT_MAX) + return (EINVAL); + if (n == 0) { + *retval = 0; + return (0); + } + + futex_addrs(p, &key, uaddr, flags); + fb = futex_get_bucket(&key); + + error = rw_enter(&fb->fb_lock, RW_READ|RW_INTR); + if (error != 0) + return (error); + + TAILQ_FOREACH(f, &fb->fb_list, ft_entry) { + if (f->ft_off != key.ft_off || + f->ft_ps != key.ft_ps || + f->ft_obj != key.ft_obj || + f->ft_amap != key.ft_amap) + continue; + + f->ft_wait = 0; + wakeup_one(f); + + if (++count == n) + break; + } + + rw_exit_read(&fb->fb_lock); + + *retval = count; + return (0); } Index: sys/sys/exec_elf.h =================================================================== RCS file: /cvs/src/sys/sys/exec_elf.h,v diff -u -p -r1.105 exec_elf.h --- sys/sys/exec_elf.h 14 Jul 2024 09:48:49 -0000 1.105 +++ sys/sys/exec_elf.h 2 Oct 2024 21:03:02 -0000 @@ -734,6 +734,7 @@ enum AuxID { AUX_sun_gid = 2002, /* egid */ AUX_sun_rgid = 2003, /* rgid */ AUX_openbsd_timekeep = 4000, /* userland clock_gettime */ + AUX_openbsd_ncpus = 4001, /* ncpus */ }; struct elf_args { @@ -822,7 +823,7 @@ extern Elf_Dyn _DYNAMIC[]; /* * How many entries are in the AuxInfo array we pass to the process? */ -#define ELF_AUX_ENTRIES 11 +#define ELF_AUX_ENTRIES 12 #define ELF_AUX_WORDS (sizeof(AuxInfo) * ELF_AUX_ENTRIES / sizeof(char *)) struct exec_package; Index: sys/sys/futex.h =================================================================== RCS file: /cvs/src/sys/sys/futex.h,v diff -u -p -r1.2 futex.h --- sys/sys/futex.h 3 Jun 2018 15:09:26 -0000 1.2 +++ sys/sys/futex.h 2 Oct 2024 21:03:02 -0000 @@ -28,11 +28,15 @@ int futex(volatile uint32_t *, int, int, __END_DECLS #endif /* ! _KERNEL */ +#define FUTEX_OP_MASK 0x007f + #define FUTEX_WAIT 1 #define FUTEX_WAKE 2 #define FUTEX_REQUEUE 3 -#define FUTEX_PRIVATE_FLAG 128 +#define FUTEX_FLAG_MASK 0xff80 + +#define FUTEX_PRIVATE_FLAG 0x0080 #define FUTEX_WAIT_PRIVATE (FUTEX_WAIT | FUTEX_PRIVATE_FLAG) #define FUTEX_WAKE_PRIVATE (FUTEX_WAKE | FUTEX_PRIVATE_FLAG)
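
Notes on the diff above.

The FUTEX_OP_MASK/FUTEX_FLAG_MASK split means sys_futex() no longer needs a case per *_PRIVATE opcode: the flag bits are masked off once and carried through to futex_wait()/futex_wake()/futex_requeue(). A minimal sketch of that decode, using only the constants from the patched sys/sys/futex.h (the main() harness is illustrative):

#include <assert.h>

/* constants as defined in the patched sys/sys/futex.h */
#define FUTEX_OP_MASK		0x007f
#define FUTEX_WAIT		1
#define FUTEX_WAKE		2
#define FUTEX_REQUEUE		3
#define FUTEX_FLAG_MASK		0xff80
#define FUTEX_PRIVATE_FLAG	0x0080
#define FUTEX_WAIT_PRIVATE	(FUTEX_WAIT | FUTEX_PRIVATE_FLAG)

int
main(void)
{
	int op = FUTEX_WAIT_PRIVATE;

	/* split the op word once, as the patched sys_futex() does */
	int flags = op & FUTEX_FLAG_MASK;

	switch (op & FUTEX_OP_MASK) {
	case FUTEX_WAIT:
		assert(flags == FUTEX_PRIVATE_FLAG);
		break;
	default:
		assert(0);
	}

	return 0;
}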
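
Because FUTEX_PRIVATE_FLAG is now just a bit OR'd into the op, the compat semaphores can store the flag itself in sem->shared (0 for pshared semaphores, FUTEX_PRIVATE_FLAG otherwise) and pass it to _twaitf()/_wakef() unchanged. A rough userland sketch of the post side, assuming the futex(2) prototype from <sys/futex.h>; struct sem_stub and its functions are illustrative stand-ins, not the real struct __sem:

#include <sys/types.h>
#include <sys/futex.h>
#include <stdint.h>

struct sem_stub {
	volatile uint32_t	value;
	int			shared;	/* 0 or FUTEX_PRIVATE_FLAG */
};

static void
sem_stub_init(struct sem_stub *s, int pshared, unsigned int value)
{
	s->value = value;
	/* pshared semaphores must not use the process-private fast path */
	s->shared = pshared ? 0 : FUTEX_PRIVATE_FLAG;
}

static void
sem_stub_post(struct sem_stub *s)
{
	/* the real _sem_post() serializes the increment with sem->lock */
	s->value++;
	futex(&s->value, FUTEX_WAKE | s->shared, 1, NULL, NULL);
}

int
main(void)
{
	struct sem_stub s;

	sem_stub_init(&s, 0, 0);
	sem_stub_post(&s);	/* no waiters, so the wake is a no-op */

	return (s.value == 1 ? 0 : 1);
}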
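
futex_requeue() now has to hold two bucket locks at once. Deadlock is avoided by always taking the bucket with the smaller fb_id first; the ids are guaranteed distinct because the low FUTEX_BUCKET_BITS hold the bucket index and only the upper bits come from arc4random(). A userland sketch of the same ordering discipline, with pthread mutexes standing in for the per-bucket rwlocks (struct bucket and the *_pair() helpers are illustrative names):

#include <pthread.h>
#include <stdint.h>

struct bucket {
	pthread_mutex_t	lock;
	uint32_t	id;	/* unique: random upper bits, index in low bits */
};

/* take two bucket locks in a globally consistent order: smaller id first */
static void
bucket_lock_pair(struct bucket *a, struct bucket *b)
{
	if (a->id < b->id) {
		pthread_mutex_lock(&a->lock);
		pthread_mutex_lock(&b->lock);
	} else if (a->id > b->id) {
		pthread_mutex_lock(&b->lock);
		pthread_mutex_lock(&a->lock);
	} else			/* same bucket: take the lock once */
		pthread_mutex_lock(&a->lock);
}

static void
bucket_unlock_pair(struct bucket *a, struct bucket *b)
{
	pthread_mutex_unlock(&a->lock);
	if (a->id != b->id)
		pthread_mutex_unlock(&b->lock);
}

int
main(void)
{
	struct bucket x = { PTHREAD_MUTEX_INITIALIZER, 0x1234ab01 };
	struct bucket y = { PTHREAD_MUTEX_INITIALIZER, 0x00c0ffc2 };

	/* either argument order acquires the two locks in the same order */
	bucket_lock_pair(&x, &y);
	bucket_unlock_pair(&x, &y);
	bucket_lock_pair(&y, &x);
	bucket_unlock_pair(&y, &x);

	return 0;
}

Only the acquisition order has to be consistent; the release order does not matter for deadlock avoidance.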
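
The librthread rwlock writer queue and the kernel futex_wait() loop share the same handoff protocol. The waiter publishes a word set to 1 and sleeps while it stays 1; the waker clears the word before issuing the wakeup. A timed-out waiter can therefore tell a real timeout (word still 1) from a wakeup it raced with (word already 0), which is why _rthread_rwlock_wrlock() re-checks waiter.wait under the spin lock on ETIMEDOUT and returns success if ownership was already handed over; the real code also issues membar_enter() after the loop. A stripped-down sketch of the untimed case, with hypothetical wait_for_handoff()/hand_off() helpers built directly on futex(2):

#include <sys/types.h>
#include <sys/futex.h>
#include <stdint.h>

/* sleep until the waker clears *wait; spurious returns just loop */
static void
wait_for_handoff(volatile uint32_t *wait)
{
	while (*wait)
		futex(wait, FUTEX_WAIT_PRIVATE, 1, NULL, NULL);
}

/* clear the word first, then wake: the waiter's loop condition does the rest */
static void
hand_off(volatile uint32_t *wait)
{
	*wait = 0;
	futex(wait, FUTEX_WAKE_PRIVATE, 1, NULL, NULL);
}

int
main(void)
{
	/* single-threaded smoke test: hand off before waiting */
	volatile uint32_t w = 1;

	hand_off(&w);
	wait_for_handoff(&w);	/* returns immediately, w is already 0 */

	return 0;
}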