Index: sys/kern/sys_futex.c =================================================================== RCS file: /cvs/src/sys/kern/sys_futex.c,v diff -u -p -r1.22 sys_futex.c --- sys/kern/sys_futex.c 14 Aug 2023 07:42:34 -0000 1.22 +++ sys/kern/sys_futex.c 14 Jul 2024 22:48:01 -0000 @@ -36,41 +36,56 @@ * Kernel representation of a futex. */ struct futex { - LIST_ENTRY(futex) ft_list; /* list of all futexes */ - TAILQ_HEAD(, proc) ft_threads; /* sleeping queue */ + TAILQ_ENTRY(futex) ft_entry; /* list of all futexes */ + struct process *ft_ps; struct uvm_object *ft_obj; /* UVM object */ struct vm_amap *ft_amap; /* UVM amap */ voff_t ft_off; /* UVM offset */ - unsigned int ft_refcnt; /* # of references */ + + volatile unsigned int ft_wait; }; -/* Syscall helpers. */ -int futex_wait(uint32_t *, uint32_t, const struct timespec *, int); -int futex_wake(uint32_t *, uint32_t, int); -int futex_requeue(uint32_t *, uint32_t, uint32_t *, uint32_t, int); - -/* Flags for futex_get(). */ -#define FT_CREATE 0x1 /* Create a futex if it doesn't exist. */ -#define FT_PRIVATE 0x2 /* Futex is process-private. */ +TAILQ_HEAD(futexen, futex); -struct futex *futex_get(uint32_t *, int); -void futex_put(struct futex *); +struct futex_bucket { + struct futexen fb_list; + struct rwlock fb_lock; + uint32_t fb_id; /* for lock ordering */ +} __aligned(64); -/* - * The global futex lock serializes futex(2) calls so that no wakeup - * event is lost, and protects all futex lists and futex states. - */ -struct rwlock ftlock = RWLOCK_INITIALIZER("futex"); -static struct futex_list ftlist_shared = - LIST_HEAD_INITIALIZER(ftlist_shared); -struct pool ftpool; +/* Syscall helpers. */ +static int futex_wait(struct proc *, uint32_t *, uint32_t, + const struct timespec *, int); +static int futex_wake(struct proc *, uint32_t *, uint32_t, int, + register_t *); +static int futex_requeue(struct proc *, uint32_t *, uint32_t, + uint32_t *, uint32_t, int, register_t *); + +/* Flags for futex_get(). kernel private flags sit in FUTEX_OP_MASK space */ +#define FT_PRIVATE FUTEX_PRIVATE_FLAG /* Futex is process-private. 
*/ + +#define FUTEX_BUCKET_BITS 6 +#define FUTEX_BUCKET_SIZE (1U << FUTEX_BUCKET_BITS) +#define FUTEX_BUCKET_MASK (FUTEX_BUCKET_SIZE - 1) +static struct futex_bucket futex_hash[FUTEX_BUCKET_SIZE]; void futex_init(void) { - pool_init(&ftpool, sizeof(struct futex), 0, IPL_NONE, - PR_WAITOK | PR_RWLOCK, "futexpl", NULL); + struct futex_bucket *fb; + unsigned int i; + + for (i = 0; i < nitems(futex_hash); i++) { + fb = &futex_hash[i]; + + TAILQ_INIT(&fb->fb_list); + rw_init(&fb->fb_lock, "futexlk"); + + fb->fb_id = arc4random(); + fb->fb_id &= ~FUTEX_BUCKET_MASK; + fb->fb_id |= i; + } } int @@ -88,65 +103,51 @@ sys_futex(struct proc *p, void *v, regis uint32_t val = SCARG(uap, val); const struct timespec *timeout = SCARG(uap, timeout); void *g = SCARG(uap, g); - int flags = 0; + int flags = op & FUTEX_FLAG_MASK; int error = 0; - if (op & FUTEX_PRIVATE_FLAG) - flags |= FT_PRIVATE; - - rw_enter_write(&ftlock); - switch (op) { + switch (op & FUTEX_OP_MASK) { case FUTEX_WAIT: - case FUTEX_WAIT_PRIVATE: - error = futex_wait(uaddr, val, timeout, flags); + error = futex_wait(p, uaddr, val, timeout, flags); break; case FUTEX_WAKE: - case FUTEX_WAKE_PRIVATE: - *retval = futex_wake(uaddr, val, flags); + error = futex_wake(p, uaddr, val, flags, retval); break; case FUTEX_REQUEUE: - case FUTEX_REQUEUE_PRIVATE: - *retval = futex_requeue(uaddr, val, g, (u_long)timeout, flags); + error = futex_requeue(p, uaddr, val, g, + (u_long)timeout, flags, retval); break; default: error = ENOSYS; break; } - rw_exit_write(&ftlock); return error; } -/* - * Return an existing futex matching userspace address ``uaddr''. - * - * If such futex does not exist and FT_CREATE is given, create it. - */ -struct futex * -futex_get(uint32_t *uaddr, int flags) +static void +futex_addrs(struct proc *p, struct futex *f, uint32_t *uaddr, int flags) { - struct proc *p = curproc; vm_map_t map = &p->p_vmspace->vm_map; vm_map_entry_t entry; struct uvm_object *obj = NULL; struct vm_amap *amap = NULL; voff_t off = (vaddr_t)uaddr; - struct futex *f; - struct futex_list *ftlist = &p->p_p->ps_ftlist; + struct process *ps; - rw_assert_wrlock(&ftlock); + if (ISSET(flags, FT_PRIVATE)) + ps = p->p_p; + else { + ps = NULL; - if (!(flags & FT_PRIVATE)) { vm_map_lock_read(map); if (uvm_map_lookup_entry(map, (vaddr_t)uaddr, &entry) && entry->inheritance == MAP_INHERIT_SHARE) { if (UVM_ET_ISOBJ(entry)) { - ftlist = &ftlist_shared; obj = entry->object.uvm_obj; off = entry->offset + ((vaddr_t)uaddr - entry->start); } else if (entry->aref.ar_amap) { - ftlist = &ftlist_shared; amap = entry->aref.ar_amap; off = ptoa(entry->aref.ar_pageoff) + ((vaddr_t)uaddr - entry->start); @@ -155,47 +156,18 @@ futex_get(uint32_t *uaddr, int flags) vm_map_unlock_read(map); } - LIST_FOREACH(f, ftlist, ft_list) { - if (f->ft_obj == obj && f->ft_amap == amap && - f->ft_off == off) { - f->ft_refcnt++; - break; - } - } - - if ((f == NULL) && (flags & FT_CREATE)) { - /* - * We rely on the rwlock to ensure that no other thread - * create the same futex. - */ - f = pool_get(&ftpool, PR_WAITOK); - TAILQ_INIT(&f->ft_threads); - f->ft_obj = obj; - f->ft_amap = amap; - f->ft_off = off; - f->ft_refcnt = 1; - LIST_INSERT_HEAD(ftlist, f, ft_list); - } - - return f; + f->ft_ps = ps; + f->ft_obj = obj; + f->ft_amap = amap; + f->ft_off = off; } -/* - * Release a given futex. 
- */ -void -futex_put(struct futex *f) +static inline struct futex_bucket * +futex_get_bucket(struct futex *f) { - rw_assert_wrlock(&ftlock); + uint32_t key = f->ft_off >> 3; /* watevs */ - KASSERT(f->ft_refcnt > 0); - - --f->ft_refcnt; - if (f->ft_refcnt == 0) { - KASSERT(TAILQ_EMPTY(&f->ft_threads)); - LIST_REMOVE(f, ft_list); - pool_put(&ftpool, f); - } + return (&futex_hash[key & FUTEX_BUCKET_MASK]); } /* @@ -203,69 +175,78 @@ futex_put(struct futex *f) * ``uaddr''. Let it sleep for the specified ``timeout'' time, or * indefinitely if the argument is NULL. */ -int -futex_wait(uint32_t *uaddr, uint32_t val, const struct timespec *timeout, - int flags) +static int +futex_wait(struct proc *p, uint32_t *uaddr, uint32_t val, + const struct timespec *timeout, int flags) { - struct proc *p = curproc; - struct futex *f; + struct futex f; + struct futex_bucket *fb; uint64_t nsecs = INFSLP; uint32_t cval; int error; - /* - * After reading the value a race is still possible but - * we deal with it by serializing all futex syscalls. - */ - rw_assert_wrlock(&ftlock); - - /* - * Read user space futex value - */ - if ((error = copyin32(uaddr, &cval))) - return error; - - /* If the value changed, stop here. */ - if (cval != val) - return EAGAIN; - if (timeout != NULL) { struct timespec ts; - if ((error = copyin(timeout, &ts, sizeof(ts)))) - return error; + error = copyin(timeout, &ts, sizeof(ts)); + if (error != 0) + return (error); #ifdef KTRACE if (KTRPOINT(p, KTR_STRUCT)) ktrreltimespec(p, &ts); #endif if (ts.tv_sec < 0 || !timespecisvalid(&ts)) - return EINVAL; + return (EINVAL); + nsecs = MAX(1, MIN(TIMESPEC_TO_NSEC(&ts), MAXTSLP)); } - f = futex_get(uaddr, flags | FT_CREATE); - TAILQ_INSERT_TAIL(&f->ft_threads, p, p_fut_link); - p->p_futex = f; - - error = rwsleep_nsec(p, &ftlock, PWAIT|PCATCH, "fsleep", nsecs); - if (error == ERESTART) - error = ECANCELED; - else if (error == EWOULDBLOCK) { - /* A race occurred between a wakeup and a timeout. */ - if (p->p_futex == NULL) - error = 0; - else - error = ETIMEDOUT; - } + futex_addrs(p, &f, uaddr, flags); + fb = futex_get_bucket(&f); + + /* + * After reading the value a race is still possible but + * we deal with it by serializing futex syscalls. + */ + error = rw_enter(&fb->fb_lock, RW_WRITE|RW_INTR); + if (error != 0) + return (error); + + /* + * Read user space futex value + */ + if ((error = copyin32(uaddr, &cval))) + return (error); - /* Remove ourself if we haven't been awaken. */ - if ((f = p->p_futex) != NULL) { - p->p_futex = NULL; - TAILQ_REMOVE(&f->ft_threads, p, p_fut_link); - futex_put(f); + /* If the value changed, stop here. */ + if (cval != val) { + error = EAGAIN; + goto exit; } - return error; + TAILQ_INSERT_TAIL(&fb->fb_list, &f, ft_entry); + f.ft_wait = 1; + do { + error = rwsleep_nsec(&f, &fb->fb_lock, + PWAIT|PCATCH, "fsleep", nsecs); + if (error != 0) { + switch (error) { + case ERESTART: + error = ECANCELED; + break; + case EWOULDBLOCK: + error = f.ft_wait ? ETIMEDOUT : 0; + break; + } + + break; + } + } while (f.ft_wait); + TAILQ_REMOVE(&fb->fb_list, &f, ft_entry); + +exit: + rw_exit_write(&fb->fb_lock); + return (error); } /* @@ -273,46 +254,131 @@ futex_wait(uint32_t *uaddr, uint32_t val * ``uaddr'' and requeue at most ``m'' sibling threads on a futex at * address ``uaddr2''. 
*/ -int -futex_requeue(uint32_t *uaddr, uint32_t n, uint32_t *uaddr2, uint32_t m, - int flags) +static int +futex_requeue(struct proc *p, uint32_t *uaddr, uint32_t n, + uint32_t *uaddr2, uint32_t m, int flags, register_t *retval) { - struct futex *f, *g; - struct proc *p; + struct futex okey, nkey; + struct futex *f, *nf = NULL; + struct futex_bucket *ofb, *nfb; uint32_t count = 0; - rw_assert_wrlock(&ftlock); + if (m > INT_MAX) + return (EINVAL); + if (m == 0) + return (futex_wake(p, uaddr, n, flags, retval)); + + futex_addrs(p, &okey, uaddr, flags); + ofb = futex_get_bucket(&okey); + futex_addrs(p, &nkey, uaddr2, flags); + nfb = futex_get_bucket(&nkey); + + if (ofb->fb_id < nfb->fb_id) { + rw_enter_write(&ofb->fb_lock); + rw_enter_write(&nfb->fb_lock); + } else if (ofb->fb_id > nfb->fb_id) { + rw_enter_write(&nfb->fb_lock); + rw_enter_write(&ofb->fb_lock); + } else + rw_enter_write(&ofb->fb_lock); + + TAILQ_FOREACH(f, &ofb->fb_list, ft_entry) { + if (f->ft_off != okey.ft_off || + f->ft_ps != okey.ft_ps || + f->ft_obj != okey.ft_obj || + f->ft_amap != okey.ft_amap) + continue; + + f->ft_wait = 0; + wakeup_one(f); - f = futex_get(uaddr, flags); - if (f == NULL) - return 0; - - while ((p = TAILQ_FIRST(&f->ft_threads)) != NULL && (count < (n + m))) { - p->p_futex = NULL; - TAILQ_REMOVE(&f->ft_threads, p, p_fut_link); - futex_put(f); - - if (count < n) { - wakeup_one(p); - } else if (uaddr2 != NULL) { - g = futex_get(uaddr2, FT_CREATE); - TAILQ_INSERT_TAIL(&g->ft_threads, p, p_fut_link); - p->p_futex = g; + if (++count == n) { + nf = TAILQ_NEXT(f, ft_entry); + break; } - count++; } - futex_put(f); + /* move matching futexes to the new bucket */ + while (nf != NULL) { + f = nf; + nf = TAILQ_NEXT(f, ft_entry); + + if (f->ft_off != okey.ft_off || + f->ft_ps != okey.ft_ps || + f->ft_obj != okey.ft_obj || + f->ft_amap != okey.ft_amap) + continue; + + TAILQ_REMOVE(&ofb->fb_list, f, ft_entry); + /* it should only be ft_off that changes, but eh */ + f->ft_ps = nkey.ft_ps; + f->ft_obj = nkey.ft_obj; + f->ft_amap = nkey.ft_amap; + f->ft_off = nkey.ft_off; + TAILQ_INSERT_TAIL(&nfb->fb_list, f, ft_entry); + + if (--m == 0) + break; + } + + if (ofb->fb_id < nfb->fb_id) { + rw_exit_write(&nfb->fb_lock); + rw_exit_write(&ofb->fb_lock); + } else if (ofb->fb_id > nfb->fb_id) { + rw_exit_write(&ofb->fb_lock); + rw_exit_write(&nfb->fb_lock); + } else + rw_exit_write(&ofb->fb_lock); - return count; + *retval = count; + + return (0); } /* * Wakeup at most ``n'' sibling threads sleeping on a futex at address * ``uaddr''. 
*/ -int -futex_wake(uint32_t *uaddr, uint32_t n, int flags) +static int +futex_wake(struct proc *p, uint32_t *uaddr, uint32_t n, int flags, + register_t *retval) { - return futex_requeue(uaddr, n, NULL, 0, flags); + struct futex key; + struct futex *f; + struct futex_bucket *fb; + int count = 0; + int error; + + if (n > INT_MAX) + return (EINVAL); + if (n == 0) { + *retval = 0; + return (0); + } + + futex_addrs(p, &key, uaddr, flags); + fb = futex_get_bucket(&key); + + error = rw_enter(&fb->fb_lock, RW_READ|RW_INTR); + if (error != 0) + return (error); + + TAILQ_FOREACH(f, &fb->fb_list, ft_entry) { + if (f->ft_off != key.ft_off || + f->ft_ps != key.ft_ps || + f->ft_obj != key.ft_obj || + f->ft_amap != key.ft_amap) + continue; + + f->ft_wait = 0; + wakeup_one(f); + + if (++count == n) + break; + } + + rw_exit_read(&fb->fb_lock); + + *retval = count; + return (0); } Index: lib/libc/include/thread_private.h =================================================================== RCS file: /cvs/src/lib/libc/include/thread_private.h,v diff -u -p -r1.36 thread_private.h --- lib/libc/include/thread_private.h 6 Jan 2021 19:54:17 -0000 1.36 +++ lib/libc/include/thread_private.h 14 Jul 2024 22:48:01 -0000 @@ -287,33 +287,26 @@ struct __sem { int shared; }; -TAILQ_HEAD(pthread_queue, pthread); - -#ifdef FUTEX - -struct pthread_mutex { - volatile unsigned int lock; - int type; - pthread_t owner; - int count; - int prioceiling; -}; - -struct pthread_cond { - volatile unsigned int seq; - clockid_t clock; - struct pthread_mutex *mutex; +struct __cmtx_node { + volatile uint32_t wait; + SIMPLEQ_ENTRY(__cmtx_node) link; +}; +SIMPLEQ_HEAD(__cmtx_node_list, __cmtx_node); + +struct __cmtx { + _atomic_lock_t spin; + uint32_t lock; + struct __cmtx_node_list list; }; -struct pthread_rwlock { - volatile unsigned int value; -}; +TAILQ_HEAD(pthread_queue, pthread); -#else +struct pthread_locker; +TAILQ_HEAD(pthread_waiters, pthread_waiter); struct pthread_mutex { _atomic_lock_t lock; - struct pthread_queue lockers; + struct pthread_waiters waiters; int type; pthread_t owner; int count; @@ -322,7 +315,7 @@ struct pthread_mutex { struct pthread_cond { _atomic_lock_t lock; - struct pthread_queue waiters; + struct pthread_waiters waiters; struct pthread_mutex *mutex; clockid_t clock; }; @@ -330,10 +323,9 @@ struct pthread_cond { struct pthread_rwlock { _atomic_lock_t lock; pthread_t owner; - struct pthread_queue writers; + struct pthread_waiters writers; int readers; }; -#endif /* FUTEX */ struct pthread_mutex_attr { int ma_type; @@ -402,6 +394,13 @@ struct pthread { (self)->delayed_cancel = 0; \ ENTER_CANCEL_POINT_INNER(tib, 1, 1) +#define SPIN_COUNT 128 +#if defined(__i386__) || defined(__amd64__) +#define SPIN_WAIT() asm volatile("pause": : : "memory") +#else +#define SPIN_WAIT() do { } while (0) +#endif + /* * Internal functions exported from libc's thread bits for use by libpthread */ @@ -413,6 +412,19 @@ void _rthread_debug(int, const char *, . 
__attribute__((__format__ (printf, 2, 3))); pid_t _thread_dofork(pid_t (*_sys_fork)(void)); void _thread_finalize(void); + +/* + * simple^Wmutex for libc to use internally + */ +void __cmtx_init(struct __cmtx *); +void __cmtx_enter(struct __cmtx *); +void __cmtx_leave(struct __cmtx *); + +#define __CMTX_INITIALIZER(_cm) { \ + .spin = _SPINLOCK_UNLOCKED, \ + .lock = 0, \ + .list = SIMPLEQ_HEAD_INITIALIZER(_cm.list), \ +} /* * Threading syscalls not declared in system headers Index: lib/libc/thread/Makefile.inc =================================================================== RCS file: /cvs/src/lib/libc/thread/Makefile.inc,v diff -u -p -r1.19 Makefile.inc --- lib/libc/thread/Makefile.inc 6 Feb 2020 03:13:45 -0000 1.19 +++ lib/libc/thread/Makefile.inc 14 Jul 2024 22:48:01 -0000 @@ -12,6 +12,7 @@ SRCS+= rthread.c \ rthread_libc.c \ rthread_once.c \ rthread_tls.c \ + rthread_sync.c notyet= rthread_condattr_clock.c \ rthread_equal.c \ @@ -19,14 +20,6 @@ notyet= rthread_condattr_clock.c \ spinlock.c \ spinlocktry.c -.if ${MACHINE_ARCH} == "hppa" || ${MACHINE_ARCH} == "m88k" || \ - ${MACHINE_ARCH} == "sh" -SRCS+= rthread_sync.c -.else -CFLAGS+= -DFUTEX -SRCS+= rthread_mutex.c \ - rthread_cond.c -.endif .if defined(NOPIC) CFLAGS+=-DNO_PIC Index: lib/libc/thread/rthread.c =================================================================== RCS file: /cvs/src/lib/libc/thread/rthread.c,v diff -u -p -r1.9 rthread.c --- lib/libc/thread/rthread.c 12 Oct 2020 22:06:51 -0000 1.9 +++ lib/libc/thread/rthread.c 14 Jul 2024 22:48:01 -0000 @@ -21,6 +21,9 @@ #include #include +#include +#include +#include #include #include @@ -34,6 +37,7 @@ int _rthread_debug_level; static int _threads_inited; +static int ncpus; struct pthread _initial_thread = { .flags_lock = _SPINLOCK_UNLOCKED, @@ -41,13 +45,52 @@ struct pthread _initial_thread = { }; /* + * Wait for the spinlock to become unlocked. + * + * On uniprocessor systems it is pointless to spin waiting for + * another thread to release the lock because this thread occupies + * the only CPU, preventing the thread holding the lock from running + * and leaving the critical section. + * + * On multiprocessor systems we spin, but not forever in case there + * are more threads than CPUs still, and more progress might be made + * if we can get the other thread to run. + */ + +static inline void +_spinlock_wait(volatile _atomic_lock_t *lock) +{ + do { + if (ncpus > 1) { + unsigned int spin; + + for (spin = 0; spin < SPIN_COUNT; spin++) { + SPIN_WAIT(); + if (*lock == _ATOMIC_LOCK_UNLOCKED) + return; + } + } + + sched_yield(); + } while (*lock != _ATOMIC_LOCK_UNLOCKED); +} + +/* * internal support functions */ void _spinlock(volatile _atomic_lock_t *lock) { + if (ncpus == 0) { + static const int mib[] = { CTL_HW, HW_NCPU }; + size_t ncpuslen = sizeof(ncpus); + + if (sysctl(mib, 2, &ncpus, &ncpuslen, NULL, 0) == -1) + ncpus = 1; + } + while (_atomic_lock(lock)) - sched_yield(); + _spinlock_wait(lock); membar_enter_after_atomic(); } DEF_STRONG(_spinlock); @@ -69,6 +112,91 @@ _spinunlock(volatile _atomic_lock_t *loc *lock = _ATOMIC_LOCK_UNLOCKED; } DEF_STRONG(_spinunlock); + +/* + * libc internal mutex + * + * the lock is implemented as a list of waiters protected by a spinlock. + * threads waiting for the lock add themselves to the list, and then + * spin on their own wait variable. + * + * this avoids (some) contention on the lock data structure by + * having threads spin on their stack. 
the thread that "owns" the + * lock is responsible for checking if there are waiting threads and + * updating their wait variable to wake them up. + * + * it also provides ordered access to the critical section by having + * threads only woken up in the order the were queued on the lock. + * + * this in turn (ha) prevents the "thundering herd" in classic locks + * where all threads are woken up so they can try and take ownership. + */ + +void +__cmtx_init(struct __cmtx *cm) +{ + cm->spin = _SPINLOCK_UNLOCKED; + cm->lock = 0; + SIMPLEQ_INIT(&cm->list); +} + +void +__cmtx_enter(struct __cmtx *cm) +{ + struct __cmtx_node self = { .wait = 1 }; + uint32_t locked; + + _spinlock(&cm->spin); + locked = cm->lock; + if (!locked) + cm->lock = 1; + else + SIMPLEQ_INSERT_TAIL(&cm->list, &self, link); + _spinunlock(&cm->spin); + + if (!locked) { + /* the spinlock ops provided enough membars */ + return; + } + + if (ncpus > 1) { + unsigned int spin; + + for (spin = 0; spin < SPIN_COUNT; spin++) { + SPIN_WAIT(); + if (!self.wait) { + membar_enter(); + return; + } + } + } + + do { + futex(&self.wait, FUTEX_WAIT_PRIVATE, 1, NULL, NULL); + } while (self.wait); + + membar_enter(); +} + +void +__cmtx_leave(struct __cmtx *cm) +{ + struct __cmtx_node *next; + + _spinlock(&cm->spin); + next = SIMPLEQ_FIRST(&cm->list); + if (next != NULL) + SIMPLEQ_REMOVE_HEAD(&cm->list, link); + else + cm->lock = 0; + _spinunlock(&cm->spin); /* this provides membar_exit() */ + + if (next == NULL) + return; + + next->wait = 0; + futex(&next->wait, FUTEX_WAKE_PRIVATE, 1, NULL, NULL); +} static void _rthread_init(void) Index: lib/libc/thread/rthread_file.c =================================================================== RCS file: /cvs/src/lib/libc/thread/rthread_file.c,v diff -u -p -r1.3 rthread_file.c --- lib/libc/thread/rthread_file.c 27 Dec 2022 17:10:06 -0000 1.3 +++ lib/libc/thread/rthread_file.c 14 Jul 2024 22:48:01 -0000 @@ -38,6 +38,9 @@ * */ +#include +#include +#include #include #include #include @@ -47,17 +50,26 @@ #include "rthread.h" #include "rthread_cb.h" +struct file_lock_waiter { + volatile unsigned int wait; + pthread_t owner; + SIMPLEQ_ENTRY(file_lock_waiter) link; +}; + +SIMPLEQ_HEAD(file_lock_waiters, file_lock_waiter); + /* * The FILE lock structure. The FILE *fp is locked if the owner is * not NULL. If not locked, the file lock structure can be * reassigned to a different file by setting fp. */ struct file_lock { - LIST_ENTRY(file_lock) entry; /* Entry if file list. */ - FILE *fp; /* The target file. */ - struct pthread_queue lockers; - pthread_t owner; - int count; + LIST_ENTRY(file_lock) entry; /* Entry if file list. */ + FILE *fp; + + pthread_t owner; + unsigned int count; + struct file_lock_waiters waiters; }; /* @@ -83,115 +95,144 @@ struct file_lock { * allocated statically in the hope that there won't be too many * collisions that require a malloc and an element added to the list. 
*/ -static struct static_file_lock { - LIST_HEAD(file_list_head, file_lock) head; +static struct file_lock_bucket { + volatile unsigned int initted; + struct __cmtx lock; + LIST_HEAD(file_lock_list, file_lock) head; struct file_lock fl; } flh[NUM_HEADS]; -/* Lock for accesses to the hash table: */ +/* Lock for initialisation of the hash table: */ static _atomic_lock_t hash_lock = _SPINLOCK_UNLOCKED; +static struct file_lock_bucket * +file_bucket(FILE *fp) +{ + int idx = file_idx(fp); + struct file_lock_bucket *flb = &flh[idx]; + + if (!flb->initted) { + _spinlock(&hash_lock); + if (!flb->initted) { + __cmtx_init(&flb->lock); + LIST_INIT(&flb->head); + + SIMPLEQ_INIT(&flb->fl.waiters); + + /* XXX barrier? */ + flb->initted = 1; + } + _spinunlock(&hash_lock); + } + + return (flb); +} + /* * Find a lock structure for a FILE, return NULL if the file is * not locked: */ -static -struct file_lock * -find_lock(int idx, FILE *fp) +static struct file_lock * +find_lock(struct file_lock_bucket *flb, FILE *fp) { struct file_lock *p; /* Check if the file is locked using the static structure: */ - if (flh[idx].fl.fp == fp && flh[idx].fl.owner != NULL) + if (flb->fl.fp == fp) { /* Return a pointer to the static lock: */ - p = &flh[idx].fl; - else { - /* Point to the first dynamic lock: */ - p = LIST_FIRST(&flh[idx].head); - + p = &flb->fl; + } else { /* * Loop through the dynamic locks looking for the * target file: */ - while (p != NULL && (p->fp != fp || p->owner == NULL)) - /* Not this file, try the next: */ - p = LIST_NEXT(p, entry); + LIST_FOREACH(p, &flb->head, entry) { + if (p->fp == fp) + break; + } } - return(p); + + return (p); } /* * Lock a file, assuming that there is no lock structure currently * assigned to it. */ -static -struct file_lock * -do_lock(int idx, FILE *fp) +static struct file_lock * +do_lock(struct file_lock_bucket *flb, FILE *fp) { struct file_lock *p; /* Check if the static structure is not being used: */ - if (flh[idx].fl.owner == NULL) { + if (flb->fl.owner == NULL) { /* Return a pointer to the static lock: */ - p = &flh[idx].fl; - } - else { - /* Point to the first dynamic lock: */ - p = LIST_FIRST(&flh[idx].head); - + p = &flb->fl; + } else { /* * Loop through the dynamic locks looking for a * lock structure that is not being used: */ - while (p != NULL && p->owner != NULL) - /* This one is used, try the next: */ - p = LIST_NEXT(p, entry); + LIST_FOREACH(p, &flb->head, entry) { + if (p->fp == NULL) + break; + } } /* * If an existing lock structure has not been found, * allocate memory for a new one: */ - if (p == NULL && (p = (struct file_lock *) - malloc(sizeof(struct file_lock))) != NULL) { + if (p == NULL) { + p = malloc(sizeof(*p)); + if (p == NULL) + return (NULL); + + p->owner = NULL; + p->count = 0; + SIMPLEQ_INIT(&p->waiters); + /* Add the new element to the list: */ - LIST_INSERT_HEAD(&flh[idx].head, p, entry); + LIST_INSERT_HEAD(&flb->head, p, entry); } - /* Check if there is a lock structure to acquire: */ - if (p != NULL) { - /* Acquire the lock for the running thread: */ - p->fp = fp; - p->owner = pthread_self(); - p->count = 1; - TAILQ_INIT(&p->lockers); - } - return(p); + p->fp = fp; + + return (p); } void _thread_flockfile(FILE * fp) { - int idx = file_idx(fp); - struct file_lock *p; - pthread_t self = pthread_self(); + struct file_lock_bucket *flb = file_bucket(fp); + struct file_lock *p; + pthread_t self = pthread_self(); + pthread_t owner; + struct file_lock_waiter wchan = { .wait = 1, .owner = self }; - /* Lock the hash table: */ - 
_spinlock(&hash_lock); + __cmtx_enter(&flb->lock); /* Get a pointer to any existing lock for the file: */ - if ((p = find_lock(idx, fp)) == NULL) { + p = find_lock(flb, fp); + if (p == NULL) { /* * The file is not locked, so this thread can * grab the lock: */ - do_lock(idx, fp); + p = do_lock(flb, fp); + if (p == NULL) { + /* XXX unable to allocate dynamic lock! */ + __cmtx_leave(&flb->lock); + /* abort(); */ + return; + } - /* - * The file is already locked, so check if the - * running thread is the owner: - */ - } else if (p->owner == self) { + p->owner = self; /* Take ownership of the file_lock */ + } + + owner = p->owner; + /* Check if the running thread is the owner: */ + if (owner == self) { /* * The running thread is already the * owner, so increment the count of @@ -205,101 +246,88 @@ _thread_flockfile(FILE * fp) * Append this thread to the queue of * threads waiting on the lock. */ - TAILQ_INSERT_TAIL(&p->lockers,self,waiting); - while (p->owner != self) { - __thrsleep(self, 0, NULL, &hash_lock, NULL); - _spinlock(&hash_lock); - } + SIMPLEQ_INSERT_TAIL(&p->waiters, &wchan, link); } + __cmtx_leave(&flb->lock); - /* Unlock the hash table: */ - _spinunlock(&hash_lock); + if (owner == self) { + /* __cmtx provided enough membars */ + return; + } + + /* spin? */ + + while (wchan.wait) + futex(&wchan.wait, FUTEX_WAIT_PRIVATE, 1, NULL, NULL); + + membar_enter(); } int _thread_ftrylockfile(FILE * fp) { - int ret = -1; - int idx = file_idx(fp); - struct file_lock *p; - - /* Lock the hash table: */ - _spinlock(&hash_lock); + struct file_lock_bucket *flb = file_bucket(fp); + struct file_lock *p; + pthread_t self = pthread_self(); + int ret = -1; + __cmtx_enter(&flb->lock); /* Get a pointer to any existing lock for the file: */ - if ((p = find_lock(idx, fp)) == NULL) { + p = find_lock(flb, fp); + if (p == NULL) { /* * The file is not locked, so this thread can * grab the lock: */ - p = do_lock(idx, fp); - - /* - * The file is already locked, so check if the - * running thread is the owner: - */ - } else if (p->owner == pthread_self()) { - /* - * The running thread is already the - * owner, so increment the count of - * the number of times it has locked - * the file: - */ - p->count++; - } else { - /* - * The file is locked for another thread, - * so this try fails. - */ - p = NULL; + p = do_lock(flb, fp); + if (p == NULL) { + /* XXX unable to allocate dynamic lock! 
*/ + __cmtx_leave(&flb->lock); + /* abort(); */ + return (-1); + } + + p->owner = self; } - /* Unlock the hash table: */ - _spinunlock(&hash_lock); - - /* Check if the lock was obtained: */ - if (p != NULL) - /* Return success: */ + if (p->owner == self) { + p->count++; ret = 0; + } + __cmtx_leave(&flb->lock); return (ret); } -void +void _thread_funlockfile(FILE * fp) { - int idx = file_idx(fp); - struct file_lock *p; - - /* Lock the hash table: */ - _spinlock(&hash_lock); - - /* - * Get a pointer to the lock for the file and check that - * the running thread is the one with the lock: - */ - if ((p = find_lock(idx, fp)) != NULL && p->owner == pthread_self()) { - /* - * Check if this thread has locked the FILE - * more than once: - */ - if (--p->count == 0) { - /* Get the new owner of the lock: */ - if ((p->owner = TAILQ_FIRST(&p->lockers)) != NULL) { - /* Pop the thread off the queue: */ - TAILQ_REMOVE(&p->lockers,p->owner,waiting); - - /* - * This is the first lock for the new - * owner: - */ - p->count = 1; + struct file_lock_bucket *flb = file_bucket(fp); + struct file_lock *p; + struct file_lock_waiter *wchan = NULL; - __thrwakeup(p->owner, 1); - } + __cmtx_enter(&flb->lock); + /* Get a pointer to the lock for the file: */ + p = find_lock(flb, fp); + /* assert p != NULL */ + /* assert p->owner == self */ + if (--p->count == 0) { + wchan = SIMPLEQ_FIRST(&p->waiters); + if (wchan != NULL) { + SIMPLEQ_REMOVE_HEAD(&p->waiters, link); + p->owner = wchan->owner; + p->count = 1; + } else { + /* This gives the entry back to the bucket: */ + p->fp = NULL; + p->owner = NULL; } } + __cmtx_leave(&flb->lock); + + if (wchan == NULL) + return; - /* Unlock the hash table: */ - _spinunlock(&hash_lock); + wchan->wait = 0; + futex(&wchan->wait, FUTEX_WAKE_PRIVATE, 1, NULL, NULL); } Index: lib/libc/thread/rthread_libc.c =================================================================== RCS file: /cvs/src/lib/libc/thread/rthread_libc.c,v diff -u -p -r1.4 rthread_libc.c --- lib/libc/thread/rthread_libc.c 6 Jan 2021 19:54:17 -0000 1.4 +++ lib/libc/thread/rthread_libc.c 14 Jul 2024 22:48:01 -0000 @@ -152,24 +152,9 @@ _thread_mutex_destroy(void **mutex) /* * the malloc lock */ -#ifndef FUTEX -#define MALLOC_LOCK_INITIALIZER(n) { \ - _SPINLOCK_UNLOCKED, \ - TAILQ_HEAD_INITIALIZER(malloc_lock[n].lockers), \ - PTHREAD_MUTEX_DEFAULT, \ - NULL, \ - 0, \ - -1 } -#else -#define MALLOC_LOCK_INITIALIZER(n) { \ - _SPINLOCK_UNLOCKED, \ - PTHREAD_MUTEX_DEFAULT, \ - NULL, \ - 0, \ - -1 } -#endif +#define MALLOC_LOCK_INITIALIZER(n) __CMTX_INITIALIZER(malloc_lock[n]) -static struct pthread_mutex malloc_lock[_MALLOC_MUTEXES] = { +static struct __cmtx malloc_lock[_MALLOC_MUTEXES] = { MALLOC_LOCK_INITIALIZER(0), MALLOC_LOCK_INITIALIZER(1), MALLOC_LOCK_INITIALIZER(2), @@ -204,51 +189,16 @@ static struct pthread_mutex malloc_lock[ MALLOC_LOCK_INITIALIZER(31) }; -static pthread_mutex_t malloc_mutex[_MALLOC_MUTEXES] = { - &malloc_lock[0], - &malloc_lock[1], - &malloc_lock[2], - &malloc_lock[3], - &malloc_lock[4], - &malloc_lock[5], - &malloc_lock[6], - &malloc_lock[7], - &malloc_lock[8], - &malloc_lock[9], - &malloc_lock[10], - &malloc_lock[11], - &malloc_lock[12], - &malloc_lock[13], - &malloc_lock[14], - &malloc_lock[15], - &malloc_lock[16], - &malloc_lock[17], - &malloc_lock[18], - &malloc_lock[19], - &malloc_lock[20], - &malloc_lock[21], - &malloc_lock[22], - &malloc_lock[23], - &malloc_lock[24], - &malloc_lock[25], - &malloc_lock[26], - &malloc_lock[27], - &malloc_lock[28], - &malloc_lock[29], - &malloc_lock[30], - 
&malloc_lock[31] -}; - void _thread_malloc_lock(int i) { - pthread_mutex_lock(&malloc_mutex[i]); + __cmtx_enter(&malloc_lock[i]); } void _thread_malloc_unlock(int i) { - pthread_mutex_unlock(&malloc_mutex[i]); + __cmtx_leave(&malloc_lock[i]); } static void @@ -256,14 +206,8 @@ _thread_malloc_reinit(void) { int i; - for (i = 0; i < _MALLOC_MUTEXES; i++) { - malloc_lock[i].lock = _SPINLOCK_UNLOCKED; -#ifndef FUTEX - TAILQ_INIT(&malloc_lock[i].lockers); -#endif - malloc_lock[i].owner = NULL; - malloc_lock[i].count = 0; - } + for (i = 0; i < _MALLOC_MUTEXES; i++) + __cmtx_init(&malloc_lock[i]); } /* @@ -303,18 +247,18 @@ _thread_atfork_unlock(void) /* * arc4random lock */ -static _atomic_lock_t arc4_lock = _SPINLOCK_UNLOCKED; +static struct __cmtx arc4_lock = __CMTX_INITIALIZER(arc4_lock); void _thread_arc4_lock(void) { - _spinlock(&arc4_lock); + __cmtx_enter(&arc4_lock); } void _thread_arc4_unlock(void) { - _spinunlock(&arc4_lock); + __cmtx_leave(&arc4_lock); } pid_t Index: lib/libc/thread/rthread_mutex.c =================================================================== RCS file: /cvs/src/lib/libc/thread/rthread_mutex.c,v diff -u -p -r1.5 rthread_mutex.c --- lib/libc/thread/rthread_mutex.c 13 Feb 2019 13:09:32 -0000 1.5 +++ lib/libc/thread/rthread_mutex.c 14 Jul 2024 22:48:01 -0000 @@ -36,14 +36,7 @@ enum { CONTENDED = 2, /* threads waiting for this mutex */ }; -#define SPIN_COUNT 128 -#if defined(__i386__) || defined(__amd64__) -#define SPIN_WAIT() asm volatile("pause": : : "memory") -#else -#define SPIN_WAIT() do { } while (0) -#endif - -static _atomic_lock_t static_init_lock = _SPINLOCK_UNLOCKED; +static struct __cmtx static_init_lock = __CMTX_INITIALIZER(static_init_lock); int pthread_mutex_init(pthread_mutex_t *mutexp, const pthread_mutexattr_t *attr) @@ -151,10 +144,10 @@ _rthread_mutex_timedlock(pthread_mutex_t * is NULL. 
*/ if (*mutexp == NULL) { - _spinlock(&static_init_lock); + __cmtx_enter(&static_init_lock); if (*mutexp == NULL) error = pthread_mutex_init(mutexp, NULL); - _spinunlock(&static_init_lock); + __cmtx_leave(&static_init_lock); if (error != 0) return (EINVAL); } Index: lib/libc/thread/rthread_sync.c =================================================================== RCS file: /cvs/src/lib/libc/thread/rthread_sync.c,v diff -u -p -r1.6 rthread_sync.c --- lib/libc/thread/rthread_sync.c 10 Jan 2024 04:28:43 -0000 1.6 +++ lib/libc/thread/rthread_sync.c 14 Jul 2024 22:48:01 -0000 @@ -28,9 +28,17 @@ #include #include "rthread.h" +#include "synch.h" #include "cancel.h" /* in libc/include */ -static _atomic_lock_t static_init_lock = _SPINLOCK_UNLOCKED; +static struct __cmtx static_init_lock = __CMTX_INITIALIZER(static_init_lock); + +struct pthread_waiter { + volatile uint32_t wait; + pthread_cond_t cv; + pthread_t owner; + TAILQ_ENTRY(pthread_waiter) entry; +}; /* * mutexen @@ -44,7 +52,7 @@ pthread_mutex_init(pthread_mutex_t *mute if (!mutex) return (errno); mutex->lock = _SPINLOCK_UNLOCKED; - TAILQ_INIT(&mutex->lockers); + TAILQ_INIT(&mutex->waiters); if (attr == NULL) { mutex->type = PTHREAD_MUTEX_DEFAULT; mutex->prioceiling = -1; @@ -68,7 +76,7 @@ pthread_mutex_destroy(pthread_mutex_t *m mutex = (struct pthread_mutex *)*mutexp; if (mutex) { if (mutex->count || mutex->owner != NULL || - !TAILQ_EMPTY(&mutex->lockers)) { + !TAILQ_EMPTY(&mutex->waiters)) { #define MSG "pthread_mutex_destroy on mutex with waiters!\n" write(2, MSG, sizeof(MSG) - 1); #undef MSG @@ -87,6 +95,9 @@ _rthread_mutex_lock(pthread_mutex_t *mut { struct pthread_mutex *mutex; pthread_t self = pthread_self(); + pthread_t owner; + struct pthread_waiter waiter = { .owner = self, .wait = 1 }; + unsigned int spin; int ret = 0; /* @@ -96,10 +107,10 @@ _rthread_mutex_lock(pthread_mutex_t *mut * is NULL. */ if (*mutexp == NULL) { - _spinlock(&static_init_lock); + __cmtx_enter(&static_init_lock); if (*mutexp == NULL) ret = pthread_mutex_init(mutexp, NULL); - _spinunlock(&static_init_lock); + __cmtx_leave(&static_init_lock); if (ret != 0) return (EINVAL); } @@ -107,62 +118,84 @@ _rthread_mutex_lock(pthread_mutex_t *mut _rthread_debug(5, "%p: mutex_lock %p\n", (void *)self, (void *)mutex); _spinlock(&mutex->lock); - if (mutex->owner == NULL && TAILQ_EMPTY(&mutex->lockers)) { + owner = mutex->owner; + if (owner == NULL) { assert(mutex->count == 0); - mutex->owner = self; - } else if (mutex->owner == self) { + assert(TAILQ_EMPTY(&mutex->waiters)); + mutex->owner = owner = self; + } else if (owner == self) { assert(mutex->count > 0); /* already owner? handle recursive behavior */ - if (mutex->type != PTHREAD_MUTEX_RECURSIVE) - { - if (trywait || - mutex->type == PTHREAD_MUTEX_ERRORCHECK) { - _spinunlock(&mutex->lock); - return (trywait ? EBUSY : EDEADLK); - } - - /* self-deadlock is disallowed by strict */ - if (mutex->type == PTHREAD_MUTEX_STRICT_NP && - abstime == NULL) - abort(); - - /* self-deadlock, possibly until timeout */ - while (__thrsleep(self, CLOCK_REALTIME, abstime, - &mutex->lock, NULL) != EWOULDBLOCK) - _spinlock(&mutex->lock); - return (ETIMEDOUT); - } - if (mutex->count == INT_MAX) { - _spinunlock(&mutex->lock); - return (EAGAIN); + if (mutex->type != PTHREAD_MUTEX_RECURSIVE) { + /* + * The pthread_mutex_lock() function may fail if + * a deadlock condition was detected. 
+ */ + ret = EDEADLK; + goto err; } } else if (trywait) { /* try failed */ - _spinunlock(&mutex->lock); - return (EBUSY); + ret = EBUSY; + goto err; } else { - /* add to the wait queue and block until at the head */ - TAILQ_INSERT_TAIL(&mutex->lockers, self, waiting); - while (mutex->owner != self) { - ret = __thrsleep(self, CLOCK_REALTIME, abstime, - &mutex->lock, NULL); - _spinlock(&mutex->lock); - assert(mutex->owner != NULL); - if (ret == EWOULDBLOCK) { - if (mutex->owner == self) - break; - TAILQ_REMOVE(&mutex->lockers, self, waiting); - _spinunlock(&mutex->lock); - return (ETIMEDOUT); + /* add to the wait queue */ + TAILQ_INSERT_TAIL(&mutex->waiters, &waiter, entry); + } + _spinunlock(&mutex->lock); + + if (owner == self) { + int count = mutex->count; + if (count == INT_MAX) + return (EAGAIN); + mutex->count = count + 1; + + /* the spinlock has done enough membars */ + return (0); + } + +#if 0 + if (ncpus > 1) { + unsigned int spin; + + for (spin = 0; spin < SPIN_COUNT; spin++) { + SPIN_WAIT(); + if (!waiter.wait) { + membar_enter(); + return (0); } } } +#endif - mutex->count++; - _spinunlock(&mutex->lock); - + do { + ret = _twait(&waiter.wait, 1, CLOCK_REALTIME, abstime); + if (ret == ETIMEDOUT) + goto tmo; + } while (waiter.wait); + + assert(mutex->owner == self); + assert(mutex->count > 0); + membar_enter(); return (0); + +tmo: + assert(abstime != NULL); + _spinlock(&mutex->lock); + if (waiter.wait) { + /* take ourself off the wait queue */ + TAILQ_REMOVE(&mutex->waiters, &waiter, entry); + } else { + /* the timeout lost a race with actually getting the lock */ + assert(mutex->owner == self); + assert(mutex->count > 0); + ret = 0; + } + /* FALLTHROUGH */ +err: + _spinunlock(&mutex->lock); + return (ret); } int @@ -189,6 +222,7 @@ pthread_mutex_unlock(pthread_mutex_t *mu { pthread_t self = pthread_self(); struct pthread_mutex *mutex = (struct pthread_mutex *)*mutexp; + int count; _rthread_debug(5, "%p: mutex_unlock %p\n", (void *)self, (void *)mutex); @@ -221,17 +255,29 @@ pthread_mutex_unlock(pthread_mutex_t *mu } } - if (--mutex->count == 0) { - pthread_t next; + count = mutex->count - 1; + if (count == 0) { + struct pthread_waiter *nwaiter = NULL; _spinlock(&mutex->lock); - mutex->owner = next = TAILQ_FIRST(&mutex->lockers); - if (next != NULL) - TAILQ_REMOVE(&mutex->lockers, next, waiting); + nwaiter = TAILQ_FIRST(&mutex->waiters); + if (nwaiter != NULL) { + /* move ownership to the next thread from the list */ + TAILQ_REMOVE(&mutex->waiters, nwaiter, entry); + mutex->owner = nwaiter->owner; + /* leave mutex->count at 1 for the next thread */ + nwaiter->owner = NULL; + nwaiter->wait = 0; /* let them proceed */ + } else { + mutex->owner = NULL; + mutex->count = 0; + } _spinunlock(&mutex->lock); - if (next != NULL) - __thrwakeup(next, 1); - } + + if (nwaiter != NULL) + _wake(&nwaiter->wait, 1); + } else + mutex->count = count; return (0); } @@ -281,19 +327,36 @@ pthread_cond_destroy(pthread_cond_t *con return (0); } +static void +pthread_cond_mutexexit(pthread_cond_t cond, struct pthread_mutex *mutex, + struct pthread_waiter *waiter) +{ + +} + +static int +pthread_cond_mutexwait(pthread_cond_t cond, struct pthread_mutex *mutex, + struct pthread_waiter *waiter) +{ + + return (EAGAIN); +} + int -pthread_cond_timedwait(pthread_cond_t *condp, pthread_mutex_t *mutexp, +_rthread_cond_timedwait(pthread_cond_t *condp, pthread_mutex_t *mutexp, const struct timespec *abstime) { pthread_cond_t cond; struct pthread_mutex *mutex = (struct pthread_mutex *)*mutexp; struct tib *tib = TIB_GET(); 
pthread_t self = tib->tib_thread; - pthread_t next; + struct pthread_waiter waiter = { .owner = self, .wait = 1 }; + struct pthread_waiter *nwaiter; + pthread_t owner; int mutex_count; int canceled = 0; - int rv = 0; int error; + int rv = 0; PREP_CANCEL_POINT(tib); if (!*condp) @@ -317,10 +380,6 @@ pthread_cond_timedwait(pthread_cond_t *c abort(); } - if (abstime == NULL || abstime->tv_nsec < 0 || - abstime->tv_nsec >= 1000000000) - return (EINVAL); - ENTER_DELAYED_CANCEL_POINT(tib, self); _spinlock(&cond->lock); @@ -340,35 +399,35 @@ pthread_cond_timedwait(pthread_cond_t *c /* snag the count in case this is a recursive mutex */ mutex_count = mutex->count; + waiter.cv = cond; + TAILQ_INSERT_TAIL(&cond->waiters, &waiter, entry); + /* transfer from the mutex queue to the condvar queue */ _spinlock(&mutex->lock); - self->blocking_cond = cond; - TAILQ_INSERT_TAIL(&cond->waiters, self, waiting); _spinunlock(&cond->lock); - /* wake the next guy blocked on the mutex */ - mutex->count = 0; - mutex->owner = next = TAILQ_FIRST(&mutex->lockers); - if (next != NULL) { - TAILQ_REMOVE(&mutex->lockers, next, waiting); - __thrwakeup(next, 1); + nwaiter = TAILQ_FIRST(&mutex->waiters); + if (nwaiter != NULL) { + /* move ownership to the next thread from the list */ + TAILQ_REMOVE(&mutex->waiters, nwaiter, entry); + mutex->owner = nwaiter->owner; + mutex->count = 1; + nwaiter->wait = 0; /* let them proceed */ + } else { + mutex->owner = NULL; + mutex->count = 0; } + _spinunlock(&mutex->lock); - /* wait until we're the owner of the mutex again */ - while (mutex->owner != self) { - error = __thrsleep(self, cond->clock, abstime, - &mutex->lock, &self->delayed_cancel); + /* wake the next guy blocked on the mutex */ + if (nwaiter != NULL) + _wake(&nwaiter->wait, 1); - /* - * If abstime == NULL, then we're definitely waiting - * on the mutex instead of the condvar, and are - * just waiting for mutex ownership, regardless of - * why we woke up. - */ - if (abstime == NULL) { - _spinlock(&mutex->lock); + /* wait until we're the owner of the mutex again */ + while (waiter.wait) { + error = _twait(&waiter.wait, 1, cond->clock, abstime); + if (error == 0 || error == EAGAIN) continue; - } /* * If we took a normal signal (not from @@ -377,10 +436,8 @@ pthread_cond_timedwait(pthread_cond_t *c */ if ((error == EINTR || error == ECANCELED) && (tib->tib_canceled == 0 || - (tib->tib_cantcancel & CANCEL_DISABLED))) { - _spinlock(&mutex->lock); + (tib->tib_cantcancel & CANCEL_DISABLED))) continue; - } /* * The remaining reasons for waking up (normal @@ -388,50 +445,54 @@ pthread_cond_timedwait(pthread_cond_t *c * we won't be staying in the condvar queue and * we'll no longer time out or be cancelable. */ - abstime = NULL; LEAVE_CANCEL_POINT_INNER(tib, 0); + canceled = 1; + + /* if timeout or canceled, make note of that */ + if (error == ETIMEDOUT) + rv = ETIMEDOUT; + + abstime = NULL; - /* - * If we're no longer in the condvar's queue then - * we're just waiting for mutex ownership. Need - * cond->lock here to prevent race with cond_signal(). 
- */ _spinlock(&cond->lock); - if (self->blocking_cond == NULL) { + if (!waiter.wait) { + /* we lost a race with a signal and mutex */ _spinunlock(&cond->lock); - _spinlock(&mutex->lock); - continue; + assert(mutex->owner == self); + break; } - assert(self->blocking_cond == cond); - /* if timeout or canceled, make note of that */ - if (error == EWOULDBLOCK) - rv = ETIMEDOUT; - else if (error == EINTR) - canceled = 1; + /* something has already moved us off the cond wait list */ + if (waiter.cv == NULL) { + _spinunlock(&cond->lock); + continue; + } - /* transfer between the queues */ - TAILQ_REMOVE(&cond->waiters, self, waiting); - assert(mutex == cond->mutex); + /* move to the mutex */ + assert(waiter.cv == cond); + waiter.cv = NULL; + TAILQ_REMOVE(&cond->waiters, &waiter, entry); if (TAILQ_EMPTY(&cond->waiters)) cond->mutex = NULL; - self->blocking_cond = NULL; - _spinunlock(&cond->lock); + _spinlock(&mutex->lock); + _spinunlock(&cond->lock); + owner = mutex->owner; /* mutex unlocked right now? */ - if (mutex->owner == NULL && - TAILQ_EMPTY(&mutex->lockers)) { - assert(mutex->count == 0); + if (owner == NULL) { mutex->owner = self; + _spinunlock(&mutex->lock); break; } - TAILQ_INSERT_TAIL(&mutex->lockers, self, waiting); + assert(owner != self); + + TAILQ_INSERT_TAIL(&mutex->waiters, &waiter, entry); + _spinunlock(&mutex->lock); } /* restore the mutex's count */ mutex->count = mutex_count; - _spinunlock(&mutex->lock); LEAVE_CANCEL_POINT_INNER(tib, canceled); @@ -439,150 +500,29 @@ pthread_cond_timedwait(pthread_cond_t *c } int -pthread_cond_wait(pthread_cond_t *condp, pthread_mutex_t *mutexp) +pthread_cond_timedwait(pthread_cond_t *condp, pthread_mutex_t *mutexp, + const struct timespec *abstime) { - pthread_cond_t cond; - struct pthread_mutex *mutex = (struct pthread_mutex *)*mutexp; - struct tib *tib = TIB_GET(); - pthread_t self = tib->tib_thread; - pthread_t next; - int mutex_count; - int canceled = 0; - int error; - PREP_CANCEL_POINT(tib); - - if (!*condp) - if ((error = pthread_cond_init(condp, NULL))) - return (error); - cond = *condp; - _rthread_debug(5, "%p: cond_wait %p,%p\n", (void *)self, - (void *)cond, (void *)mutex); - - if (mutex == NULL) -#if PTHREAD_MUTEX_DEFAULT == PTHREAD_MUTEX_ERRORCHECK - return (EPERM); -#else - abort(); -#endif - - if (mutex->owner != self) { - if (mutex->type == PTHREAD_MUTEX_ERRORCHECK) - return (EPERM); - else - abort(); - } - - ENTER_DELAYED_CANCEL_POINT(tib, self); - - _spinlock(&cond->lock); - - /* mark the condvar as being associated with this mutex */ - if (cond->mutex == NULL) { - cond->mutex = mutex; - assert(TAILQ_EMPTY(&cond->waiters)); - } else if (cond->mutex != mutex) { - assert(cond->mutex == mutex); - _spinunlock(&cond->lock); - LEAVE_CANCEL_POINT_INNER(tib, 1); + if (abstime == NULL || abstime->tv_nsec < 0 || + abstime->tv_nsec >= 1000000000) return (EINVAL); - } else - assert(! 
TAILQ_EMPTY(&cond->waiters)); - - /* snag the count in case this is a recursive mutex */ - mutex_count = mutex->count; - /* transfer from the mutex queue to the condvar queue */ - _spinlock(&mutex->lock); - self->blocking_cond = cond; - TAILQ_INSERT_TAIL(&cond->waiters, self, waiting); - _spinunlock(&cond->lock); - - /* wake the next guy blocked on the mutex */ - mutex->count = 0; - mutex->owner = next = TAILQ_FIRST(&mutex->lockers); - if (next != NULL) { - TAILQ_REMOVE(&mutex->lockers, next, waiting); - __thrwakeup(next, 1); - } - - /* wait until we're the owner of the mutex again */ - while (mutex->owner != self) { - error = __thrsleep(self, 0, NULL, &mutex->lock, - &self->delayed_cancel); - - /* - * If we took a normal signal (not from - * cancellation) then we should just go back to - * sleep without changing state (timeouts, etc). - */ - if ((error == EINTR || error == ECANCELED) && - (tib->tib_canceled == 0 || - (tib->tib_cantcancel & CANCEL_DISABLED))) { - _spinlock(&mutex->lock); - continue; - } - - /* - * The remaining reasons for waking up (normal - * wakeup and cancellation) all mean that we won't - * be staying in the condvar queue and we'll no - * longer be cancelable. - */ - LEAVE_CANCEL_POINT_INNER(tib, 0); - - /* - * If we're no longer in the condvar's queue then - * we're just waiting for mutex ownership. Need - * cond->lock here to prevent race with cond_signal(). - */ - _spinlock(&cond->lock); - if (self->blocking_cond == NULL) { - _spinunlock(&cond->lock); - _spinlock(&mutex->lock); - continue; - } - assert(self->blocking_cond == cond); - - /* if canceled, make note of that */ - if (error == EINTR) - canceled = 1; - - /* transfer between the queues */ - TAILQ_REMOVE(&cond->waiters, self, waiting); - assert(mutex == cond->mutex); - if (TAILQ_EMPTY(&cond->waiters)) - cond->mutex = NULL; - self->blocking_cond = NULL; - _spinunlock(&cond->lock); - _spinlock(&mutex->lock); - - /* mutex unlocked right now? */ - if (mutex->owner == NULL && - TAILQ_EMPTY(&mutex->lockers)) { - assert(mutex->count == 0); - mutex->owner = self; - break; - } - TAILQ_INSERT_TAIL(&mutex->lockers, self, waiting); - } - - /* restore the mutex's count */ - mutex->count = mutex_count; - _spinunlock(&mutex->lock); - - LEAVE_CANCEL_POINT_INNER(tib, canceled); - - return (0); + return (_rthread_cond_timedwait(condp, mutexp, abstime)); } +int +pthread_cond_wait(pthread_cond_t *condp, pthread_mutex_t *mutexp) +{ + return (_rthread_cond_timedwait(condp, mutexp, NULL)); +} int pthread_cond_signal(pthread_cond_t *condp) { pthread_cond_t cond; struct pthread_mutex *mutex; - pthread_t thread; - int wakeup; + struct pthread_waiter *nwaiter; + pthread_t owner; /* uninitialized? Then there's obviously no one waiting! 
*/ if (!*condp) @@ -591,17 +531,18 @@ pthread_cond_signal(pthread_cond_t *cond cond = *condp; _rthread_debug(5, "%p: cond_signal %p,%p\n", (void *)pthread_self(), (void *)cond, (void *)cond->mutex); + _spinlock(&cond->lock); - thread = TAILQ_FIRST(&cond->waiters); - if (thread == NULL) { + nwaiter = TAILQ_FIRST(&cond->waiters); + if (nwaiter == NULL) { assert(cond->mutex == NULL); _spinunlock(&cond->lock); return (0); } - assert(thread->blocking_cond == cond); - TAILQ_REMOVE(&cond->waiters, thread, waiting); - thread->blocking_cond = NULL; + assert(nwaiter->cv == cond); + nwaiter->cv = NULL; + TAILQ_REMOVE(&cond->waiters, nwaiter, entry); mutex = cond->mutex; assert(mutex != NULL); @@ -612,14 +553,19 @@ pthread_cond_signal(pthread_cond_t *cond _spinlock(&mutex->lock); _spinunlock(&cond->lock); - wakeup = mutex->owner == NULL && TAILQ_EMPTY(&mutex->lockers); - if (wakeup) - mutex->owner = thread; - else - TAILQ_INSERT_TAIL(&mutex->lockers, thread, waiting); + owner = mutex->owner; + if (owner == NULL) { + mutex->owner = nwaiter->owner; + /* mutex->count will be fixed by cond wait tail */ + nwaiter->wait = 0; + } else { + assert(owner != nwaiter->owner); + TAILQ_INSERT_TAIL(&mutex->waiters, nwaiter, entry); + } _spinunlock(&mutex->lock); - if (wakeup) - __thrwakeup(thread, 1); + + if (owner == NULL) + _wake(&nwaiter->wait, 1); return (0); } @@ -629,9 +575,9 @@ pthread_cond_broadcast(pthread_cond_t *c { pthread_cond_t cond; struct pthread_mutex *mutex; - pthread_t thread; - pthread_t p; - int wakeup; + struct pthread_waiter *nwaiter, *nnwaiter; + struct pthread_waiter **lwaiterp; + pthread_t owner; /* uninitialized? Then there's obviously no one waiting! */ if (!*condp) @@ -640,51 +586,64 @@ pthread_cond_broadcast(pthread_cond_t *c cond = *condp; _rthread_debug(5, "%p: cond_broadcast %p,%p\n", (void *)pthread_self(), (void *)cond, (void *)cond->mutex); + _spinlock(&cond->lock); - thread = TAILQ_FIRST(&cond->waiters); - if (thread == NULL) { + nwaiter = TAILQ_FIRST(&cond->waiters); + if (nwaiter == NULL) { assert(cond->mutex == NULL); _spinunlock(&cond->lock); return (0); } + lwaiterp = cond->waiters.tqh_last; mutex = cond->mutex; assert(mutex != NULL); + cond->mutex = NULL; + TAILQ_INIT(&cond->waiters); + /* walk the list, clearing the "blocked on condvar" pointer */ - p = thread; - do - p->blocking_cond = NULL; - while ((p = TAILQ_NEXT(p, waiting)) != NULL); + nnwaiter = nwaiter; + do { + assert(nnwaiter->cv == cond); + nnwaiter->cv = NULL; + + nnwaiter = TAILQ_NEXT(nnwaiter, entry); + } while (nnwaiter != NULL); + + _spinlock(&mutex->lock); + _spinunlock(&cond->lock); + + /* if the mutex is unowned, we can wake up the first waiter now */ + owner = mutex->owner; + if (owner == NULL) { + nnwaiter = TAILQ_NEXT(nwaiter, entry); + + mutex->owner = nwaiter->owner; + /* mutex->count will be fixed by cond wait tail */ + nwaiter->wait = 0; + } else { + /* move the whole list to the mutex waiters */ + nnwaiter = nwaiter; + } /* * We want to transfer all the threads from the condvar's list * to the mutex's list. The TAILQ_* macros don't let us do that * efficiently, so this is direct list surgery. Pay attention! 
*/ + if (nnwaiter != NULL) { + /* 1) attach the first thread to the end of the mutex's list */ + nnwaiter->entry.tqe_prev = mutex->waiters.tqh_last; + *(mutex->waiters.tqh_last) = nnwaiter; - /* 1) attach the first thread to the end of the mutex's list */ - _spinlock(&mutex->lock); - wakeup = mutex->owner == NULL && TAILQ_EMPTY(&mutex->lockers); - thread->waiting.tqe_prev = mutex->lockers.tqh_last; - *(mutex->lockers.tqh_last) = thread; - - /* 2) fix up the end pointer for the mutex's list */ - mutex->lockers.tqh_last = cond->waiters.tqh_last; - - if (wakeup) { - TAILQ_REMOVE(&mutex->lockers, thread, waiting); - mutex->owner = thread; - _spinunlock(&mutex->lock); - __thrwakeup(thread, 1); - } else - _spinunlock(&mutex->lock); + /* 2) fix up the end pointer for the mutex's list */ + mutex->waiters.tqh_last = lwaiterp; + } + _spinunlock(&mutex->lock); - /* 3) reset the condvar's list and mutex pointer */ - TAILQ_INIT(&cond->waiters); - assert(cond->mutex != NULL); - cond->mutex = NULL; - _spinunlock(&cond->lock); + if (owner == NULL) + _wake(&nwaiter->wait, 1); return (0); }
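
For reference, the per-waiter handshake that __cmtx_enter()/__cmtx_leave() and the
reworked flockfile/mutex/condvar paths rely on can be exercised directly against
futex(2). The sketch below is not part of the diff; the names are made up, and it
assumes only the futex(2) wrapper and the FUTEX_WAIT_PRIVATE/FUTEX_WAKE_PRIVATE ops
as documented in the man page. It shows the ordering the patch depends on: the waker
clears the waiter's private wait word before issuing the wake, and the waiter loops
on the word, so the kernel's value check covers both spurious wakeups and the
wake-before-wait race.

/*
 * Minimal sketch of the wait-word handshake used by the patch.
 * Not part of the diff; illustrative names only.
 *
 * cc handshake.c -o handshake -lpthread
 */
#include <sys/time.h>
#include <sys/futex.h>

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

static volatile uint32_t wait_word = 1;	/* 1 = still waiting */

static void *
waiter(void *arg)
{
	/*
	 * Sleep until the other thread hands control over.  If the
	 * word is already 0, FUTEX_WAIT_PRIVATE returns immediately
	 * because the kernel rechecks the value under its lock.
	 */
	while (wait_word)
		futex(&wait_word, FUTEX_WAIT_PRIVATE, 1, NULL, NULL);

	/* the real code issues membar_enter() here before proceeding */
	printf("woken up\n");
	return (NULL);
}

int
main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, waiter, NULL);
	sleep(1);

	/* hand over: clear the wait word first, then wake one sleeper */
	wait_word = 0;
	futex(&wait_word, FUTEX_WAKE_PRIVATE, 1, NULL, NULL);

	pthread_join(t, NULL);
	return (0);
}

Because each waiter sleeps on its own word queued behind the lock, the owner wakes
exactly the thread at the head of the queue, which is what gives the ordered handoff
and avoids the thundering herd described in the __cmtx comment; on the kernel side it
is also why wakeup_one() on the per-sleeper struct futex is sufficient.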