Index: kern_synch.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_synch.c,v
diff -u -p -r1.213 kern_synch.c
--- kern_synch.c	11 Nov 2024 13:28:29 -0000	1.213
+++ kern_synch.c	25 Nov 2024 01:36:04 -0000
@@ -63,8 +63,6 @@
 #endif
 
 int	sleep_signal_check(struct proc *, int);
-int	thrsleep(struct proc *, struct sys___thrsleep_args *);
-int	thrsleep_unlock(void *);
 
 extern void proc_stop(struct proc *p, int);
 
@@ -543,25 +541,25 @@ unsleep(struct proc *p)
 		p->p_wmesg = NULL;
 		TRACEPOINT(sched, unsleep, p->p_tid + THREAD_PID_OFFSET,
 		    p->p_p->ps_pid);
+		LLTRACE(lltrace_runnable, p);
 	}
 }
 
 /*
  * Make a number of processes sleeping on the specified identifier runnable.
  */
-void
+int
 wakeup_n(const volatile void *ident, int n)
 {
-	struct slpque *qp, wakeq;
-	struct proc *p;
-	struct proc *pnext;
+	struct slpque wakeq = TAILQ_HEAD_INITIALIZER(wakeq);
+	struct slpque *qp;
+	struct proc *p, *np;
+	int c = 0;
 
-	TAILQ_INIT(&wakeq);
+	qp = &slpque[LOOKUP(ident)];
 
 	SCHED_LOCK();
-	qp = &slpque[LOOKUP(ident)];
-	for (p = TAILQ_FIRST(qp); p != NULL && n != 0; p = pnext) {
-		pnext = TAILQ_NEXT(p, p_runq);
+	TAILQ_FOREACH_SAFE(p, qp, p_runq, np) {
 #ifdef DIAGNOSTIC
 		if (p->p_stat != SSLEEP && p->p_stat != SSTOP)
 			panic("thread %d p_stat is %d", p->p_tid, p->p_stat);
@@ -572,26 +570,31 @@ wakeup_n(const volatile void *ident, int
 		p->p_wchan = NULL;
 			p->p_wmesg = NULL;
 			TAILQ_INSERT_TAIL(&wakeq, p, p_runq);
-			--n;
+
+			if (++c >= n)
+				break;
 		}
 	}
 	while ((p = TAILQ_FIRST(&wakeq))) {
 		TAILQ_REMOVE(&wakeq, p, p_runq);
 		TRACEPOINT(sched, unsleep, p->p_tid + THREAD_PID_OFFSET,
 		    p->p_p->ps_pid);
+		LLTRACE(lltrace_runnable, p);
 		if (p->p_stat == SSLEEP)
			setrunnable(p);
 	}
 	SCHED_UNLOCK();
+
+	return (c);
 }
 
 /*
  * Make all processes sleeping on the specified identifier runnable.
  */
-void
+int
 wakeup(const volatile void *chan)
 {
-	wakeup_n(chan, -1);
+	return wakeup_n(chan, INT_MAX);
 }
 
 int
@@ -620,28 +623,66 @@ sys_sched_yield(struct proc *p, void *v,
 	return (0);
 }
 
-int
-thrsleep_unlock(void *lock)
+static inline int
+thrsleep_unlock(_atomic_lock_t *atomiclock)
 {
 	static _atomic_lock_t unlocked = _ATOMIC_LOCK_UNLOCKED;
-	_atomic_lock_t *atomiclock = lock;
 
-	if (!lock)
+	if (atomiclock == NULL)
 		return 0;
 
 	return copyout(&unlocked, atomiclock, sizeof(unlocked));
 }
 
 struct tslpentry {
-	TAILQ_ENTRY(tslpentry)	tslp_link;
-	long			tslp_ident;
+	TAILQ_ENTRY(tslpentry)	 tslp_link;
+	struct process		*tslp_ps;
+	long			 tslp_ident;
+	struct proc *volatile	 tslp_p;
 };
 
+struct tslp_bucket {
+	struct tslpqueue	tsb_list;
+	struct mutex		tsb_lock;
+} __aligned(64);
+
 /* thrsleep queue shared between processes */
-static struct tslpqueue thrsleep_queue = TAILQ_HEAD_INITIALIZER(thrsleep_queue);
-static struct rwlock thrsleep_lock = RWLOCK_INITIALIZER("thrsleeplk");
+static struct tslp_bucket tsb_shared;
 
-int
+#define TSLP_BUCKET_BITS	6
+#define TSLP_BUCKET_SIZE	(1UL << TSLP_BUCKET_BITS)
+#define TSLP_BUCKET_MASK	(TSLP_BUCKET_SIZE - 1)
+
+static struct tslp_bucket tsb_buckets[TSLP_BUCKET_SIZE];
+
+void
+tslp_init(void)
+{
+	struct tslp_bucket *tsb;
+	size_t i;
+
+	TAILQ_INIT(&tsb_shared.tsb_list);
+	mtx_init(&tsb_shared.tsb_lock, IPL_MPFLOOR);
+
+	for (i = 0; i < nitems(tsb_buckets); i++) {
+		tsb = &tsb_buckets[i];
+
+		TAILQ_INIT(&tsb->tsb_list);
+		mtx_init(&tsb->tsb_lock, IPL_MPFLOOR);
+	}
+}
+
+static struct tslp_bucket *
+thrsleep_bucket(long ident)
+{
+	ident >>= 3;
+	ident ^= ident >> TSLP_BUCKET_BITS;
+	ident &= TSLP_BUCKET_MASK;
+
+	return &tsb_buckets[ident];
+}
+
+static int
 thrsleep(struct proc *p, struct sys___thrsleep_args *v)
 {
 	struct sys___thrsleep_args /* {
@@ -653,18 +694,19 @@ thrsleep(struct proc *p, struct sys___th
 	} */ *uap = v;
 	long ident = (long)SCARG(uap, ident);
 	struct tslpentry entry;
-	struct tslpqueue *queue;
-	struct rwlock *qlock;
+	struct tslp_bucket *tsb;
 	struct timespec *tsp = (struct timespec *)SCARG(uap, tp);
 	void *lock = SCARG(uap, lock);
-	uint64_t nsecs = INFSLP;
-	int abort = 0, error;
+	const uint32_t *abortp = SCARG(uap, abort);
 	clockid_t clock_id = SCARG(uap, clock_id);
+	uint64_t to_ticks = 0;
+	int error = 0;
 
 	if (ident == 0)
 		return (EINVAL);
 	if (tsp != NULL) {
 		struct timespec now;
+		uint64_t nsecs;
 
 		if ((error = clock_gettime(p, clock_id, &now)))
 			return (error);
@@ -682,49 +724,62 @@ thrsleep(struct proc *p, struct sys___th
 
 		timespecsub(tsp, &now, tsp);
 		nsecs = MIN(TIMESPEC_TO_NSEC(tsp), MAXTSLP);
+		to_ticks = (nsecs + tick_nsec - 1) / (tick_nsec + 1) + 1;
+		if (to_ticks > INT_MAX)
+			to_ticks = INT_MAX;
 	}
 
-	if (ident == -1) {
-		queue = &thrsleep_queue;
-		qlock = &thrsleep_lock;
-	} else {
-		queue = &p->p_p->ps_tslpqueue;
-		qlock = &p->p_p->ps_lock;
-	}
+	tsb = (ident == -1) ? &tsb_shared : thrsleep_bucket(ident);
 
 	/* Interlock with wakeup. */
+	entry.tslp_ps = p->p_p;
 	entry.tslp_ident = ident;
-	rw_enter_write(qlock);
-	TAILQ_INSERT_TAIL(queue, &entry, tslp_link);
-	rw_exit_write(qlock);
+	entry.tslp_p = p;
+
+	mtx_enter(&tsb->tsb_lock);
+	TAILQ_INSERT_TAIL(&tsb->tsb_list, &entry, tslp_link);
+	mtx_leave(&tsb->tsb_lock);
 
 	error = thrsleep_unlock(lock);
+	if (error != 0)
+		goto leave;
 
-	if (error == 0 && SCARG(uap, abort) != NULL)
-		error = copyin(SCARG(uap, abort), &abort, sizeof(abort));
+	if (abortp != NULL) {
+		uint32_t abort;
+		error = copyin32(abortp, &abort);
+		if (error != 0)
+			goto leave;
+		if (abort != 0) {
+			error = EINTR;
+			goto leave;
+		}
+	}
 
-	rw_enter_write(qlock);
-	if (error != 0)
-		goto out;
-	if (abort != 0) {
-		error = EINTR;
-		goto out;
-	}
-	if (entry.tslp_ident != 0) {
-		error = rwsleep_nsec(&entry, qlock, PWAIT|PCATCH, "thrsleep",
-		    nsecs);
-	}
-
-out:
-	if (entry.tslp_ident != 0)
-		TAILQ_REMOVE(queue, &entry, tslp_link);
-	rw_exit_write(qlock);
+	sleep_setup(&entry, PWAIT|PCATCH, "thrsleep");
+	error = sleep_finish(to_ticks, entry.tslp_p != NULL);
+	if (error != 0 || entry.tslp_p != NULL) {
+		mtx_enter(&tsb->tsb_lock);
+		if (entry.tslp_p != NULL)
+			TAILQ_REMOVE(&tsb->tsb_list, &entry, tslp_link);
+		else
+			error = 0;
+		mtx_leave(&tsb->tsb_lock);
 
-	if (error == ERESTART)
-		error = ECANCELED;
+		if (error == ERESTART)
+			error = ECANCELED;
+	}
 
 	return (error);
+leave:
+	if (entry.tslp_p != NULL) {
+		mtx_enter(&tsb->tsb_lock);
+		if (entry.tslp_p != NULL)
+			TAILQ_REMOVE(&tsb->tsb_list, &entry, tslp_link);
+		mtx_leave(&tsb->tsb_lock);
+	}
+
+	return (error);
 }
 
 int
@@ -756,6 +811,21 @@ sys___thrsleep(struct proc *p, void *v,
 	return 0;
 }
 
+static void
+tslp_wakeups(struct tslpqueue *tslpq)
+{
+	struct tslpentry *entry, *nentry;
+	struct proc *p;
+
+	SCHED_LOCK();
+	TAILQ_FOREACH_SAFE(entry, tslpq, tslp_link, nentry) {
+		p = entry->tslp_p;
+		entry->tslp_p = NULL;
+		wakeup_proc(p, 0);
+	}
+	SCHED_UNLOCK();
+}
+
 int
 sys___thrwakeup(struct proc *p, void *v, register_t *retval)
 {
@@ -763,50 +833,53 @@ sys___thrwakeup(struct proc *p, void *v,
 	syscallarg(const volatile void *) ident;
 	syscallarg(int) n;
 	} */ *uap = v;
-	struct tslpentry *entry, *tmp;
-	struct tslpqueue *queue;
-	struct rwlock *qlock;
+	struct tslpentry *entry, *nentry;
+	struct tslp_bucket *tsb;
 	long ident = (long)SCARG(uap, ident);
 	int n = SCARG(uap, n);
 	int found = 0;
+	struct tslpqueue wq = TAILQ_HEAD_INITIALIZER(wq);
 
-	if (ident == 0)
+	if (ident == 0) {
 		*retval = EINVAL;
-	else {
-		if (ident == -1) {
-			queue = &thrsleep_queue;
-			qlock = &thrsleep_lock;
-			/*
-			 * Wake up all waiters with ident -1. This is needed
-			 * because ident -1 can be shared by multiple userspace
-			 * lock state machines concurrently. The implementation
-			 * has no way to direct the wakeup to a particular
-			 * state machine.
-			 */
-			n = 0;
-		} else {
-			queue = &p->p_p->ps_tslpqueue;
-			qlock = &p->p_p->ps_lock;
-		}
+		return (0);
+	}
 
-		rw_enter_write(qlock);
-		TAILQ_FOREACH_SAFE(entry, queue, tslp_link, tmp) {
-			if (entry->tslp_ident == ident) {
-				TAILQ_REMOVE(queue, entry, tslp_link);
-				entry->tslp_ident = 0;
-				wakeup_one(entry);
-				if (++found == n)
-					break;
-			}
-		}
-		rw_exit_write(qlock);
+	if (ident == -1) {
+		/*
+		 * Wake up all waiters with ident -1. This is needed
+		 * because ident -1 can be shared by multiple userspace
+		 * lock state machines concurrently. The implementation
+		 * has no way to direct the wakeup to a particular
+		 * state machine.
+		 */
+		mtx_enter(&tsb_shared.tsb_lock);
+		tslp_wakeups(&tsb_shared.tsb_list);
+		TAILQ_INIT(&tsb_shared.tsb_list);
+		mtx_leave(&tsb_shared.tsb_lock);
 
-		if (ident == -1)
-			*retval = 0;
-		else
-			*retval = found ? 0 : ESRCH;
+		*retval = 0;
+		return (0);
 	}
 
+	tsb = thrsleep_bucket(ident);
+
+	mtx_enter(&tsb->tsb_lock);
+	TAILQ_FOREACH_SAFE(entry, &tsb->tsb_list, tslp_link, nentry) {
+		if (entry->tslp_ident == ident && entry->tslp_ps == p->p_p) {
+			TAILQ_REMOVE(&tsb->tsb_list, entry, tslp_link);
+			TAILQ_INSERT_TAIL(&wq, entry, tslp_link);
+
+			if (++found == n)
+				break;
+		}
+	}
+
+	if (found)
+		tslp_wakeups(&wq);
+	mtx_leave(&tsb->tsb_lock);
+
+	*retval = found ? 0 : ESRCH;
 	return (0);
 }
 
Index: init_main.c
===================================================================
RCS file: /cvs/src/sys/kern/init_main.c,v
diff -u -p -r1.326 init_main.c
--- init_main.c	2 Apr 2024 08:39:16 -0000	1.326
+++ init_main.c	25 Nov 2024 01:36:04 -0000
@@ -140,6 +140,7 @@ void	db_ctf_init(void);
 void	prof_init(void);
 void	init_exec(void);
 void	futex_init(void);
+void	tslp_init(void);
 void	taskq_init(void);
 void	timeout_proc_init(void);
 void	pool_gc_pages(void *);
@@ -250,6 +251,7 @@ main(void *framep)
 	 * Initialize futexes.
 	 */
 	futex_init();
+	tslp_init();
 
 	/* Create credentials. */
 	p->p_ucred = crget();
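
For anyone eyeballing the bucket distribution, here is a small standalone sketch
(not part of the diff, and not kernel code) that mirrors the ident hashing the
patch adds in thrsleep_bucket(): shift off the low pointer-alignment bits, fold
the next TSLP_BUCKET_BITS in with xor, and mask down to the 64-entry bucket
array. The sample ident values below are invented purely for illustration.

/*
 * Userland sketch of the thrsleep_bucket() hash from the diff above.
 * Example idents are hypothetical lock-word addresses.
 */
#include <stdio.h>
#include <stddef.h>

#define TSLP_BUCKET_BITS	6
#define TSLP_BUCKET_SIZE	(1UL << TSLP_BUCKET_BITS)
#define TSLP_BUCKET_MASK	(TSLP_BUCKET_SIZE - 1)

static unsigned long
bucket_index(long ident)
{
	ident >>= 3;				/* drop pointer alignment bits */
	ident ^= ident >> TSLP_BUCKET_BITS;	/* fold higher bits in */
	ident &= TSLP_BUCKET_MASK;		/* 0..TSLP_BUCKET_SIZE-1 */

	return (unsigned long)ident;
}

int
main(void)
{
	/* made-up userland addresses: three adjacent lock words, one outlier */
	long idents[] = { 0x7f7f1000L, 0x7f7f1008L, 0x7f7f1010L, 0x7f7f2000L };
	size_t i;

	for (i = 0; i < sizeof(idents) / sizeof(idents[0]); i++)
		printf("ident %#lx -> bucket %lu\n", idents[i],
		    bucket_index(idents[i]));

	return 0;
}

The right shift by 3 matters because thrsleep idents are usually pointer-aligned,
so without it neighbouring lock words would only differ in bits that the mask
throws away and would pile into a handful of buckets.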