currently {t,m,rw}sleep_nsec are wrappers around {t,m,rw}sleep: the
wrappers convert nanoseconds to ticks before calling the real sleep
functions. this diff swaps that arrangement around, so the
{t,m,rw}sleep_nsec variants become the real functions and {t,m,rw}sleep
become the wrappers. to support this, sleep_finish is tweaked to take
nsecs as well, which means it calls timeout_add_nsec to schedule when
endtsleep should fire.

the reason i want this is that i have a plan to move timeouts from
using ticks for deadlines to using nanoseconds instead. since basically
every syscall that specifies a time works out how long it wants to
sleep in nanoseconds, this diff lets that value pass all the way
through to the timeout subsystem intact. without it, we'd translate to
ticks and then back to nanoseconds, losing precision along the way. i
also think it makes the code touched by this diff a bit simpler.

even if the timeout plan doesn't go ahead, this at least centralises
the translation of nsecs to ticks in one place
(src/sys/kern/kern_timeout.c).
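to make the precision loss concrete, here's a small userland sketch
(not part of the diff; hz=100 is assumed, and the rounding formula is
lifted from the old tsleep_nsec removed below):

/*
 * demonstrate the nsec -> ticks -> nsec round trip the old wrappers
 * forced on every sleep. userland only; HZ and TICK_NSEC stand in
 * for the kernel's hz and tick_nsec.
 */
#include <stdio.h>
#include <stdint.h>

#define HZ		100
#define TICK_NSEC	(1000000000 / HZ)	/* 10ms per tick */

int
main(void)
{
	uint64_t nsecs = 25000000;	/* caller asks for 25ms */
	uint64_t to_ticks;

	/* round up to ticks, plus one extra tick, as the removed code did */
	to_ticks = (nsecs + TICK_NSEC - 1) / (TICK_NSEC + 1) + 1;

	/* prints: asked for 25000000 nsec, got 4 ticks = 40000000 nsec */
	printf("asked for %llu nsec, got %llu ticks = %llu nsec\n",
	    (unsigned long long)nsecs, (unsigned long long)to_ticks,
	    (unsigned long long)(to_ticks * TICK_NSEC));
	return 0;
}

with nanoseconds flowing through to the timeout layer intact, that
rounding can happen once, at the very bottom, instead of on every hop.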
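one detail in the new {t,m,rw}sleep wrappers below: timo is widened
with a (uint64_t) cast before multiplying by tick_nsec. timo and
tick_nsec are both plain ints, so without the cast the product is
computed in int and overflows for sleeps beyond a couple of seconds at
hz=100. another userland sketch (hz=100 assumed again):

/* show the overflow the (uint64_t) cast in the wrappers avoids */
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	int timo = 60 * 100;			/* tsleep(..., 60 * hz) */
	int tick_nsec = 1000000000 / 100;	/* 10000000 */
	int64_t wide = (int64_t)timo * tick_nsec;

	/* truncating to int shows what unwidened int arithmetic keeps */
	printf("wide: %lld nsec, truncated: %d\n",
	    (long long)wide, (int)wide);
	return 0;
}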
Index: dev/dt/dt_dev.c
===================================================================
RCS file: /cvs/src/sys/dev/dt/dt_dev.c,v
diff -u -p -r1.42 dt_dev.c
--- dev/dt/dt_dev.c	4 Dec 2024 09:37:33 -0000	1.42
+++ dev/dt/dt_dev.c	19 May 2025 00:23:15 -0000
@@ -252,7 +252,7 @@ dtread(dev_t dev, struct uio *uio, int f
 
 	while (!atomic_load_int(&sc->ds_evtcnt)) {
 		sleep_setup(sc, PWAIT | PCATCH, "dtread");
-		error = sleep_finish(0, !atomic_load_int(&sc->ds_evtcnt));
+		error = sleep_finish(INFSLP, !atomic_load_int(&sc->ds_evtcnt));
 		if (error == EINTR || error == ERESTART)
 			break;
 	}
Index: dev/pci/if_myx.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_myx.c,v
diff -u -p -r1.120 if_myx.c
--- dev/pci/if_myx.c	24 May 2024 06:02:56 -0000	1.120
+++ dev/pci/if_myx.c	19 May 2025 00:23:15 -0000
@@ -1395,7 +1395,7 @@ myx_down(struct myx_softc *sc)
 	while (sc->sc_state != MYX_S_OFF) {
 		sleep_setup(sts, PWAIT, "myxdown");
 		membar_consumer();
-		sleep_finish(0, sc->sc_state != MYX_S_OFF);
+		sleep_finish(INFSLP, sc->sc_state != MYX_S_OFF);
 	}
 
 	s = splnet();
Index: kern/kern_exit.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_exit.c,v
diff -u -p -r1.246 kern_exit.c
--- kern/kern_exit.c	16 May 2025 13:40:30 -0000	1.246
+++ kern/kern_exit.c	19 May 2025 00:23:19 -0000
@@ -651,7 +651,7 @@ loop:
 			return (0);
 		}
 		sleep_setup(q->p_p, PWAIT | PCATCH, "wait");
-		if ((error = sleep_finish(0,
+		if ((error = sleep_finish(INFSLP,
 		    !ISSET(atomic_load_int(&q->p_p->ps_flags), PS_WAITEVENT))) != 0)
 			return (error);
 		goto loop;
Index: kern/kern_rwlock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_rwlock.c,v
diff -u -p -r1.56 kern_rwlock.c
--- kern/kern_rwlock.c	18 May 2025 00:13:57 -0000	1.56
+++ kern/kern_rwlock.c	19 May 2025 00:23:19 -0000
@@ -27,10 +27,9 @@
 #include <sys/witness.h>
 
 #ifdef RWDIAG
-#include <sys/kernel.h>	/* for hz */
-#define RW_SLEEP_TMO	10 * hz
+#define RW_SLEEP_TMO	10000000000ULL	/* 10 seconds */
 #else
-#define RW_SLEEP_TMO	0
+#define RW_SLEEP_TMO	INFSLP
 #endif
 
 /*
Index: kern/kern_sched.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sched.c,v
diff -u -p -r1.105 kern_sched.c
--- kern/kern_sched.c	16 May 2025 13:40:30 -0000	1.105
+++ kern/kern_sched.c	19 May 2025 00:23:19 -0000
@@ -690,7 +689,7 @@ sched_stop_secondary_cpus(void)
 			continue;
 		while ((spc->spc_schedflags & SPCF_HALTED) == 0) {
 			sleep_setup(spc, PZERO, "schedstate");
-			sleep_finish(0,
+			sleep_finish(INFSLP,
 			    (spc->spc_schedflags & SPCF_HALTED) == 0);
 		}
 	}
Index: kern/kern_synch.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_synch.c,v
diff -u -p -r1.223 kern_synch.c
--- kern/kern_synch.c	1 May 2025 06:58:21 -0000	1.223
+++ kern/kern_synch.c	19 May 2025 00:23:19 -0000
@@ -111,17 +111,18 @@ extern int safepri;
  * call should be interrupted by the signal (return EINTR).
  */
 int
-tsleep(const volatile void *ident, int priority, const char *wmesg, int timo)
+tsleep_nsec(const volatile void *ident, int priority, const char *wmesg,
+    uint64_t nsecs)
 {
 #ifdef MULTIPROCESSOR
 	int hold_count;
 #endif
 
 	KASSERT((priority & ~(PRIMASK | PCATCH)) == 0);
-	KASSERT(ident != &nowake || ISSET(priority, PCATCH) || timo != 0);
+	KASSERT(ident != &nowake || ISSET(priority, PCATCH) || nsecs != INFSLP);
 
 #ifdef MULTIPROCESSOR
-	KASSERT(ident == &nowake || timo || _kernel_lock_held());
+	KASSERT(ident == &nowake || nsecs != INFSLP || _kernel_lock_held());
 #endif
 
 #ifdef DDB
@@ -149,50 +150,21 @@ tsleep(const volatile void *ident, int p
 	}
 
 	sleep_setup(ident, priority, wmesg);
-	return sleep_finish(timo, 1);
+	return sleep_finish(nsecs, 1);
 }
 
 int
-tsleep_nsec(const volatile void *ident, int priority, const char *wmesg,
-    uint64_t nsecs)
+tsleep(const volatile void *ident, int priority, const char *wmesg,
+    int timo)
 {
-	uint64_t to_ticks;
+	uint64_t nsecs = INFSLP;
 
-	if (nsecs == INFSLP)
-		return tsleep(ident, priority, wmesg, 0);
-#ifdef DIAGNOSTIC
-	if (nsecs == 0) {
-		log(LOG_WARNING,
-		    "%s: %s[%d]: %s: trying to sleep zero nanoseconds\n",
-		    __func__, curproc->p_p->ps_comm, curproc->p_p->ps_pid,
-		    wmesg);
-	}
-#endif
-	/*
-	 * We want to sleep at least nsecs nanoseconds worth of ticks.
-	 *
-	 * - Clamp nsecs to prevent arithmetic overflow.
-	 *
-	 * - Round nsecs up to account for any nanoseconds that do not
-	 *   divide evenly into tick_nsec, otherwise we'll lose them to
-	 *   integer division in the next step.  We add (tick_nsec - 1)
-	 *   to keep from introducing a spurious tick if there are no
-	 *   such nanoseconds, i.e. nsecs % tick_nsec == 0.
-	 *
-	 * - Divide the rounded value to a count of ticks.  We divide
-	 *   by (tick_nsec + 1) to discard the extra tick introduced if,
-	 *   before rounding, nsecs % tick_nsec == 1.
-	 *
-	 * - Finally, add a tick to the result.  We need to wait out
-	 *   the current tick before we can begin counting our interval,
-	 *   as we do not know how much time has elapsed since the
-	 *   current tick began.
-	 */
-	nsecs = MIN(nsecs, UINT64_MAX - tick_nsec);
-	to_ticks = (nsecs + tick_nsec - 1) / (tick_nsec + 1) + 1;
-	if (to_ticks > INT_MAX)
-		to_ticks = INT_MAX;
-	return tsleep(ident, priority, wmesg, (int)to_ticks);
+	if (timo < 0)
+		panic("%s: negative timo %d", __func__, timo);
+	if (timo > 0)
+		nsecs = (uint64_t)timo * tick_nsec;
+
+	return tsleep_nsec(ident, priority, wmesg, nsecs);
 }
 
 /*
@@ -200,8 +172,8 @@ tsleep_nsec(const volatile void *ident, 
  * entered the sleep queue we drop the mutex. After sleeping we re-lock.
  */
 int
-msleep(const volatile void *ident, struct mutex *mtx, int priority,
-    const char *wmesg, int timo)
+msleep_nsec(const volatile void *ident, struct mutex *mtx, int priority,
+    const char *wmesg, uint64_t nsecs)
 {
 	int error, spl;
 #ifdef MULTIPROCESSOR
@@ -209,7 +181,7 @@ msleep(const volatile void *ident, struc
 #endif
 
 	KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0);
-	KASSERT(ident != &nowake || ISSET(priority, PCATCH) || timo != 0);
+	KASSERT(ident != &nowake || ISSET(priority, PCATCH) || nsecs != INFSLP);
 	KASSERT(mtx != NULL);
 
 #ifdef DDB
@@ -244,7 +216,7 @@ msleep(const volatile void *ident, struc
 	mtx_leave(mtx);
 
 	/* signal may stop the process, release mutex before that */
-	error = sleep_finish(timo, 1);
+	error = sleep_finish(nsecs, 1);
 
 	if ((priority & PNORELOCK) == 0)
 		mtx_enter(mtx);
@@ -253,26 +225,17 @@ msleep(const volatile void *ident, struc
 }
 
 int
-msleep_nsec(const volatile void *ident, struct mutex *mtx, int priority,
-    const char *wmesg, uint64_t nsecs)
+msleep(const volatile void *ident, struct mutex *mtx, int priority,
+    const char *wmesg, int timo)
 {
-	uint64_t to_ticks;
+	uint64_t nsecs = INFSLP;
 
-	if (nsecs == INFSLP)
-		return msleep(ident, mtx, priority, wmesg, 0);
-#ifdef DIAGNOSTIC
-	if (nsecs == 0) {
-		log(LOG_WARNING,
-		    "%s: %s[%d]: %s: trying to sleep zero nanoseconds\n",
-		    __func__, curproc->p_p->ps_comm, curproc->p_p->ps_pid,
-		    wmesg);
-	}
-#endif
-	nsecs = MIN(nsecs, UINT64_MAX - tick_nsec);
-	to_ticks = (nsecs + tick_nsec - 1) / (tick_nsec + 1) + 1;
-	if (to_ticks > INT_MAX)
-		to_ticks = INT_MAX;
-	return msleep(ident, mtx, priority, wmesg, (int)to_ticks);
+	if (timo < 0)
+		panic("%s: negative timo %d", __func__, timo);
+	if (timo > 0)
+		nsecs = (uint64_t)timo * tick_nsec;
+
+	return msleep_nsec(ident, mtx, priority, wmesg, nsecs);
 }
 
 /*
@@ -280,13 +243,13 @@ msleep_nsec(const volatile void *ident, 
  * entered the sleep queue we drop the rwlock. After sleeping we re-lock.
  */
 int
-rwsleep(const volatile void *ident, struct rwlock *rwl, int priority,
-    const char *wmesg, int timo)
+rwsleep_nsec(const volatile void *ident, struct rwlock *rwl, int priority,
+    const char *wmesg, uint64_t nsecs)
 {
 	int error, status;
 
 	KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0);
-	KASSERT(ident != &nowake || ISSET(priority, PCATCH) || timo != 0);
+	KASSERT(ident != &nowake || ISSET(priority, PCATCH) || nsecs != INFSLP);
 	KASSERT(ident != rwl);
 	rw_assert_anylock(rwl);
 	status = rw_status(rwl);
@@ -295,7 +258,7 @@ rwsleep(const volatile void *ident, stru
 	rw_exit(rwl);
 
 	/* signal may stop the process, release rwlock before that */
-	error = sleep_finish(timo, 1);
+	error = sleep_finish(nsecs, 1);
 
 	if ((priority & PNORELOCK) == 0)
 		rw_enter(rwl, status);
@@ -304,26 +267,17 @@ rwsleep(const volatile void *ident, stru
 }
 
 int
-rwsleep_nsec(const volatile void *ident, struct rwlock *rwl, int priority,
-    const char *wmesg, uint64_t nsecs)
+rwsleep(const volatile void *ident, struct rwlock *rwl, int priority,
+    const char *wmesg, int timo)
 {
-	uint64_t to_ticks;
+	uint64_t nsecs = INFSLP;
 
-	if (nsecs == INFSLP)
-		return rwsleep(ident, rwl, priority, wmesg, 0);
-#ifdef DIAGNOSTIC
-	if (nsecs == 0) {
-		log(LOG_WARNING,
-		    "%s: %s[%d]: %s: trying to sleep zero nanoseconds\n",
-		    __func__, curproc->p_p->ps_comm, curproc->p_p->ps_pid,
-		    wmesg);
-	}
-#endif
-	nsecs = MIN(nsecs, UINT64_MAX - tick_nsec);
-	to_ticks = (nsecs + tick_nsec - 1) / (tick_nsec + 1) + 1;
-	if (to_ticks > INT_MAX)
-		to_ticks = INT_MAX;
-	return rwsleep(ident, rwl, priority, wmesg, (int)to_ticks);
+	if (timo < 0)
+		panic("%s: negative timo %d", __func__, timo);
+	if (timo > 0)
+		nsecs = (uint64_t)timo * tick_nsec;
+
+	return rwsleep_nsec(ident, rwl, priority, wmesg, nsecs);
 }
 
 void
@@ -361,18 +315,25 @@ sleep_setup(const volatile void *ident, 
 }
 
 int
-sleep_finish(int timo, int do_sleep)
+sleep_finish(uint64_t nsecs, int do_sleep)
 {
 	struct proc *p = curproc;
 	int catch, error = 0, error1 = 0;
 
-	catch = p->p_flag & P_SINTR;
+#ifdef DIAGNOSTIC
+	if (nsecs == 0) {
+		log(LOG_WARNING,
+		    "%s: %s[%d]: %s: trying to sleep zero nanoseconds\n",
+		    __func__, p->p_p->ps_comm, p->p_p->ps_pid, p->p_wmesg);
+	}
+#endif
 
-	if (timo != 0) {
+	if (nsecs != INFSLP) {
 		KASSERT(!ISSET(p->p_flag, P_TIMEOUT|P_TIMEOUTRAN));
-		timeout_add(&p->p_sleep_to, timo);
+		timeout_add_nsec(&p->p_sleep_to, nsecs);
 	}
 
+	catch = p->p_flag & P_SINTR;
 	if (catch != 0) {
 		if ((error = sleep_signal_check(p, 0)) != 0) {
 			catch = 0;
@@ -445,7 +406,7 @@ sleep_finish(int timo, int do_sleep)
 	 * to sleep to wait for endtsleep to run, we'd also have to
 	 * take the sched lock, so we'd be spinning against it anyway.
 	 */
-	if (timo != 0 && !timeout_del(&p->p_sleep_to)) {
+	if (nsecs != INFSLP && !timeout_del(&p->p_sleep_to)) {
 		int flag;
 
 		/* Wait for endtsleep timeout to finish running */
@@ -753,14 +714,13 @@ thrsleep(struct proc *p, struct sys___th
 	void *lock = SCARG(uap, lock);
 	const uint32_t *abortp = SCARG(uap, abort);
 	clockid_t clock_id = SCARG(uap, clock_id);
-	uint64_t to_ticks = 0;
+	uint64_t nsecs = INFSLP;
 	int error = 0;
 
 	if (ident == 0)
 		return (EINVAL);
 	if (tsp != NULL) {
 		struct timespec now;
-		uint64_t nsecs;
 
 		if ((error = clock_gettime(p, clock_id, &now)))
 			return (error);
@@ -777,10 +737,7 @@ thrsleep(struct proc *p, struct sys___th
 		}
 
 		timespecsub(tsp, &now, tsp);
-		nsecs = MIN(TIMESPEC_TO_NSEC(tsp), MAXTSLP);
-		to_ticks = (nsecs + tick_nsec - 1) / (tick_nsec + 1) + 1;
-		if (to_ticks > INT_MAX)
-			to_ticks = INT_MAX;
+		nsecs = MAX(1, MIN(TIMESPEC_TO_NSEC(tsp), MAXTSLP));
 	}
 
 	tsb = (ident == -1) ? &tsb_shared : thrsleep_bucket(ident);
@@ -810,7 +767,7 @@ thrsleep(struct proc *p, struct sys___th
 	}
 
 	sleep_setup(&entry, PWAIT|PCATCH, "thrsleep");
-	error = sleep_finish(to_ticks, entry.tslp_p != NULL);
+	error = sleep_finish(nsecs, entry.tslp_p != NULL);
 	if (error != 0 || entry.tslp_p != NULL) {
 		mtx_enter(&tsb->tsb_lock);
 		if (entry.tslp_p != NULL)
@@ -997,7 +954,7 @@ refcnt_finalize(struct refcnt *r, const 
 	while (refs) {
 		sleep_setup(r, PWAIT, wmesg);
 		refs = atomic_load_int(&r->r_refs);
-		sleep_finish(0, refs);
+		sleep_finish(INFSLP, refs);
 	}
 	TRACEINDEX(refcnt, r->r_traceidx, r, refs, 0);
 	/* Order subsequent loads and stores after refs == 0 load. */
@@ -1047,6 +1004,6 @@ cond_wait(struct cond *c, const char *wm
 	while (wait) {
 		sleep_setup(c, PWAIT, wmesg);
 		wait = atomic_load_int(&c->c_wait);
-		sleep_finish(0, wait);
+		sleep_finish(INFSLP, wait);
 	}
 }
Index: kern/kern_timeout.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_timeout.c,v
diff -u -p -r1.103 kern_timeout.c
--- kern/kern_timeout.c	2 May 2025 00:51:09 -0000	1.103
+++ kern/kern_timeout.c	19 May 2025 00:23:19 -0000
@@ -823,7 +823,7 @@ softclock_thread_run(struct timeout_ctx 
 		 * at the same time.
 		 */
 		sleep_setup(todo, PSWP, "tmoslp");
-		sleep_finish(0, CIRCQ_EMPTY(tctx->tctx_todo));
+		sleep_finish(INFSLP, CIRCQ_EMPTY(tctx->tctx_todo));
 
 		mtx_enter(&timeout_mutex);
 		tostat.tos_thread_wakeups++;
Index: kern/subr_log.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_log.c,v
diff -u -p -r1.80 subr_log.c
--- kern/subr_log.c	30 Dec 2024 02:46:00 -0000	1.80
+++ kern/subr_log.c	19 May 2025 00:23:19 -0000
@@ -261,7 +261,7 @@ logread(dev_t dev, struct uio *uio, int 
 		 * to keep log_mtx as a leaf lock.
 		 */
 		sleep_setup(mbp, LOG_RDPRI | PCATCH, "klog");
-		error = sleep_finish(0, logsoftc.sc_state & LOG_RDWAIT);
+		error = sleep_finish(INFSLP, logsoftc.sc_state & LOG_RDWAIT);
 		mtx_enter(&log_mtx);
 		if (error)
 			goto out;
Index: kern/sys_futex.c
===================================================================
RCS file: /cvs/src/sys/kern/sys_futex.c,v
diff -u -p -r1.23 sys_futex.c
--- kern/sys_futex.c	7 May 2025 00:39:09 -0000	1.23
+++ kern/sys_futex.c	19 May 2025 00:23:19 -0000
@@ -251,13 +251,12 @@ futex_wait(struct proc *p, uint32_t *uad
 {
 	struct futex f;
 	struct futex_slpque *fsq;
-	uint64_t to_ticks = 0;
+	uint64_t nsecs = INFSLP;
 	uint32_t cval;
 	int error;
 
 	if (timeout != NULL) {
 		struct timespec ts;
-		uint64_t nsecs;
 
 		if ((error = copyin(timeout, &ts, sizeof(ts))))
 			return error;
@@ -268,10 +267,9 @@ futex_wait(struct proc *p, uint32_t *uad
 		if (ts.tv_sec < 0 || !timespecisvalid(&ts))
 			return EINVAL;
 
-		nsecs = MAX(1, MIN(TIMESPEC_TO_NSEC(&ts), MAXTSLP));
-		to_ticks = (nsecs + tick_nsec - 1) / (tick_nsec + 1) + 1;
-		if (to_ticks > INT_MAX)
-			to_ticks = INT_MAX;
+		nsecs = MIN(TIMESPEC_TO_NSEC(&ts), MAXTSLP);
+		if (nsecs == 0)
+			return ETIMEDOUT;
 	}
 
 	futex_addrs(p, &f, uaddr, flags);
@@ -301,7 +299,7 @@ futex_wait(struct proc *p, uint32_t *uad
 	}
 
 	sleep_setup(&f, PWAIT|PCATCH, "fsleep");
-	error = sleep_finish(to_ticks, f.ft_proc != NULL);
+	error = sleep_finish(nsecs, f.ft_proc != NULL);
 	/* Remove ourselves if we haven't been woken. */
 	if (error != 0 || f.ft_proc != NULL) {
 		if (futex_unwait(fsq, &f) == 0)
Index: sys/systm.h
===================================================================
RCS file: /cvs/src/sys/sys/systm.h,v
diff -u -p -r1.171 systm.h
--- sys/systm.h	28 May 2024 12:50:23 -0000	1.171
+++ sys/systm.h	19 May 2025 00:23:20 -0000
@@ -256,7 +256,7 @@ void	start_periodic_resettodr(void);
 void	stop_periodic_resettodr(void);
 
 void	sleep_setup(const volatile void *, int, const char *);
-int	sleep_finish(int, int);
+int	sleep_finish(uint64_t, int);
 void	sleep_queue_init(void);
 
 struct cond;