Index: conf/files =================================================================== RCS file: /cvs/src/sys/conf/files,v diff -u -p -r1.745 files --- conf/files 23 Apr 2025 21:40:22 -0000 1.745 +++ conf/files 13 May 2025 00:06:14 -0000 @@ -764,6 +764,7 @@ file kern/subr_log.c file kern/subr_percpu.c file kern/subr_poison.c diagnostic file kern/subr_pool.c +file kern/subr_heap.c file kern/subr_tree.c file kern/dma_alloc.c file kern/subr_prf.c Index: dev/dt/dt_dev.c =================================================================== RCS file: /cvs/src/sys/dev/dt/dt_dev.c,v diff -u -p -r1.42 dt_dev.c --- dev/dt/dt_dev.c 4 Dec 2024 09:37:33 -0000 1.42 +++ dev/dt/dt_dev.c 13 May 2025 00:06:14 -0000 @@ -252,7 +252,7 @@ dtread(dev_t dev, struct uio *uio, int f while (!atomic_load_int(&sc->ds_evtcnt)) { sleep_setup(sc, PWAIT | PCATCH, "dtread"); - error = sleep_finish(0, !atomic_load_int(&sc->ds_evtcnt)); + error = sleep_finish(INFSLP, !atomic_load_int(&sc->ds_evtcnt)); if (error == EINTR || error == ERESTART) break; } Index: dev/pci/if_myx.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_myx.c,v diff -u -p -r1.120 if_myx.c --- dev/pci/if_myx.c 24 May 2024 06:02:56 -0000 1.120 +++ dev/pci/if_myx.c 13 May 2025 00:06:14 -0000 @@ -1395,7 +1395,7 @@ myx_down(struct myx_softc *sc) while (sc->sc_state != MYX_S_OFF) { sleep_setup(sts, PWAIT, "myxdown"); membar_consumer(); - sleep_finish(0, sc->sc_state != MYX_S_OFF); + sleep_finish(INFSLP, sc->sc_state != MYX_S_OFF); } s = splnet(); Index: kern/kern_exit.c =================================================================== RCS file: /cvs/src/sys/kern/kern_exit.c,v diff -u -p -r1.245 kern_exit.c --- kern/kern_exit.c 2 May 2025 05:04:38 -0000 1.245 +++ kern/kern_exit.c 13 May 2025 00:06:18 -0000 @@ -651,7 +651,7 @@ loop: return (0); } sleep_setup(q->p_p, PWAIT | PCATCH, "wait"); - if ((error = sleep_finish(0, + if ((error = sleep_finish(INFSLP, !ISSET(atomic_load_int(&q->p_p->ps_flags), PS_WAITEVENT))) != 0) return (error); goto loop; Index: kern/kern_rwlock.c =================================================================== RCS file: /cvs/src/sys/kern/kern_rwlock.c,v diff -u -p -r1.55 kern_rwlock.c --- kern/kern_rwlock.c 29 Jan 2025 15:10:09 -0000 1.55 +++ kern/kern_rwlock.c 13 May 2025 00:06:18 -0000 @@ -27,10 +27,9 @@ #include #ifdef RWDIAG -#include /* for hz */ -#define RW_SLEEP_TMO 10 * hz +#define RW_SLEEP_TMO 10000000000ULL /* 10 seconds */ #else -#define RW_SLEEP_TMO 0 +#define RW_SLEEP_TMO INFSLP #endif /* Index: kern/kern_sched.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sched.c,v diff -u -p -r1.104 kern_sched.c --- kern/kern_sched.c 10 Mar 2025 09:28:56 -0000 1.104 +++ kern/kern_sched.c 13 May 2025 00:06:18 -0000 @@ -692,7 +692,7 @@ sched_stop_secondary_cpus(void) continue; while ((spc->spc_schedflags & SPCF_HALTED) == 0) { sleep_setup(spc, PZERO, "schedstate"); - sleep_finish(0, + sleep_finish(INFSLP, (spc->spc_schedflags & SPCF_HALTED) == 0); } } Index: kern/kern_synch.c =================================================================== RCS file: /cvs/src/sys/kern/kern_synch.c,v diff -u -p -r1.223 kern_synch.c --- kern/kern_synch.c 1 May 2025 06:58:21 -0000 1.223 +++ kern/kern_synch.c 13 May 2025 00:06:18 -0000 @@ -111,17 +111,27 @@ extern int safepri; * call should be interrupted by the signal (return EINTR). 
*/ int -tsleep(const volatile void *ident, int priority, const char *wmesg, int timo) +tsleep_nsec(const volatile void *ident, int priority, const char *wmesg, + uint64_t nsecs) { #ifdef MULTIPROCESSOR int hold_count; #endif +#ifdef DIAGNOSTIC + if (nsecs == 0) { + log(LOG_WARNING, + "%s: %s[%d]: %s: trying to sleep zero nanoseconds\n", + __func__, curproc->p_p->ps_comm, curproc->p_p->ps_pid, + wmesg); + } +#endif + KASSERT((priority & ~(PRIMASK | PCATCH)) == 0); - KASSERT(ident != &nowake || ISSET(priority, PCATCH) || timo != 0); + KASSERT(ident != &nowake || ISSET(priority, PCATCH) || nsecs != INFSLP); #ifdef MULTIPROCESSOR - KASSERT(ident == &nowake || timo || _kernel_lock_held()); + KASSERT(ident == &nowake || nsecs == INFSLP || _kernel_lock_held()); #endif #ifdef DDB @@ -149,50 +159,21 @@ tsleep(const volatile void *ident, int p } sleep_setup(ident, priority, wmesg); - return sleep_finish(timo, 1); + return sleep_finish(nsecs, 1); } int -tsleep_nsec(const volatile void *ident, int priority, const char *wmesg, - uint64_t nsecs) +tsleep(const volatile void *ident, int priority, const char *wmesg, + int timo) { - uint64_t to_ticks; + uint64_t nsecs = INFSLP; - if (nsecs == INFSLP) - return tsleep(ident, priority, wmesg, 0); -#ifdef DIAGNOSTIC - if (nsecs == 0) { - log(LOG_WARNING, - "%s: %s[%d]: %s: trying to sleep zero nanoseconds\n", - __func__, curproc->p_p->ps_comm, curproc->p_p->ps_pid, - wmesg); - } -#endif - /* - * We want to sleep at least nsecs nanoseconds worth of ticks. - * - * - Clamp nsecs to prevent arithmetic overflow. - * - * - Round nsecs up to account for any nanoseconds that do not - * divide evenly into tick_nsec, otherwise we'll lose them to - * integer division in the next step. We add (tick_nsec - 1) - * to keep from introducing a spurious tick if there are no - * such nanoseconds, i.e. nsecs % tick_nsec == 0. - * - * - Divide the rounded value to a count of ticks. We divide - * by (tick_nsec + 1) to discard the extra tick introduced if, - * before rounding, nsecs % tick_nsec == 1. - * - * - Finally, add a tick to the result. We need to wait out - * the current tick before we can begin counting our interval, - * as we do not know how much time has elapsed since the - * current tick began. - */ - nsecs = MIN(nsecs, UINT64_MAX - tick_nsec); - to_ticks = (nsecs + tick_nsec - 1) / (tick_nsec + 1) + 1; - if (to_ticks > INT_MAX) - to_ticks = INT_MAX; - return tsleep(ident, priority, wmesg, (int)to_ticks); + if (timo < 0) + panic("%s: negative timo %d", __func__, timo); + if (timo > 0) + nsecs = timo * tick_nsec; + + return tsleep_nsec(ident, priority, wmesg, nsecs); } /* @@ -200,8 +181,8 @@ tsleep_nsec(const volatile void *ident, * entered the sleep queue we drop the mutex. After sleeping we re-lock. 
*/ int -msleep(const volatile void *ident, struct mutex *mtx, int priority, - const char *wmesg, int timo) +msleep_nsec(const volatile void *ident, struct mutex *mtx, int priority, + const char *wmesg, uint64_t nsecs) { int error, spl; #ifdef MULTIPROCESSOR @@ -209,7 +190,7 @@ msleep(const volatile void *ident, struc #endif KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0); - KASSERT(ident != &nowake || ISSET(priority, PCATCH) || timo != 0); + KASSERT(ident != &nowake || ISSET(priority, PCATCH) || nsecs != INFSLP); KASSERT(mtx != NULL); #ifdef DDB @@ -244,7 +225,7 @@ msleep(const volatile void *ident, struc mtx_leave(mtx); /* signal may stop the process, release mutex before that */ - error = sleep_finish(timo, 1); + error = sleep_finish(nsecs, 1); if ((priority & PNORELOCK) == 0) mtx_enter(mtx); @@ -253,26 +234,17 @@ msleep(const volatile void *ident, struc } int -msleep_nsec(const volatile void *ident, struct mutex *mtx, int priority, - const char *wmesg, uint64_t nsecs) +msleep(const volatile void *ident, struct mutex *mtx, int priority, + const char *wmesg, int timo) { - uint64_t to_ticks; + uint64_t nsecs = INFSLP; - if (nsecs == INFSLP) - return msleep(ident, mtx, priority, wmesg, 0); -#ifdef DIAGNOSTIC - if (nsecs == 0) { - log(LOG_WARNING, - "%s: %s[%d]: %s: trying to sleep zero nanoseconds\n", - __func__, curproc->p_p->ps_comm, curproc->p_p->ps_pid, - wmesg); - } -#endif - nsecs = MIN(nsecs, UINT64_MAX - tick_nsec); - to_ticks = (nsecs + tick_nsec - 1) / (tick_nsec + 1) + 1; - if (to_ticks > INT_MAX) - to_ticks = INT_MAX; - return msleep(ident, mtx, priority, wmesg, (int)to_ticks); + if (timo < 0) + panic("%s: negative timo %d", __func__, timo); + if (timo > 0) + nsecs = timo * tick_nsec; + + return msleep_nsec(ident, mtx, priority, wmesg, nsecs); } /* @@ -280,13 +252,13 @@ msleep_nsec(const volatile void *ident, * entered the sleep queue we drop the it. After sleeping we re-lock. 
*/ int -rwsleep(const volatile void *ident, struct rwlock *rwl, int priority, - const char *wmesg, int timo) +rwsleep_nsec(const volatile void *ident, struct rwlock *rwl, int priority, + const char *wmesg, uint64_t nsecs) { int error, status; KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0); - KASSERT(ident != &nowake || ISSET(priority, PCATCH) || timo != 0); + KASSERT(ident != &nowake || ISSET(priority, PCATCH) || nsecs != INFSLP); KASSERT(ident != rwl); rw_assert_anylock(rwl); status = rw_status(rwl); @@ -295,7 +267,7 @@ rwsleep(const volatile void *ident, stru rw_exit(rwl); /* signal may stop the process, release rwlock before that */ - error = sleep_finish(timo, 1); + error = sleep_finish(nsecs, 1); if ((priority & PNORELOCK) == 0) rw_enter(rwl, status); @@ -304,26 +276,17 @@ rwsleep(const volatile void *ident, stru } int -rwsleep_nsec(const volatile void *ident, struct rwlock *rwl, int priority, - const char *wmesg, uint64_t nsecs) +rwsleep(const volatile void *ident, struct rwlock *rwl, int priority, + const char *wmesg, int timo) { - uint64_t to_ticks; + uint64_t nsecs = INFSLP; - if (nsecs == INFSLP) - return rwsleep(ident, rwl, priority, wmesg, 0); -#ifdef DIAGNOSTIC - if (nsecs == 0) { - log(LOG_WARNING, - "%s: %s[%d]: %s: trying to sleep zero nanoseconds\n", - __func__, curproc->p_p->ps_comm, curproc->p_p->ps_pid, - wmesg); - } -#endif - nsecs = MIN(nsecs, UINT64_MAX - tick_nsec); - to_ticks = (nsecs + tick_nsec - 1) / (tick_nsec + 1) + 1; - if (to_ticks > INT_MAX) - to_ticks = INT_MAX; - return rwsleep(ident, rwl, priority, wmesg, (int)to_ticks); + if (timo < 0) + panic("%s: negative timo %d", __func__, timo); + if (timo > 0) + nsecs = timo * tick_nsec; + + return rwsleep_nsec(ident, rwl, priority, wmesg, nsecs); } void @@ -361,16 +324,16 @@ sleep_setup(const volatile void *ident, } int -sleep_finish(int timo, int do_sleep) +sleep_finish(uint64_t nsecs, int do_sleep) { struct proc *p = curproc; int catch, error = 0, error1 = 0; catch = p->p_flag & P_SINTR; - if (timo != 0) { - KASSERT(!ISSET(p->p_flag, P_TIMEOUT|P_TIMEOUTRAN)); - timeout_add(&p->p_sleep_to, timo); + KASSERT(!ISSET(p->p_flag, P_TIMEOUT|P_TIMEOUTRAN)); + if (nsecs != INFSLP) { + timeout_add_nsec(&p->p_sleep_to, nsecs); } if (catch != 0) { @@ -445,7 +408,7 @@ sleep_finish(int timo, int do_sleep) * to sleep to wait for endtsleep to run, we'd also have to * take the sched lock, so we'd be spinning against it anyway. */ - if (timo != 0 && !timeout_del(&p->p_sleep_to)) { + if (nsecs != INFSLP && !timeout_del(&p->p_sleep_to)) { int flag; /* Wait for endtsleep timeout to finish running */ @@ -753,14 +716,13 @@ thrsleep(struct proc *p, struct sys___th void *lock = SCARG(uap, lock); const uint32_t *abortp = SCARG(uap, abort); clockid_t clock_id = SCARG(uap, clock_id); - uint64_t to_ticks = 0; + uint64_t nsecs = INFSLP; int error = 0; if (ident == 0) return (EINVAL); if (tsp != NULL) { struct timespec now; - uint64_t nsecs; if ((error = clock_gettime(p, clock_id, &now))) return (error); @@ -778,9 +740,6 @@ thrsleep(struct proc *p, struct sys___th timespecsub(tsp, &now, tsp); nsecs = MIN(TIMESPEC_TO_NSEC(tsp), MAXTSLP); - to_ticks = (nsecs + tick_nsec - 1) / (tick_nsec + 1) + 1; - if (to_ticks > INT_MAX) - to_ticks = INT_MAX; } tsb = (ident == -1) ? 
&tsb_shared : thrsleep_bucket(ident); @@ -810,7 +769,7 @@ thrsleep(struct proc *p, struct sys___th } sleep_setup(&entry, PWAIT|PCATCH, "thrsleep"); - error = sleep_finish(to_ticks, entry.tslp_p != NULL); + error = sleep_finish(nsecs, entry.tslp_p != NULL); if (error != 0 || entry.tslp_p != NULL) { mtx_enter(&tsb->tsb_lock); if (entry.tslp_p != NULL) @@ -997,7 +956,7 @@ refcnt_finalize(struct refcnt *r, const while (refs) { sleep_setup(r, PWAIT, wmesg); refs = atomic_load_int(&r->r_refs); - sleep_finish(0, refs); + sleep_finish(INFSLP, refs); } TRACEINDEX(refcnt, r->r_traceidx, r, refs, 0); /* Order subsequent loads and stores after refs == 0 load. */ @@ -1047,6 +1006,6 @@ cond_wait(struct cond *c, const char *wm while (wait) { sleep_setup(c, PWAIT, wmesg); wait = atomic_load_int(&c->c_wait); - sleep_finish(0, wait); + sleep_finish(INFSLP, wait); } } Index: kern/kern_tc.c =================================================================== RCS file: /cvs/src/sys/kern/kern_tc.c,v diff -u -p -r1.83 kern_tc.c --- kern/kern_tc.c 23 Feb 2024 23:01:15 -0000 1.83 +++ kern/kern_tc.c 13 May 2025 00:06:18 -0000 @@ -267,6 +267,15 @@ getnsecuptime(void) return BINTIME_TO_NSEC(&bt); } +uint64_t +getnsectime(void) +{ + struct timespec ts; + + getnanotime(&ts); + return TIMESPEC_TO_NSEC(&ts); +} + void binruntime(struct bintime *bt) { Index: kern/kern_timeout.c =================================================================== RCS file: /cvs/src/sys/kern/kern_timeout.c,v diff -u -p -r1.103 kern_timeout.c --- kern/kern_timeout.c 2 May 2025 00:51:09 -0000 1.103 +++ kern/kern_timeout.c 13 May 2025 00:06:18 -0000 @@ -48,6 +48,11 @@ #include #endif +CTASSERT((TIMEOUT_PROC|TIMEOUT_MPSAFE) < 4); + +HEAP_HEAD(timeout_heap); +HEAP_PROTOTYPE(timeout_heap, timeout); + struct timeout_ctx { struct circq *tctx_todo; struct timeout *tctx_running; @@ -64,21 +69,11 @@ struct mutex timeout_mutex = MUTEX_INITI void *softclock_si; /* [I] softclock() interrupt handle */ struct timeoutstat tostat; /* [T] statistics and totals */ -/* - * Timeouts are kept in a hierarchical timing wheel. The to_time is the value - * of the global variable "ticks" when the timeout should be called. There are - * four levels with 256 buckets each. 
- */ -#define WHEELCOUNT 4 -#define WHEELSIZE 256 -#define WHEELMASK 255 -#define WHEELBITS 8 -#define BUCKETS (WHEELCOUNT * WHEELSIZE) - -struct circq timeout_wheel[BUCKETS]; /* [T] Tick-based timeouts */ -struct circq timeout_wheel_kc[BUCKETS]; /* [T] Clock-based timeouts */ struct circq timeout_new; /* [T] New, unscheduled timeouts */ +struct timeout_heap timeout_heap_ut; /* [T] Uptime based timeouts */ +struct timeout_heap timeout_heap_rt; /* [T] Realtime timeouts */ struct circq timeout_todo; /* [T] Due or needs rescheduling */ + static struct timeout_ctx timeout_ctx_si = { .tctx_todo = &timeout_todo, /* [I] */ .tctx_running = NULL, /* [T] */ @@ -94,32 +89,35 @@ static struct timeout_ctx timeout_ctx_pr .tctx_todo = &timeout_proc_mp, /* [I] */ .tctx_running = NULL, /* [T] */ }; +#else +#define timeout_ctx_proc_mp timeout_ctx_proc #endif -time_t timeout_level_width[WHEELCOUNT]; /* [I] Wheel level width (seconds) */ -struct timespec tick_ts; /* [I] Length of a tick (1/hz secs) */ +static struct timeout_ctx * const timeout_contexts[] = { + [0] = &timeout_ctx_si, + [TIMEOUT_PROC] = &timeout_ctx_proc, + [TIMEOUT_MPSAFE] = &timeout_ctx_si, /* XXX */ + [TIMEOUT_MPSAFE|TIMEOUT_PROC] = &timeout_ctx_proc_mp, +}; struct kclock { - struct timespec kc_lastscan; /* [T] Clock time at last wheel scan */ - struct timespec kc_late; /* [T] Late if due prior */ - struct timespec kc_offset; /* [T] Offset from primary kclock */ -} timeout_kclock[KCLOCK_MAX]; - -#define MASKWHEEL(wheel, time) (((time) >> ((wheel)*WHEELBITS)) & WHEELMASK) - -#define BUCKET(rel, abs) \ - (timeout_wheel[ \ - ((rel) <= (1 << (2*WHEELBITS))) \ - ? ((rel) <= (1 << WHEELBITS)) \ - ? MASKWHEEL(0, (abs)) \ - : MASKWHEEL(1, (abs)) + WHEELSIZE \ - : ((rel) <= (1 << (3*WHEELBITS))) \ - ? MASKWHEEL(2, (abs)) + 2*WHEELSIZE \ - : MASKWHEEL(3, (abs)) + 3*WHEELSIZE]) - -#define MOVEBUCKET(wheel, time) \ - CIRCQ_CONCAT(&timeout_todo, \ - &timeout_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE]) + const char *kc_name; + struct timeout_heap *kc_heap; + uint64_t (*kc_gettime)(void); +}; + +static const struct kclock timeout_kclocks[KCLOCK_MAX] = { /* [I] */ + [KCLOCK_UPTIME] = { + .kc_name = "uptime", + .kc_heap = &timeout_heap_ut, + .kc_gettime = getnsecuptime + }, + [KCLOCK_REALTIME] = { + .kc_name = "realtime", + .kc_heap = &timeout_heap_rt, + .kc_gettime = getnsectime + }, +}; /* * Circular queue definitions. @@ -196,17 +194,27 @@ struct lock_type timeout_spinlock_type = void softclock(void *); void softclock_create_thread(void *); -void softclock_process_kclock_timeout(struct timeout *, int); -void softclock_process_tick_timeout(struct timeout *, int); void softclock_thread(void *); #ifdef MULTIPROCESSOR void softclock_thread_mp(void *); #endif void timeout_barrier_timeout(void *); -uint32_t timeout_bucket(const struct timeout *); -uint32_t timeout_maskwheel(uint32_t, const struct timespec *); void timeout_run(struct timeout_ctx *, struct timeout *); +static inline int +timeout_cmp(const struct timeout *a, const struct timeout *b) +{ + int64_t delta = a->to_time - b->to_time; + + if (delta < 0) + return (-1); + if (delta > 0) + return (1); + return (0); +} + +HEAP_GENERATE(timeout_heap, timeout, to_entry.to_heap, timeout_cmp); + /* * The first thing in a struct timeout is its struct circq, so we * can get back from a pointer to the latter to a pointer to the @@ -247,29 +255,21 @@ timeout_sync_leave(int needsproc) * be positive or negative so comparing it with anything is dangerous. 
* The only way we can use the to->to_time value in any predictable way * is when we calculate how far in the future `to' will timeout - - * "to->to_time - ticks". The result will always be positive for future + * "to->to_time - getnsecuptime()". The result will be positive for future * timeouts and 0 or negative for due timeouts. */ void timeout_startup(void) { - int b, level; - CIRCQ_INIT(&timeout_new); + HEAP_INIT(timeout_heap, &timeout_heap_ut); + HEAP_INIT(timeout_heap, &timeout_heap_rt); CIRCQ_INIT(&timeout_todo); CIRCQ_INIT(&timeout_proc); #ifdef MULTIPROCESSOR CIRCQ_INIT(&timeout_proc_mp); #endif - for (b = 0; b < nitems(timeout_wheel); b++) - CIRCQ_INIT(&timeout_wheel[b]); - for (b = 0; b < nitems(timeout_wheel_kc); b++) - CIRCQ_INIT(&timeout_wheel_kc[b]); - - for (level = 0; level < nitems(timeout_level_width); level++) - timeout_level_width[level] = 2 << (level * WHEELBITS); - NSEC_TO_TIMESPEC(tick_nsec, &tick_ts); } void @@ -314,39 +314,80 @@ timeout_set_proc(struct timeout *new, vo timeout_set_flags(new, fn, arg, KCLOCK_NONE, TIMEOUT_PROC); } +static inline void +timeout_list_insert(struct circq *tlist, struct timeout *to) +{ + CIRCQ_INSERT_TAIL(tlist, &to->to_entry.to_list); +} + +static inline int +timeout_get_kclock(const struct timeout *to) +{ + int kclock = to->to_kclock; + if (kclock == KCLOCK_NONE) + kclock = KCLOCK_UPTIME; + return (kclock); +} + +static inline struct timeout_heap * +timeout_get_heap(const struct timeout *to) +{ + return (timeout_kclocks[timeout_get_kclock(to)].kc_heap); +} +static inline void +timeout_heap_insert(struct timeout_heap *theap, struct timeout *to) +{ + SET(to->to_flags, TIMEOUT_ONHEAP); + HEAP_INSERT(timeout_heap, theap, to); +} + +static inline void +timeout_heap_remove(struct timeout_heap *theap, struct timeout *to) +{ + HEAP_REMOVE(timeout_heap, theap, to); + CLR(to->to_flags, TIMEOUT_ONHEAP); +} + +static inline struct timeout * +timeout_heap_extract(struct timeout_heap *theap, const struct timeout *now) +{ + struct timeout *to; + + to = HEAP_CEXTRACT(timeout_heap, theap, now); + if (to != NULL) + CLR(to->to_flags, TIMEOUT_ONHEAP); + + return (to); +} + +static void +timeout_remove(struct timeout *to) +{ + if (ISSET(to->to_flags, TIMEOUT_ONHEAP)) + timeout_heap_remove(timeout_get_heap(to), to); + else + CIRCQ_REMOVE(&to->to_entry.to_list); +} + int -timeout_add(struct timeout *new, int to_ticks) +_timeout_add(struct timeout *to, uint64_t nsec) { - int old_time; int ret = 1; - KASSERT(ISSET(new->to_flags, TIMEOUT_INITIALIZED)); - KASSERT(new->to_kclock == KCLOCK_NONE); - KASSERT(to_ticks >= 0); + KASSERT(ISSET(to->to_flags, TIMEOUT_INITIALIZED)); mtx_enter(&timeout_mutex); + CLR(to->to_flags, TIMEOUT_TRIGGERED); - /* Initialize the time here, it won't change. */ - old_time = new->to_time; - new->to_time = to_ticks + ticks; - CLR(new->to_flags, TIMEOUT_TRIGGERED); - - /* - * If this timeout already is scheduled and now is moved - * earlier, reschedule it now. Otherwise leave it in place - * and let it be rescheduled later. 
- */ - if (ISSET(new->to_flags, TIMEOUT_ONQUEUE)) { - if (new->to_time - ticks < old_time - ticks) { - CIRCQ_REMOVE(&new->to_list); - CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list); - } + if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) { + timeout_remove(to); tostat.tos_readded++; ret = 0; - } else { - SET(new->to_flags, TIMEOUT_ONQUEUE); - CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list); - } + } else + SET(to->to_flags, TIMEOUT_ONQUEUE); + + to->to_time = nsec; + timeout_list_insert(&timeout_new, to); #if NKCOV > 0 if (!kcov_cold) new->to_process = curproc->p_p; @@ -357,102 +398,87 @@ timeout_add(struct timeout *new, int to_ return ret; } -static inline int -timeout_add_ticks(struct timeout *to, uint64_t to_ticks, int notzero) +int +timeout_add_nsec(struct timeout *to, uint64_t nsecs) { - if (to_ticks > INT_MAX) - to_ticks = INT_MAX; - else if (to_ticks == 0 && notzero) - to_ticks = 1; + KASSERT(to->to_kclock == KCLOCK_NONE); + KASSERT(nsecs <= INT64_MAX); - return timeout_add(to, (int)to_ticks); + nsecs += getnsecuptime(); + + return _timeout_add(to, nsecs); } int timeout_add_tv(struct timeout *to, const struct timeval *tv) { - uint64_t to_ticks; + uint64_t nsecs; - to_ticks = (uint64_t)hz * tv->tv_sec + tv->tv_usec / tick; + nsecs = TIMEVAL_TO_NSEC(tv); + if (nsecs > INT64_MAX) + nsecs = INT64_MAX; - return timeout_add_ticks(to, to_ticks, tv->tv_usec > 0); + return timeout_add_nsec(to, nsecs); } int timeout_add_sec(struct timeout *to, int secs) { - uint64_t to_ticks; + uint64_t nsecs; + + KASSERT(secs >= 0); - to_ticks = (uint64_t)hz * secs; + nsecs = (uint64_t)secs * 1000000000ULL; - return timeout_add_ticks(to, to_ticks, 1); + return timeout_add_nsec(to, nsecs); } int timeout_add_msec(struct timeout *to, uint64_t msecs) { - uint64_t to_ticks; + uint64_t nsecs; - to_ticks = msecs * 1000 / tick; + nsecs = msecs * 1000000; + if (nsecs < msecs) + nsecs = INT64_MAX; - return timeout_add_ticks(to, to_ticks, msecs > 0); + return timeout_add_nsec(to, nsecs); } int timeout_add_usec(struct timeout *to, uint64_t usecs) { - uint64_t to_ticks; + uint64_t nsecs; - to_ticks = usecs / tick; + nsecs = usecs * 1000; + if (nsecs < usecs) + nsecs = INT64_MAX; - return timeout_add_ticks(to, to_ticks, usecs > 0); + return timeout_add_nsec(to, nsecs); } int -timeout_add_nsec(struct timeout *to, uint64_t nsecs) +timeout_add(struct timeout *to, int to_ticks) { - uint64_t to_ticks; + uint64_t nsecs; + + KASSERT(to_ticks >= 0); - to_ticks = nsecs / (tick * 1000); + nsecs = (uint64_t)to_ticks * (uint64_t)tick_nsec; - return timeout_add_ticks(to, to_ticks, nsecs > 0); + return timeout_add_nsec(to, nsecs); } int timeout_abs_ts(struct timeout *to, const struct timespec *abstime) { - struct timespec old_abstime; - int ret = 1; - - mtx_enter(&timeout_mutex); + uint64_t nsec; - KASSERT(ISSET(to->to_flags, TIMEOUT_INITIALIZED)); - KASSERT(to->to_kclock == KCLOCK_UPTIME); - - old_abstime = to->to_abstime; - to->to_abstime = *abstime; - CLR(to->to_flags, TIMEOUT_TRIGGERED); - - if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) { - if (timespeccmp(abstime, &old_abstime, <)) { - CIRCQ_REMOVE(&to->to_list); - CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list); - } - tostat.tos_readded++; - ret = 0; - } else { - SET(to->to_flags, TIMEOUT_ONQUEUE); - CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list); - } -#if NKCOV > 0 - if (!kcov_cold) - to->to_process = curproc->p_p; -#endif - tostat.tos_added++; + KASSERT(to->to_kclock != KCLOCK_NONE); - mtx_leave(&timeout_mutex); + nsec = TIMESPEC_TO_NSEC(abstime); - return ret; + return _timeout_add(to, nsec); 
} int @@ -462,7 +488,7 @@ timeout_del(struct timeout *to) mtx_enter(&timeout_mutex); if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) { - CIRCQ_REMOVE(&to->to_list); + timeout_remove(to); CLR(to->to_flags, TIMEOUT_ONQUEUE); tostat.tos_cancelled++; ret = 1; @@ -474,6 +500,12 @@ timeout_del(struct timeout *to) return ret; } +static inline struct timeout_ctx * +timeout_context(const struct timeout *to) +{ + return timeout_contexts[to->to_flags & (TIMEOUT_PROC|TIMEOUT_MPSAFE)]; +} + int timeout_del_barrier(struct timeout *to) { @@ -504,24 +536,14 @@ timeout_barrier(struct timeout *to) cond_init(&c); mtx_enter(&timeout_mutex); - if (ISSET(flags, TIMEOUT_PROC)) { -#ifdef MULTIPROCESSOR - if (ISSET(flags, TIMEOUT_MPSAFE)) - tctx = &timeout_ctx_proc_mp; - else -#endif - tctx = &timeout_ctx_proc; - } else - tctx = &timeout_ctx_si; - + tctx = timeout_context(to); if (tctx->tctx_running != to) { mtx_leave(&timeout_mutex); return; } - barrier.to_time = ticks; SET(barrier.to_flags, TIMEOUT_ONQUEUE); - CIRCQ_INSERT_HEAD(tctx->tctx_todo, &barrier.to_list); + CIRCQ_INSERT_HEAD(tctx->tctx_todo, &barrier.to_entry.to_list); mtx_leave(&timeout_mutex); /* @@ -541,49 +563,6 @@ timeout_barrier_timeout(void *arg) cond_signal(c); } -uint32_t -timeout_bucket(const struct timeout *to) -{ - struct timespec diff, shifted_abstime; - struct kclock *kc; - uint32_t level; - - KASSERT(to->to_kclock == KCLOCK_UPTIME); - kc = &timeout_kclock[to->to_kclock]; - - KASSERT(timespeccmp(&kc->kc_lastscan, &to->to_abstime, <)); - timespecsub(&to->to_abstime, &kc->kc_lastscan, &diff); - for (level = 0; level < nitems(timeout_level_width) - 1; level++) { - if (diff.tv_sec < timeout_level_width[level]) - break; - } - timespecadd(&to->to_abstime, &kc->kc_offset, &shifted_abstime); - return level * WHEELSIZE + timeout_maskwheel(level, &shifted_abstime); -} - -/* - * Hash the absolute time into a bucket on a given level of the wheel. - * - * The complete hash is 32 bits. The upper 25 bits are seconds, the - * lower 7 bits are nanoseconds. tv_nsec is a positive value less - * than one billion so we need to divide it to isolate the desired - * bits. We can't just shift it. - * - * The level is used to isolate an 8-bit portion of the hash. The - * resulting number indicates which bucket the absolute time belongs - * in on the given level of the wheel. - */ -uint32_t -timeout_maskwheel(uint32_t level, const struct timespec *abstime) -{ - uint32_t hi, lo; - - hi = abstime->tv_sec << 7; - lo = abstime->tv_nsec / 7812500; - - return ((hi | lo) >> (level * WHEELBITS)) & WHEELMASK; -} - /* * This is called from hardclock() on the primary CPU at the start of * every tick. @@ -591,69 +570,30 @@ timeout_maskwheel(uint32_t level, const void timeout_hardclock_update(void) { - struct timespec elapsed, now; - struct kclock *kc; - struct timespec *lastscan = &timeout_kclock[KCLOCK_UPTIME].kc_lastscan; - int b, done, first, i, last, level, need_softclock = 1, off; + int need_softclock = 0; mtx_enter(&timeout_mutex); + if (!CIRCQ_EMPTY(&timeout_new)) + need_softclock = 1; + else { + size_t i; - MOVEBUCKET(0, ticks); - if (MASKWHEEL(0, ticks) == 0) { - MOVEBUCKET(1, ticks); - if (MASKWHEEL(1, ticks) == 0) { - MOVEBUCKET(2, ticks); - if (MASKWHEEL(2, ticks) == 0) - MOVEBUCKET(3, ticks); - } - } - - /* - * Dump the buckets that expired while we were away. - * - * If the elapsed time has exceeded a level's limit then we need - * to dump every bucket in the level. 
We have necessarily completed - * a lap of that level, too, so we need to process buckets in the - * next level. - * - * Otherwise we need to compare indices: if the index of the first - * expired bucket is greater than that of the last then we have - * completed a lap of the level and need to process buckets in the - * next level. - */ - nanouptime(&now); - timespecsub(&now, lastscan, &elapsed); - for (level = 0; level < nitems(timeout_level_width); level++) { - first = timeout_maskwheel(level, lastscan); - if (elapsed.tv_sec >= timeout_level_width[level]) { - last = (first == 0) ? WHEELSIZE - 1 : first - 1; - done = 0; - } else { - last = timeout_maskwheel(level, &now); - done = first <= last; - } - off = level * WHEELSIZE; - for (b = first;; b = (b + 1) % WHEELSIZE) { - CIRCQ_CONCAT(&timeout_todo, &timeout_wheel_kc[off + b]); - if (b == last) + for (i = 0; i < nitems(timeout_kclocks); i++) { + const struct kclock *kc = &timeout_kclocks[i]; + struct timeout *to; + struct timeout now; + + to = HEAP_FIRST(timeout_heap, kc->kc_heap); + if (to == NULL) + continue; + + now.to_time = kc->kc_gettime(); + if (timeout_cmp(&now, to) >= 0) { + need_softclock = 1; break; + } } - if (done) - break; } - - /* - * Update the cached state for each kclock. - */ - for (i = 0; i < nitems(timeout_kclock); i++) { - kc = &timeout_kclock[i]; - timespecadd(&now, &kc->kc_offset, &kc->kc_lastscan); - timespecsub(&kc->kc_lastscan, &tick_ts, &kc->kc_late); - } - - if (CIRCQ_EMPTY(&timeout_new) && CIRCQ_EMPTY(&timeout_todo)) - need_softclock = 0; - mtx_leave(&timeout_mutex); if (need_softclock) @@ -694,61 +634,6 @@ timeout_run(struct timeout_ctx *tctx, st tctx->tctx_running = NULL; } -void -softclock_process_kclock_timeout(struct timeout *to, int new) -{ - struct kclock *kc = &timeout_kclock[to->to_kclock]; - - if (timespeccmp(&to->to_abstime, &kc->kc_lastscan, >)) { - tostat.tos_scheduled++; - if (!new) - tostat.tos_rescheduled++; - CIRCQ_INSERT_TAIL(&timeout_wheel_kc[timeout_bucket(to)], - &to->to_list); - return; - } - if (!new && timespeccmp(&to->to_abstime, &kc->kc_late, <=)) - tostat.tos_late++; - if (ISSET(to->to_flags, TIMEOUT_PROC)) { -#ifdef MULTIPROCESSOR - if (ISSET(to->to_flags, TIMEOUT_MPSAFE)) - CIRCQ_INSERT_TAIL(&timeout_proc_mp, &to->to_list); - else -#endif - CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list); - return; - } - timeout_run(&timeout_ctx_si, to); - tostat.tos_run_softclock++; -} - -void -softclock_process_tick_timeout(struct timeout *to, int new) -{ - int delta = to->to_time - ticks; - - if (delta > 0) { - tostat.tos_scheduled++; - if (!new) - tostat.tos_rescheduled++; - CIRCQ_INSERT_TAIL(&BUCKET(delta, to->to_time), &to->to_list); - return; - } - if (!new && delta < 0) - tostat.tos_late++; - if (ISSET(to->to_flags, TIMEOUT_PROC)) { -#ifdef MULTIPROCESSOR - if (ISSET(to->to_flags, TIMEOUT_MPSAFE)) - CIRCQ_INSERT_TAIL(&timeout_proc_mp, &to->to_list); - else -#endif - CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list); - return; - } - timeout_run(&timeout_ctx_si, to); - tostat.tos_run_softclock++; -} - /* * Timeouts are processed here instead of timeout_hardclock_update() * to avoid doing any more work at IPL_CLOCK than absolutely necessary. 
@@ -758,44 +643,63 @@ softclock_process_tick_timeout(struct ti void softclock(void *arg) { - struct timeout *first_new, *to; - int needsproc, new; + struct timeout now[KCLOCK_MAX]; + const struct kclock *kc; + struct timeout_heap *theap; + struct timeout *to; + struct timeout_ctx *tctx; + int needsproc; #ifdef MULTIPROCESSOR - int need_proc_mp; + int needsproc_mp; #endif + int i; - first_new = NULL; - new = 0; + for (i = 0; i < KCLOCK_MAX; i++) { + kc = &timeout_kclocks[i]; + now[i].to_time = kc->kc_gettime(); + } mtx_enter(&timeout_mutex); - if (!CIRCQ_EMPTY(&timeout_new)) - first_new = timeout_from_circq(CIRCQ_FIRST(&timeout_new)); - CIRCQ_CONCAT(&timeout_todo, &timeout_new); + tostat.tos_softclocks++; + + for (i = 0; i < KCLOCK_MAX; i++) { + kc = &timeout_kclocks[i]; + theap = kc->kc_heap; + + while ((to = timeout_heap_extract(theap, &now[i])) != NULL) { + tctx = timeout_context(to); + timeout_list_insert(tctx->tctx_todo, to); + } + } + + while (!CIRCQ_EMPTY(&timeout_new)) { + to = timeout_from_circq(CIRCQ_FIRST(&timeout_new)); + CIRCQ_REMOVE(&to->to_entry.to_list); + + if (timeout_cmp(&now[timeout_get_kclock(to)], to) >= 0) { + tctx = timeout_context(to); + timeout_list_insert(tctx->tctx_todo, to); + } else { + timeout_heap_insert(timeout_get_heap(to), to); + } + } + while (!CIRCQ_EMPTY(&timeout_todo)) { to = timeout_from_circq(CIRCQ_FIRST(&timeout_todo)); - CIRCQ_REMOVE(&to->to_list); - if (to == first_new) - new = 1; - if (to->to_kclock == KCLOCK_NONE) - softclock_process_tick_timeout(to, new); - else if (to->to_kclock == KCLOCK_UPTIME) - softclock_process_kclock_timeout(to, new); - else { - panic("%s: invalid to_clock: %d", - __func__, to->to_kclock); - } + CIRCQ_REMOVE(&to->to_entry.to_list); + timeout_run(&timeout_ctx_si, to); + tostat.tos_run_softclock++; } - tostat.tos_softclocks++; needsproc = !CIRCQ_EMPTY(&timeout_proc); #ifdef MULTIPROCESSOR - need_proc_mp = !CIRCQ_EMPTY(&timeout_proc_mp); + needsproc_mp = !CIRCQ_EMPTY(&timeout_proc_mp); #endif mtx_leave(&timeout_mutex); if (needsproc) wakeup(&timeout_proc); #ifdef MULTIPROCESSOR - if (need_proc_mp) + if (needsproc_mp) wakeup(&timeout_proc_mp); #endif } @@ -829,7 +733,7 @@ softclock_thread_run(struct timeout_ctx tostat.tos_thread_wakeups++; while (!CIRCQ_EMPTY(todo)) { to = timeout_from_circq(CIRCQ_FIRST(todo)); - CIRCQ_REMOVE(&to->to_list); + CIRCQ_REMOVE(&to->to_entry.to_list); timeout_run(tctx, to); tostat.tos_run_thread++; } @@ -874,31 +778,7 @@ softclock_thread_mp(void *arg) void timeout_adjust_ticks(int adj) { - struct timeout *to; - struct circq *p; - int new_ticks, b; - - /* adjusting the monotonic clock backwards would be a Bad Thing */ - if (adj <= 0) - return; - - mtx_enter(&timeout_mutex); - new_ticks = ticks + adj; - for (b = 0; b < nitems(timeout_wheel); b++) { - p = CIRCQ_FIRST(&timeout_wheel[b]); - while (p != &timeout_wheel[b]) { - to = timeout_from_circq(p); - p = CIRCQ_FIRST(p); - - /* when moving a timeout forward need to reinsert it */ - if (to->to_time - ticks < adj) - to->to_time = new_ticks; - CIRCQ_REMOVE(&to->to_list); - CIRCQ_INSERT_TAIL(&timeout_todo, &to->to_list); - } - } - ticks = new_ticks; - mtx_leave(&timeout_mutex); + printf("%s: adj %d\n", __func__, adj); } #endif @@ -923,12 +803,9 @@ const char *db_timespec(const struct tim const char * db_kclock(int kclock) { - switch (kclock) { - case KCLOCK_UPTIME: - return "uptime"; - default: - return "invalid"; - } + if (kclock < 0 || kclock > KCLOCK_MAX) + return ("invalid"); + return (timeout_kclocks[kclock].kc_name); } const char * @@ -961,11 
+838,8 @@ db_show_callout_bucket(struct circq *buc void db_show_timeout(struct timeout *to, struct circq *bucket) { - struct timespec remaining; - struct kclock *kc; - char buf[8]; + uint64_t now = getnsecuptime(); db_expr_t offset; - struct circq *wheel; const char *name, *where; int width = sizeof(long) * 2; @@ -981,51 +855,30 @@ db_show_timeout(struct timeout *to, stru else if (bucket == &timeout_proc_mp) where = "thread-mp"; #endif - else { - if (to->to_kclock == KCLOCK_UPTIME) - wheel = timeout_wheel_kc; - else if (to->to_kclock == KCLOCK_NONE) - wheel = timeout_wheel; - else - goto invalid; - snprintf(buf, sizeof(buf), "%3ld/%1ld", - (bucket - wheel) % WHEELSIZE, - (bucket - wheel) / WHEELSIZE); - where = buf; - } - if (to->to_kclock == KCLOCK_UPTIME) { - kc = &timeout_kclock[to->to_kclock]; - timespecsub(&to->to_abstime, &kc->kc_lastscan, &remaining); - db_printf("%20s %8s %9s 0x%0*lx %s\n", - db_timespec(&remaining), db_kclock(to->to_kclock), where, - width, (ulong)to->to_arg, name); - } else if (to->to_kclock == KCLOCK_NONE) { - db_printf("%20d %8s %9s 0x%0*lx %s\n", - to->to_time - ticks, "ticks", where, - width, (ulong)to->to_arg, name); - } else - goto invalid; - return; - - invalid: - db_printf("%s: timeout 0x%p: invalid to_kclock: %d", - __func__, to, to->to_kclock); + else if (to->to_kclock == KCLOCK_UPTIME) + where = "timeout_heap_kc"; + else + where = "timeout_heap_si"; + + db_printf("%20lld %8s %9s 0x%0*lx %s\n", + to->to_time - now, "nsecs", where, + width, (ulong)to->to_arg, name); } void db_show_callout(db_expr_t addr, int haddr, db_expr_t count, char *modif) { - struct kclock *kc; +// struct kclock *kc; int width = sizeof(long) * 2 + 2; - int b, i; +// int b, i; db_printf("%20s %8s\n", "lastscan", "clock"); db_printf("%20d %8s\n", ticks, "ticks"); - for (i = 0; i < nitems(timeout_kclock); i++) { - kc = &timeout_kclock[i]; - db_printf("%20s %8s\n", - db_timespec(&kc->kc_lastscan), db_kclock(i)); - } +// for (i = 0; i < nitems(timeout_kclock); i++) { +// kc = &timeout_kclock[i]; +// db_printf("%20s %8s\n", +// db_timespec(&kc->kc_lastscan), db_kclock(i)); +// } db_printf("\n"); db_printf("%20s %8s %9s %*s %s\n", "remaining", "clock", "wheel", width, "arg", "func"); @@ -1035,9 +888,9 @@ db_show_callout(db_expr_t addr, int hadd #ifdef MULTIPROCESSOR db_show_callout_bucket(&timeout_proc_mp); #endif - for (b = 0; b < nitems(timeout_wheel); b++) - db_show_callout_bucket(&timeout_wheel[b]); - for (b = 0; b < nitems(timeout_wheel_kc); b++) - db_show_callout_bucket(&timeout_wheel_kc[b]); +// for (b = 0; b < nitems(timeout_wheel); b++) +// db_show_callout_bucket(&timeout_wheel[b]); +// for (b = 0; b < nitems(timeout_wheel_kc); b++) +// db_show_callout_bucket(&timeout_wheel_kc[b]); } #endif Index: kern/subr_heap.c =================================================================== RCS file: kern/subr_heap.c diff -N kern/subr_heap.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ kern/subr_heap.c 13 May 2025 00:06:18 -0000 @@ -0,0 +1,197 @@ +/* */ + +/* + * Copyright (c) 2017 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include + +static inline struct _heap_entry * +heap_n2e(const struct _heap_type *t, void *node) +{ + unsigned long addr = (unsigned long)node; + + return ((struct _heap_entry *)(addr + t->t_offset)); +} + +static inline void * +heap_e2n(const struct _heap_type *t, struct _heap_entry *he) +{ + unsigned long addr = (unsigned long)he; + + return ((void *)(addr - t->t_offset)); +} + +struct _heap_entry * +_heap_merge(const struct _heap_type *t, + struct _heap_entry *he1, struct _heap_entry *he2) +{ + struct _heap_entry *hi, *lo; + struct _heap_entry *child; + + if (he1 == NULL) + return (he2); + if (he2 == NULL) + return (he1); + + if (t->t_compare(heap_e2n(t, he1), heap_e2n(t, he2)) < 0) { + lo = he1; + hi = he2; + } else { + hi = he1; + lo = he2; + } + + child = lo->he_child; + + hi->he_left = lo; + hi->he_nextsibling = child; + if (child != NULL) + child->he_left = hi; + lo->he_child = hi; + lo->he_left = NULL; + lo->he_nextsibling = NULL; + + return (lo); +} + +static inline void +_heap_sibling_remove(struct _heap_entry *he) +{ + if (he->he_left == NULL) + return; + + if (he->he_left->he_child == he) { + if ((he->he_left->he_child = he->he_nextsibling) != NULL) + he->he_nextsibling->he_left = he->he_left; + } else { + if ((he->he_left->he_nextsibling = he->he_nextsibling) != NULL) + he->he_nextsibling->he_left = he->he_left; + } + + he->he_left = NULL; + he->he_nextsibling = NULL; +} + +static inline struct _heap_entry * +_heap_2pass_merge(const struct _heap_type *t, struct _heap_entry *root) +{ + struct _heap_entry *node, *next = NULL; + struct _heap_entry *tmp, *list = NULL; + + node = root->he_child; + if (node == NULL) + return (NULL); + + root->he_child = NULL; + + /* first pass */ + for (next = node->he_nextsibling; next != NULL; + next = (node != NULL ? 
node->he_nextsibling : NULL)) { + tmp = next->he_nextsibling; + node = _heap_merge(t, node, next); + + /* insert head */ + node->he_nextsibling = list; + list = node; + node = tmp; + } + + /* odd child case */ + if (node != NULL) { + node->he_nextsibling = list; + list = node; + } + + /* second pass */ + while (list->he_nextsibling != NULL) { + tmp = list->he_nextsibling->he_nextsibling; + list = _heap_merge(t, list, list->he_nextsibling); + list->he_nextsibling = tmp; + } + + list->he_left = NULL; + list->he_nextsibling = NULL; + + return (list); +} + +void +_heap_insert(const struct _heap_type *t, struct _heap *h, void *node) +{ + struct _heap_entry *he = heap_n2e(t, node); + + he->he_left = NULL; + he->he_child = NULL; + he->he_nextsibling = NULL; + + h->h_root = _heap_merge(t, h->h_root, he); +} + +void +_heap_remove(const struct _heap_type *t, struct _heap *h, void *node) +{ + struct _heap_entry *he = heap_n2e(t, node); + + if (he->he_left == NULL) { + _heap_extract(t, h); + return; + } + + _heap_sibling_remove(he); + h->h_root = _heap_merge(t, h->h_root, _heap_2pass_merge(t, he)); +} + +void * +_heap_first(const struct _heap_type *t, struct _heap *h) +{ + struct _heap_entry *first = h->h_root; + + if (first == NULL) + return (NULL); + + return (heap_e2n(t, first)); +} + +void * +_heap_extract(const struct _heap_type *t, struct _heap *h) +{ + struct _heap_entry *first = h->h_root; + + if (first == NULL) + return (NULL); + + h->h_root = _heap_2pass_merge(t, first); + + return (heap_e2n(t, first)); +} + +void * +_heap_cextract(const struct _heap_type *t, struct _heap *h, const void *key) +{ + struct _heap_entry *first = h->h_root; + void *node; + + if (first == NULL) + return (NULL); + + node = heap_e2n(t, first); + if (t->t_compare(key, node) < 0) + return (NULL); + + h->h_root = _heap_2pass_merge(t, first); + + return (node); +} Index: kern/subr_log.c =================================================================== RCS file: /cvs/src/sys/kern/subr_log.c,v diff -u -p -r1.80 subr_log.c --- kern/subr_log.c 30 Dec 2024 02:46:00 -0000 1.80 +++ kern/subr_log.c 13 May 2025 00:06:18 -0000 @@ -261,7 +261,7 @@ logread(dev_t dev, struct uio *uio, int * to keep log_mtx as a leaf lock. */ sleep_setup(mbp, LOG_RDPRI | PCATCH, "klog"); - error = sleep_finish(0, logsoftc.sc_state & LOG_RDWAIT); + error = sleep_finish(INFSLP, logsoftc.sc_state & LOG_RDWAIT); mtx_enter(&log_mtx); if (error) goto out; Index: kern/sys_futex.c =================================================================== RCS file: /cvs/src/sys/kern/sys_futex.c,v diff -u -p -r1.23 sys_futex.c --- kern/sys_futex.c 7 May 2025 00:39:09 -0000 1.23 +++ kern/sys_futex.c 13 May 2025 00:06:18 -0000 @@ -251,13 +251,12 @@ futex_wait(struct proc *p, uint32_t *uad { struct futex f; struct futex_slpque *fsq; - uint64_t to_ticks = 0; + uint64_t nsecs = INFSLP; uint32_t cval; int error; if (timeout != NULL) { struct timespec ts; - uint64_t nsecs; if ((error = copyin(timeout, &ts, sizeof(ts)))) return error; @@ -269,9 +268,6 @@ futex_wait(struct proc *p, uint32_t *uad return EINVAL; nsecs = MAX(1, MIN(TIMESPEC_TO_NSEC(&ts), MAXTSLP)); - to_ticks = (nsecs + tick_nsec - 1) / (tick_nsec + 1) + 1; - if (to_ticks > INT_MAX) - to_ticks = INT_MAX; } futex_addrs(p, &f, uaddr, flags); @@ -301,7 +297,7 @@ futex_wait(struct proc *p, uint32_t *uad } sleep_setup(&f, PWAIT|PCATCH, "fsleep"); - error = sleep_finish(to_ticks, f.ft_proc != NULL); + error = sleep_finish(nsecs, f.ft_proc != NULL); /* Remove ourself if we haven't been awaken. 
*/ if (error != 0 || f.ft_proc != NULL) { if (futex_unwait(fsq, &f) == 0) Index: kern/uipc_socket.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_socket.c,v diff -u -p -r1.377 uipc_socket.c --- kern/uipc_socket.c 15 Apr 2025 12:14:06 -0000 1.377 +++ kern/uipc_socket.c 13 May 2025 00:06:18 -0000 @@ -2568,7 +2568,7 @@ so_print(void *v, (unsigned long long)so->so_sp->ssp_max); (*pr)("\tssp_idletv: %lld %ld\n", so->so_sp->ssp_idletv.tv_sec, so->so_sp->ssp_idletv.tv_usec); - (*pr)("\tssp_idleto: %spending (@%i)\n", + (*pr)("\tssp_idleto: %spending (@%lli)\n", timeout_pending(&so->so_sp->ssp_idleto) ? "" : "not ", so->so_sp->ssp_idleto.to_time); } Index: sys/heap.h =================================================================== RCS file: sys/heap.h diff -N sys/heap.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/heap.h 13 May 2025 00:06:18 -0000 @@ -0,0 +1,143 @@ +/* */ + +/* + * Copyright (c) 2017 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef _HEAP_H_ +#define _HEAP_H_ + +#include + +struct _heap_type { + int (*t_compare)(const void *, const void *); + unsigned int t_offset; /* offset of heap_entry in type */ +}; + +struct _heap_entry { + struct _heap_entry *he_left; + struct _heap_entry *he_child; + struct _heap_entry *he_nextsibling; +}; +#define HEAP_ENTRY(_entry) struct _heap_entry + +struct _heap { + struct _heap_entry *h_root; +}; + +#define HEAP_HEAD(_name) \ +struct _name { \ + struct _heap heap; \ +} + +static inline void +_heap_init(struct _heap *h) +{ + h->h_root = NULL; +} + +static inline int +_heap_empty(struct _heap *h) +{ + return (h->h_root == NULL); +} + +void _heap_insert(const struct _heap_type *, struct _heap *, void *); +void _heap_remove(const struct _heap_type *, struct _heap *, void *); +void *_heap_first(const struct _heap_type *, struct _heap *); +struct _heap_entry * + _heap_merge(const struct _heap_type *, + struct _heap_entry *, struct _heap_entry *); +void *_heap_extract(const struct _heap_type *, struct _heap *); +void *_heap_cextract(const struct _heap_type *, struct _heap *, + const void *); + +#define HEAP_INITIALIZER(_head) { { NULL } } + +#define HEAP_PROTOTYPE(_name, _type) \ +extern const struct _heap_type *const _name##_HEAP_TYPE; \ + \ +static __unused inline void \ +_name##_HEAP_INIT(struct _name *head) \ +{ \ + _heap_init(&head->heap); \ +} \ + \ +static __unused inline void \ +_name##_HEAP_INSERT(struct _name *head, struct _type *elm) \ +{ \ + _heap_insert(_name##_HEAP_TYPE, &head->heap, elm); \ +} \ + \ +static __unused inline void \ +_name##_HEAP_REMOVE(struct _name *head, struct _type *elm) \ +{ \ + _heap_remove(_name##_HEAP_TYPE, &head->heap, elm); \ +} \ + \ +static __unused inline struct _type * \ +_name##_HEAP_FIRST(struct _name *head) \ +{ \ + return 
_heap_first(_name##_HEAP_TYPE, &head->heap); \ +} \ + \ +static __unused inline void \ +_name##_HEAP_MERGE(struct _name *head1, struct _name *head2) \ +{ \ + head1->heap.h_root = _heap_merge(_name##_HEAP_TYPE, \ + head1->heap.h_root, head2->heap.h_root); \ +} \ + \ +static __unused inline struct _type * \ +_name##_HEAP_EXTRACT(struct _name *head) \ +{ \ + return _heap_extract(_name##_HEAP_TYPE, &head->heap); \ +} \ + \ +static __unused inline struct _type * \ +_name##_HEAP_CEXTRACT(struct _name *head, const struct _type *key) \ +{ \ + return _heap_cextract(_name##_HEAP_TYPE, &head->heap, key); \ +} \ + \ +static __unused inline int \ +_name##_HEAP_EMPTY(struct _name *head) \ +{ \ + return _heap_empty(&head->heap); \ +} + +#define HEAP_GENERATE(_name, _type, _field, _cmp) \ +static int \ +_name##_HEAP_COMPARE(const void *lptr, const void *rptr) \ +{ \ + const struct _type *l = lptr, *r = rptr; \ + return _cmp(l, r); \ +} \ +static const struct _heap_type _name##_HEAP_INFO = { \ + _name##_HEAP_COMPARE, \ + offsetof(struct _type, _field), \ +}; \ +const struct _heap_type *const _name##_HEAP_TYPE = &_name##_HEAP_INFO + +#define HEAP_INIT(_name, _h) _name##_HEAP_INIT((_h)) +#define HEAP_INSERT(_name, _h, _e) _name##_HEAP_INSERT((_h), (_e)) +#define HEAP_REMOVE(_name, _h, _e) _name##_HEAP_REMOVE((_h), (_e)) +#define HEAP_FIRST(_name, _h) _name##_HEAP_FIRST((_h)) +#define HEAP_MERGE(_name, _h1, _h2) _name##_HEAP_MERGE((_h1), (_h2)) +#define HEAP_EXTRACT(_name, _h) _name##_HEAP_EXTRACT((_h)) +#define HEAP_CEXTRACT(_name, _h, _k) _name##_HEAP_CEXTRACT((_h), (_k)) +#define HEAP_EMPTY(_name, _h) _name##_HEAP_EMPTY((_h)) + +#endif /* _HEAP_H_ */ Index: sys/systm.h =================================================================== RCS file: /cvs/src/sys/sys/systm.h,v diff -u -p -r1.171 systm.h --- sys/systm.h 28 May 2024 12:50:23 -0000 1.171 +++ sys/systm.h 13 May 2025 00:06:18 -0000 @@ -256,7 +256,7 @@ void start_periodic_resettodr(void); void stop_periodic_resettodr(void); void sleep_setup(const volatile void *, int, const char *); -int sleep_finish(int, int); +int sleep_finish(uint64_t, int); void sleep_queue_init(void); struct cond; @@ -265,7 +265,7 @@ void cond_wait(struct cond *, const char void cond_signal(struct cond *); #define INFSLP UINT64_MAX -#define MAXTSLP (UINT64_MAX - 1) +#define MAXTSLP INT64_MAX struct mutex; struct rwlock; Index: sys/time.h =================================================================== RCS file: /cvs/src/sys/sys/time.h,v diff -u -p -r1.66 time.h --- sys/time.h 17 Oct 2023 00:04:02 -0000 1.66 +++ sys/time.h 13 May 2025 00:06:18 -0000 @@ -327,6 +327,8 @@ time_t getuptime(void); uint64_t nsecuptime(void); uint64_t getnsecuptime(void); +uint64_t getnsectime(void); + struct proc; int clock_gettime(struct proc *, clockid_t, struct timespec *); Index: sys/timeout.h =================================================================== RCS file: /cvs/src/sys/sys/timeout.h,v diff -u -p -r1.50 timeout.h --- sys/timeout.h 11 Aug 2024 00:49:34 -0000 1.50 +++ sys/timeout.h 13 May 2025 00:06:18 -0000 @@ -28,6 +28,7 @@ #define _SYS_TIMEOUT_H_ #include +#include struct circq { struct circq *next; /* next element */ @@ -35,14 +36,16 @@ struct circq { }; struct timeout { - struct circq to_list; /* timeout queue, don't move */ - struct timespec to_abstime; /* absolute time to run at */ + union { + HEAP_ENTRY(timeout) to_heap; + struct circq to_list; + } to_entry; /* timeout queue, don't move */ + uint64_t to_time; /* when to fire event */ void (*to_func)(void *); /* function to 
call */ void *to_arg; /* function argument */ #if 1 /* NKCOV > 0 */ struct process *to_process; /* kcov identifier */ #endif - int to_time; /* ticks on event */ int to_flags; /* misc flags */ int to_kclock; /* abstime's kernel clock */ }; @@ -51,10 +54,11 @@ struct timeout { * flags in the to_flags field. */ #define TIMEOUT_PROC 0x01 /* needs a process context */ -#define TIMEOUT_ONQUEUE 0x02 /* on any timeout queue */ -#define TIMEOUT_INITIALIZED 0x04 /* initialized */ -#define TIMEOUT_TRIGGERED 0x08 /* running or ran */ -#define TIMEOUT_MPSAFE 0x10 /* run without kernel lock */ +#define TIMEOUT_MPSAFE 0x02 /* run without kernel lock */ +#define TIMEOUT_ONQUEUE 0x04 /* on any timeout queue */ +#define TIMEOUT_ONHEAP 0x08 /* on a kclock heap */ +#define TIMEOUT_INITIALIZED 0x10 /* initialized */ +#define TIMEOUT_TRIGGERED 0x20 /* running or ran */ struct timeoutstat { uint64_t tos_added; /* timeout_add*(9) calls */ @@ -86,11 +90,10 @@ int timeout_sysctl(void *, size_t *, voi #define KCLOCK_NONE (-1) /* dummy clock for sanity checks */ #define KCLOCK_UPTIME 0 /* uptime clock; time since boot */ -#define KCLOCK_MAX 1 +#define KCLOCK_REALTIME 1 +#define KCLOCK_MAX 2 #define TIMEOUT_INITIALIZER_FLAGS(_fn, _arg, _kclock, _flags) { \ - .to_list = { NULL, NULL }, \ - .to_abstime = { .tv_sec = 0, .tv_nsec = 0 }, \ .to_func = (_fn), \ .to_arg = (_arg), \ .to_time = 0, \
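
To make the new sys/heap.h API easier to review in isolation, here is a minimal consumer sketch modeled on the timeout_heap usage in kern_timeout.c above. Everything named "deadline" below is a hypothetical name for illustration only and is not part of this diff; only the HEAP_* macros and subr_heap.c come from it.

/*
 * Minimal sketch of a sys/heap.h consumer, modeled on the
 * timeout_heap usage above.  "deadline" and its members are
 * illustrative; only the HEAP_* macros come from this diff.
 */
#include <sys/types.h>
#include <sys/heap.h>

struct deadline {
	uint64_t		d_nsec;		/* absolute expiry, in nsec */
	HEAP_ENTRY(deadline)	d_entry;	/* pairing heap linkage */
};

HEAP_HEAD(deadline_heap);			/* defines struct deadline_heap */
HEAP_PROTOTYPE(deadline_heap, deadline);

static inline int
deadline_cmp(const struct deadline *a, const struct deadline *b)
{
	/* signed difference of the u64 times, as in timeout_cmp() above */
	int64_t delta = a->d_nsec - b->d_nsec;

	if (delta < 0)
		return (-1);
	if (delta > 0)
		return (1);
	return (0);
}

HEAP_GENERATE(deadline_heap, deadline, d_entry, deadline_cmp);

struct deadline_heap dheap = HEAP_INITIALIZER(&dheap);

/* schedule one entry, then run everything that is due at "now" */
void
deadline_example(struct deadline *d, const struct deadline *now)
{
	struct deadline *due;

	HEAP_INSERT(deadline_heap, &dheap, d);
	while ((due = HEAP_CEXTRACT(deadline_heap, &dheap, now)) != NULL) {
		/* "due" compares less than or equal to "now"; run it */
	}
}

The appeal of the pairing heap over the old timing wheel is that insert and remove stay cheap while the earliest deadline is always at the root, which is what timeout_hardclock_update() now peeks at with HEAP_FIRST() to decide whether softclock() needs to run at all.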