Index: kern/kern_timeout.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_timeout.c,v
diff -u -p -r1.106 kern_timeout.c
--- kern/kern_timeout.c	24 May 2025 00:19:09 -0000	1.106
+++ kern/kern_timeout.c	25 May 2025 00:11:08 -0000
@@ -48,6 +48,11 @@
 #include <sys/kcov.h>
 #endif
 
+CTASSERT((TIMEOUT_PROC|TIMEOUT_MPSAFE) < 4);
+
+HEAP_HEAD(timeout_heap);
+HEAP_PROTOTYPE(timeout_heap, timeout);
+
 struct timeout_ctx {
 	struct circq *tctx_todo;
 	struct timeout *tctx_running;
 };
@@ -64,21 +69,10 @@ struct mutex timeout_mutex = MUTEX_INITI
 void *softclock_si;			/* [I] softclock() interrupt handle */
 struct timeoutstat tostat;		/* [T] statistics and totals */
 
-/*
- * Timeouts are kept in a hierarchical timing wheel.  The to_time is the value
- * of the global variable "ticks" when the timeout should be called.  There are
- * four levels with 256 buckets each.
- */
-#define WHEELCOUNT 4
-#define WHEELSIZE 256
-#define WHEELMASK 255
-#define WHEELBITS 8
-#define BUCKETS (WHEELCOUNT * WHEELSIZE)
-
-struct circq timeout_wheel[BUCKETS];	/* [T] Tick-based timeouts */
-struct circq timeout_wheel_kc[BUCKETS];	/* [T] Clock-based timeouts */
 struct circq timeout_new;		/* [T] New, unscheduled timeouts */
-struct circq timeout_todo;		/* [T] Due or needs rescheduling */
+struct timeout_heap timeout_heap_ut;	/* [T] Uptime based timeouts */
+struct circq timeout_todo;		/* [T] Due timeouts */
+
 static struct timeout_ctx timeout_ctx_si = {
 	.tctx_todo = &timeout_todo,	/* [I] */
 	.tctx_running = NULL,		/* [T] */
@@ -94,32 +88,31 @@ static struct timeout_ctx timeout_ctx_pr
 	.tctx_todo = &timeout_proc_mp,	/* [I] */
 	.tctx_running = NULL,		/* [T] */
 };
+#else
+#define timeout_ctx_proc_mp timeout_ctx_proc
 #endif
 
-time_t timeout_level_width[WHEELCOUNT];	/* [I] Wheel level width (seconds) */
-struct timespec tick_ts;		/* [I] Length of a tick (1/hz secs) */
+static struct timeout_ctx * const timeout_contexts[] = {
+	[0] = &timeout_ctx_si,
+	[TIMEOUT_PROC] = &timeout_ctx_proc,
+	[TIMEOUT_MPSAFE] = &timeout_ctx_si,	/* XXX */
+	[TIMEOUT_MPSAFE|TIMEOUT_PROC] = &timeout_ctx_proc_mp,
+};
 
 struct kclock {
-	struct timespec kc_lastscan;	/* [T] Clock time at last wheel scan */
-	struct timespec kc_late;	/* [T] Late if due prior */
-	struct timespec kc_offset;	/* [T] Offset from primary kclock */
-} timeout_kclock[KCLOCK_MAX];
-
-#define MASKWHEEL(wheel, time) (((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)
-
-#define BUCKET(rel, abs)						\
-    (timeout_wheel[							\
-	((rel) <= (1 << (2*WHEELBITS)))					\
-	    ? ((rel) <= (1 << WHEELBITS))				\
-		? MASKWHEEL(0, (abs))					\
-		: MASKWHEEL(1, (abs)) + WHEELSIZE			\
-	    : ((rel) <= (1 << (3*WHEELBITS)))				\
-		? MASKWHEEL(2, (abs)) + 2*WHEELSIZE			\
-		: MASKWHEEL(3, (abs)) + 3*WHEELSIZE])
-
-#define MOVEBUCKET(wheel, time)						\
-    CIRCQ_CONCAT(&timeout_todo,						\
-	&timeout_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE])
+	const char *kc_name;
+	struct timeout_heap *kc_heap;
+	uint64_t (*kc_gettime)(void);
+};
+
+static const struct kclock timeout_kclocks[KCLOCK_MAX] = {	/* [I] */
+	[KCLOCK_UPTIME] = {
+		.kc_name = "uptime",
+		.kc_heap = &timeout_heap_ut,
+		.kc_gettime = getnsecuptime
+	},
+	/* KCLOCK_REALTIME? */
+};
 
 /*
  * Circular queue definitions.
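A note on the timeout_contexts[] table above: it replaces the if/else ladder
that timeout_barrier() used for picking an execution context. TIMEOUT_PROC and
TIMEOUT_MPSAFE are now the low two bits of to_flags (see the sys/timeout.h
hunk at the end of this diff), so masking to_flags with
(TIMEOUT_PROC|TIMEOUT_MPSAFE) gives a dense 0..3 array index, and the CTASSERT
pins that layout down at compile time. A minimal userland sketch of the same
dispatch pattern (names are illustrative, not from the diff):

#include <assert.h>
#include <stdio.h>

#define TIMEOUT_PROC	0x01	/* needs a process context */
#define TIMEOUT_MPSAFE	0x02	/* run without kernel lock */

static const char * const contexts[] = {
	[0] = "si softclock",
	[TIMEOUT_PROC] = "softclock thread",
	[TIMEOUT_MPSAFE] = "si softclock",	/* XXX, as in the diff */
	[TIMEOUT_PROC|TIMEOUT_MPSAFE] = "mpsafe softclock thread",
};

int
main(void)
{
	int flags;

	/* the kernel checks this with CTASSERT at compile time */
	assert((TIMEOUT_PROC|TIMEOUT_MPSAFE) < 4);

	for (flags = 0; flags < 4; flags++)
		printf("to_flags %#x -> %s\n", flags, contexts[flags]);
	return 0;
}

On non-MULTIPROCESSOR kernels the timeout_ctx_proc_mp define above aliases the
proc context, so the table stays fully populated without any #ifdef in the
lookup path.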
@@ -135,7 +128,6 @@ struct kclock { (list)->next->prev = (elem); \ (list)->next = (elem); \ (elem)->prev = (list); \ - tostat.tos_pending++; \ } while (0) #define CIRCQ_INSERT_TAIL(list, elem) do { \ @@ -143,7 +135,6 @@ struct kclock { (elem)->next = (list); \ (list)->prev->next = (elem); \ (list)->prev = (elem); \ - tostat.tos_pending++; \ } while (0) #define CIRCQ_CONCAT(fst, snd) do { \ @@ -161,7 +152,6 @@ struct kclock { (elem)->prev->next = (elem)->next; \ _Q_INVALIDATE((elem)->prev); \ _Q_INVALIDATE((elem)->next); \ - tostat.tos_pending--; \ } while (0) #define CIRCQ_FIRST(elem) ((elem)->next) @@ -196,15 +186,11 @@ struct lock_type timeout_spinlock_type = void softclock(void *); void softclock_create_thread(void *); -void softclock_process_kclock_timeout(struct timeout *, int); -void softclock_process_tick_timeout(struct timeout *, int); void softclock_thread(void *); #ifdef MULTIPROCESSOR void softclock_thread_mp(void *); #endif void timeout_barrier_timeout(void *); -uint32_t timeout_bucket(const struct timeout *); -uint32_t timeout_maskwheel(uint32_t, const struct timespec *); void timeout_run(struct timeout_ctx *, struct timeout *); /* @@ -243,33 +229,38 @@ timeout_sync_leave(int needsproc) * We have to beware of wrapping ints. * We use the fact that any element added to the queue must be added with a * positive time. That means that any element `to' on the queue cannot be - * scheduled to timeout further in time than INT_MAX, but to->to_time can + * scheduled to timeout further in time than INT64_MAX, but to->to_time can * be positive or negative so comparing it with anything is dangerous. * The only way we can use the to->to_time value in any predictable way * is when we calculate how far in the future `to' will timeout - - * "to->to_time - ticks". The result will always be positive for future + * "to->to_time - getnsecuptime()". The result will be positive for future * timeouts and 0 or negative for due timeouts. 
*/ +static inline int +timeout_cmp(const struct timeout *a, const struct timeout *b) +{ + int64_t delta = a->to_time - b->to_time; + + if (delta < 0) + return (-1); + if (delta > 0) + return (1); + return (0); +} + +HEAP_GENERATE(timeout_heap, timeout, to_entry.to_heap, timeout_cmp); + void timeout_startup(void) { - int b, level; - CIRCQ_INIT(&timeout_new); + HEAP_INIT(timeout_heap, &timeout_heap_ut); CIRCQ_INIT(&timeout_todo); CIRCQ_INIT(&timeout_proc); #ifdef MULTIPROCESSOR CIRCQ_INIT(&timeout_proc_mp); #endif - for (b = 0; b < nitems(timeout_wheel); b++) - CIRCQ_INIT(&timeout_wheel[b]); - for (b = 0; b < nitems(timeout_wheel_kc); b++) - CIRCQ_INIT(&timeout_wheel_kc[b]); - - for (level = 0; level < nitems(timeout_level_width); level++) - timeout_level_width[level] = 2 << (level * WHEELBITS); - NSEC_TO_TIMESPEC(tick_nsec, &tick_ts); } void @@ -314,39 +305,83 @@ timeout_set_proc(struct timeout *new, vo timeout_set_flags(new, fn, arg, KCLOCK_NONE, TIMEOUT_PROC); } -int -timeout_add(struct timeout *new, int to_ticks) +static inline void +timeout_list_insert(struct circq *tlist, struct timeout *to) +{ + CIRCQ_INSERT_TAIL(tlist, &to->to_entry.to_list); +} + +static inline int +timeout_get_kclock(const struct timeout *to) +{ + int kclock = to->to_kclock; + if (kclock == KCLOCK_NONE) + kclock = KCLOCK_UPTIME; + return (kclock); +} + +static inline struct timeout_heap * +timeout_get_heap(const struct timeout *to) +{ + return (timeout_kclocks[timeout_get_kclock(to)].kc_heap); +} + +static inline void +timeout_heap_insert(struct timeout_heap *theap, struct timeout *to) +{ + SET(to->to_flags, TIMEOUT_ONHEAP); + HEAP_INSERT(timeout_heap, theap, to); +} + +static inline void +timeout_heap_remove(struct timeout_heap *theap, struct timeout *to) +{ + HEAP_REMOVE(timeout_heap, theap, to); + CLR(to->to_flags, TIMEOUT_ONHEAP); +} + +static inline struct timeout * +timeout_heap_extract(struct timeout_heap *theap, const struct timeout *now) +{ + struct timeout *to; + + to = HEAP_CEXTRACT(timeout_heap, theap, now); + if (to != NULL) + CLR(to->to_flags, TIMEOUT_ONHEAP); + + return (to); +} + +static void +timeout_remove(struct timeout *to) +{ + if (ISSET(to->to_flags, TIMEOUT_ONHEAP)) + timeout_heap_remove(timeout_get_heap(to), to); + else + CIRCQ_REMOVE(&to->to_entry.to_list); +} + +static int +_timeout_add(struct timeout *to, uint64_t to_time) { - int old_time; int ret = 1; - KASSERT(ISSET(new->to_flags, TIMEOUT_INITIALIZED)); - KASSERT(new->to_kclock == KCLOCK_NONE); - KASSERT(to_ticks >= 0); + KASSERT(ISSET(to->to_flags, TIMEOUT_INITIALIZED)); mtx_enter(&timeout_mutex); + CLR(to->to_flags, TIMEOUT_TRIGGERED); - /* Initialize the time here, it won't change. */ - old_time = new->to_time; - new->to_time = to_ticks + ticks; - CLR(new->to_flags, TIMEOUT_TRIGGERED); - - /* - * If this timeout already is scheduled and now is moved - * earlier, reschedule it now. Otherwise leave it in place - * and let it be rescheduled later. 
- */
-	if (ISSET(new->to_flags, TIMEOUT_ONQUEUE)) {
-		if (new->to_time - ticks < old_time - ticks) {
-			CIRCQ_REMOVE(&new->to_list);
-			CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
-		}
+	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
+		timeout_remove(to);
 		tostat.tos_readded++;
 		ret = 0;
 	} else {
-		SET(new->to_flags, TIMEOUT_ONQUEUE);
-		CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
+		tostat.tos_pending++;
+		SET(to->to_flags, TIMEOUT_ONQUEUE);
 	}
+
+	to->to_time = to_time;
+	timeout_list_insert(&timeout_new, to);
 #if NKCOV > 0
 	if (!kcov_cold)
-		new->to_process = curproc->p_p;
+		to->to_process = curproc->p_p;
@@ -357,25 +392,39 @@ timeout_add(struct timeout *new, int to_
 	return ret;
 }
 
-static inline int
-timeout_add_ticks(struct timeout *to, uint64_t to_ticks)
+static int
+timeout_add_interval(struct timeout *to, uint64_t nsecs)
 {
-	if (to_ticks > INT_MAX)
-		to_ticks = INT_MAX;
+	KASSERT(to->to_kclock == KCLOCK_NONE);
+
+	if (nsecs > MAXTSLP)
+		nsecs = MAXTSLP;
 
-	return timeout_add(to, (int)to_ticks);
+	return _timeout_add(to, getnsecuptime() + nsecs);
+}
+
+int
+timeout_add(struct timeout *to, int to_ticks)
+{
+	uint64_t nsecs;
+
+	KASSERT(to_ticks >= 0);
+	/* to_ticks is a 31bit int, so this can't overflow 64bits */
+	nsecs = (uint64_t)to_ticks * (uint64_t)tick_nsec;
+
+	return timeout_add_interval(to, nsecs);
 }
 
 int
 timeout_add_sec(struct timeout *to, int secs)
 {
-	uint64_t to_ticks;
+	uint64_t nsecs;
 
 	KASSERT(secs >= 0);
 	/* secs is a 31bit int, so this can't overflow 64bits */
-	to_ticks = (uint64_t)hz * (uint64_t)secs;
+	nsecs = (uint64_t)secs * 1000000000ULL;
 
-	return timeout_add_ticks(to, to_ticks);
+	return timeout_add_interval(to, nsecs);
 }
 
 /*
@@ -390,73 +439,46 @@ timeout_add_sec(struct timeout *to, int 
 int
 timeout_add_msec(struct timeout *to, uint64_t msecs)
 {
-	if (msecs >= (UINT64_MAX / 1000))
-		return timeout_add(to, INT_MAX);
+	if (msecs >= (UINT64_MAX / 1000000))
+		return timeout_add_interval(to, MAXTSLP);
 
-	return timeout_add_usec(to, msecs * 1000);
+	return timeout_add_nsec(to, msecs * 1000000);
 }
 
 int
 timeout_add_usec(struct timeout *to, uint64_t usecs)
 {
-	uint64_t to_ticks;
-
-	if (usecs >= (UINT64_MAX - tick))
-		return timeout_add(to, INT_MAX);
+	if (usecs >= (UINT64_MAX / 1000))
+		return timeout_add_interval(to, MAXTSLP);
 
-	to_ticks = (usecs + (tick - 1)) / tick;
-
-	return timeout_add_ticks(to, to_ticks);
+	return timeout_add_nsec(to, usecs * 1000);
}
 
 int
 timeout_add_nsec(struct timeout *to, uint64_t nsecs)
 {
-	uint64_t to_ticks;
-
-	if (nsecs >= (UINT64_MAX - tick_nsec))
-		return timeout_add(to, INT_MAX);
+	if (nsecs > 0) {
+		if (nsecs >= (MAXTSLP - tick_nsec))
+			return timeout_add_interval(to, MAXTSLP);
 
-	to_ticks = (nsecs + (tick_nsec - 1)) / tick_nsec;
+		/* push the deadline into the next clock tick */
+		nsecs += tick_nsec - 1;
+	}
 
-	return timeout_add_ticks(to, to_ticks);
+	return timeout_add_interval(to, nsecs);
 }
 
 int
 timeout_abs_ts(struct timeout *to, const struct timespec *abstime)
 {
-	struct timespec old_abstime;
-	int ret = 1;
-
-	mtx_enter(&timeout_mutex);
+	uint64_t nsec;
 
-	KASSERT(ISSET(to->to_flags, TIMEOUT_INITIALIZED));
 	KASSERT(to->to_kclock == KCLOCK_UPTIME);
 
-	old_abstime = to->to_abstime;
-	to->to_abstime = *abstime;
-	CLR(to->to_flags, TIMEOUT_TRIGGERED);
-
-	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
-		if (timespeccmp(abstime, &old_abstime, <)) {
-			CIRCQ_REMOVE(&to->to_list);
-			CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
-		}
-		tostat.tos_readded++;
-		ret = 0;
-	} else {
-		SET(to->to_flags, TIMEOUT_ONQUEUE);
-		CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
-	}
-#if NKCOV > 0
-	if 
(!kcov_cold) - to->to_process = curproc->p_p; -#endif - tostat.tos_added++; - - mtx_leave(&timeout_mutex); + nsec = TIMESPEC_TO_NSEC(abstime); + /* XXX check diff with getnsecuptime() is less than MAXTSLP? */ - return ret; + return _timeout_add(to, nsec); } int @@ -466,8 +488,9 @@ timeout_del(struct timeout *to) mtx_enter(&timeout_mutex); if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) { - CIRCQ_REMOVE(&to->to_list); + timeout_remove(to); CLR(to->to_flags, TIMEOUT_ONQUEUE); + tostat.tos_pending--; tostat.tos_cancelled++; ret = 1; } @@ -478,6 +501,12 @@ timeout_del(struct timeout *to) return ret; } +static inline struct timeout_ctx * +timeout_context(const struct timeout *to) +{ + return timeout_contexts[to->to_flags & (TIMEOUT_PROC|TIMEOUT_MPSAFE)]; +} + int timeout_del_barrier(struct timeout *to) { @@ -508,24 +537,15 @@ timeout_barrier(struct timeout *to) cond_init(&c); mtx_enter(&timeout_mutex); - if (ISSET(flags, TIMEOUT_PROC)) { -#ifdef MULTIPROCESSOR - if (ISSET(flags, TIMEOUT_MPSAFE)) - tctx = &timeout_ctx_proc_mp; - else -#endif - tctx = &timeout_ctx_proc; - } else - tctx = &timeout_ctx_si; - + tctx = timeout_context(to); if (tctx->tctx_running != to) { mtx_leave(&timeout_mutex); return; } - barrier.to_time = ticks; + tostat.tos_pending++; SET(barrier.to_flags, TIMEOUT_ONQUEUE); - CIRCQ_INSERT_HEAD(tctx->tctx_todo, &barrier.to_list); + CIRCQ_INSERT_HEAD(tctx->tctx_todo, &barrier.to_entry.to_list); mtx_leave(&timeout_mutex); /* @@ -545,49 +565,6 @@ timeout_barrier_timeout(void *arg) cond_signal(c); } -uint32_t -timeout_bucket(const struct timeout *to) -{ - struct timespec diff, shifted_abstime; - struct kclock *kc; - uint32_t level; - - KASSERT(to->to_kclock == KCLOCK_UPTIME); - kc = &timeout_kclock[to->to_kclock]; - - KASSERT(timespeccmp(&kc->kc_lastscan, &to->to_abstime, <)); - timespecsub(&to->to_abstime, &kc->kc_lastscan, &diff); - for (level = 0; level < nitems(timeout_level_width) - 1; level++) { - if (diff.tv_sec < timeout_level_width[level]) - break; - } - timespecadd(&to->to_abstime, &kc->kc_offset, &shifted_abstime); - return level * WHEELSIZE + timeout_maskwheel(level, &shifted_abstime); -} - -/* - * Hash the absolute time into a bucket on a given level of the wheel. - * - * The complete hash is 32 bits. The upper 25 bits are seconds, the - * lower 7 bits are nanoseconds. tv_nsec is a positive value less - * than one billion so we need to divide it to isolate the desired - * bits. We can't just shift it. - * - * The level is used to isolate an 8-bit portion of the hash. The - * resulting number indicates which bucket the absolute time belongs - * in on the given level of the wheel. - */ -uint32_t -timeout_maskwheel(uint32_t level, const struct timespec *abstime) -{ - uint32_t hi, lo; - - hi = abstime->tv_sec << 7; - lo = abstime->tv_nsec / 7812500; - - return ((hi | lo) >> (level * WHEELBITS)) & WHEELMASK; -} - /* * This is called from hardclock() on the primary CPU at the start of * every tick. 
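The hunk below rewrites timeout_hardclock_update() so that hardclock only
peeks at the minimum of each kclock heap and posts the softclock interrupt
when that minimum is due, instead of rotating wheel buckets. The due test is
timeout_cmp(&now, to) >= 0, where timeout_cmp() (defined earlier in this diff)
subtracts the two uint64_t deadlines and looks at the sign of the 64-bit
signed difference, so the ordering stays correct even if the uptime counter
were ever to wrap. A self-contained userland sketch of that property
(illustrative, not from the diff):

#include <stdint.h>
#include <stdio.h>

struct timeout {
	uint64_t to_time;		/* absolute deadline, nanoseconds */
};

/* the same signed-delta comparison as timeout_cmp() in the diff */
static int
timeout_cmp(const struct timeout *a, const struct timeout *b)
{
	int64_t delta = a->to_time - b->to_time;

	if (delta < 0)
		return (-1);
	if (delta > 0)
		return (1);
	return (0);
}

int
main(void)
{
	struct timeout now = { .to_time = UINT64_MAX - 5 };	/* near wrap */
	struct timeout past = { .to_time = UINT64_MAX - 10 };	/* 5ns ago */
	struct timeout future = { .to_time = 4 };   /* 10ns ahead, wrapped */

	/* hardclock's due test: is the heap minimum at or before "now"? */
	printf("past:   cmp=%d (due, post softclock)\n",
	    timeout_cmp(&now, &past));
	printf("future: cmp=%d (not due, despite 4 < %llu)\n",
	    timeout_cmp(&now, &future), (unsigned long long)now.to_time);
	return 0;
}

Because every interval is clamped to MAXTSLP (INT64_MAX), no two live
deadlines can ever be more than half the 64-bit range apart, which is exactly
the precondition the signed-delta trick needs.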
@@ -595,69 +572,30 @@ timeout_maskwheel(uint32_t level, const void timeout_hardclock_update(void) { - struct timespec elapsed, now; - struct kclock *kc; - struct timespec *lastscan = &timeout_kclock[KCLOCK_UPTIME].kc_lastscan; - int b, done, first, i, last, level, need_softclock = 1, off; + int need_softclock = 0; mtx_enter(&timeout_mutex); + if (!CIRCQ_EMPTY(&timeout_new)) + need_softclock = 1; + else { + size_t i; - MOVEBUCKET(0, ticks); - if (MASKWHEEL(0, ticks) == 0) { - MOVEBUCKET(1, ticks); - if (MASKWHEEL(1, ticks) == 0) { - MOVEBUCKET(2, ticks); - if (MASKWHEEL(2, ticks) == 0) - MOVEBUCKET(3, ticks); - } - } - - /* - * Dump the buckets that expired while we were away. - * - * If the elapsed time has exceeded a level's limit then we need - * to dump every bucket in the level. We have necessarily completed - * a lap of that level, too, so we need to process buckets in the - * next level. - * - * Otherwise we need to compare indices: if the index of the first - * expired bucket is greater than that of the last then we have - * completed a lap of the level and need to process buckets in the - * next level. - */ - nanouptime(&now); - timespecsub(&now, lastscan, &elapsed); - for (level = 0; level < nitems(timeout_level_width); level++) { - first = timeout_maskwheel(level, lastscan); - if (elapsed.tv_sec >= timeout_level_width[level]) { - last = (first == 0) ? WHEELSIZE - 1 : first - 1; - done = 0; - } else { - last = timeout_maskwheel(level, &now); - done = first <= last; - } - off = level * WHEELSIZE; - for (b = first;; b = (b + 1) % WHEELSIZE) { - CIRCQ_CONCAT(&timeout_todo, &timeout_wheel_kc[off + b]); - if (b == last) + for (i = 0; i < nitems(timeout_kclocks); i++) { + const struct kclock *kc = &timeout_kclocks[i]; + struct timeout *to; + struct timeout now; + + to = HEAP_FIRST(timeout_heap, kc->kc_heap); + if (to == NULL) + continue; + + now.to_time = kc->kc_gettime(); + if (timeout_cmp(&now, to) >= 0) { + need_softclock = 1; break; + } } - if (done) - break; - } - - /* - * Update the cached state for each kclock. 
- */ - for (i = 0; i < nitems(timeout_kclock); i++) { - kc = &timeout_kclock[i]; - timespecadd(&now, &kc->kc_offset, &kc->kc_lastscan); - timespecsub(&kc->kc_lastscan, &tick_ts, &kc->kc_late); } - - if (CIRCQ_EMPTY(&timeout_new) && CIRCQ_EMPTY(&timeout_todo)) - need_softclock = 0; - mtx_leave(&timeout_mutex); if (need_softclock) @@ -673,6 +611,7 @@ timeout_run(struct timeout_ctx *tctx, st MUTEX_ASSERT_LOCKED(&timeout_mutex); + tostat.tos_pending--; CLR(to->to_flags, TIMEOUT_ONQUEUE); SET(to->to_flags, TIMEOUT_TRIGGERED); @@ -698,61 +637,6 @@ timeout_run(struct timeout_ctx *tctx, st tctx->tctx_running = NULL; } -void -softclock_process_kclock_timeout(struct timeout *to, int new) -{ - struct kclock *kc = &timeout_kclock[to->to_kclock]; - - if (timespeccmp(&to->to_abstime, &kc->kc_lastscan, >)) { - tostat.tos_scheduled++; - if (!new) - tostat.tos_rescheduled++; - CIRCQ_INSERT_TAIL(&timeout_wheel_kc[timeout_bucket(to)], - &to->to_list); - return; - } - if (!new && timespeccmp(&to->to_abstime, &kc->kc_late, <=)) - tostat.tos_late++; - if (ISSET(to->to_flags, TIMEOUT_PROC)) { -#ifdef MULTIPROCESSOR - if (ISSET(to->to_flags, TIMEOUT_MPSAFE)) - CIRCQ_INSERT_TAIL(&timeout_proc_mp, &to->to_list); - else -#endif - CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list); - return; - } - timeout_run(&timeout_ctx_si, to); - tostat.tos_run_softclock++; -} - -void -softclock_process_tick_timeout(struct timeout *to, int new) -{ - int delta = to->to_time - ticks; - - if (delta > 0) { - tostat.tos_scheduled++; - if (!new) - tostat.tos_rescheduled++; - CIRCQ_INSERT_TAIL(&BUCKET(delta, to->to_time), &to->to_list); - return; - } - if (!new && delta < 0) - tostat.tos_late++; - if (ISSET(to->to_flags, TIMEOUT_PROC)) { -#ifdef MULTIPROCESSOR - if (ISSET(to->to_flags, TIMEOUT_MPSAFE)) - CIRCQ_INSERT_TAIL(&timeout_proc_mp, &to->to_list); - else -#endif - CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list); - return; - } - timeout_run(&timeout_ctx_si, to); - tostat.tos_run_softclock++; -} - /* * Timeouts are processed here instead of timeout_hardclock_update() * to avoid doing any more work at IPL_CLOCK than absolutely necessary. 
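softclock(), in the next hunk, compares deadlines that were produced by the
timeout_add_*(9) family earlier in the diff: every front end converts to a
nanosecond interval, timeout_add_nsec() rounds the interval up into the next
clock tick, and timeout_add_interval() clamps it to MAXTSLP before adding
getnsecuptime(). A userland sketch of that conversion pipeline (helper names
are illustrative, and tick_nsec is assumed to be 10ms, i.e. hz = 100):

#include <stdint.h>
#include <stdio.h>

#define MAXTSLP		INT64_MAX	/* per the sys/systm.h hunk below */

static const uint64_t tick_nsec = 10000000;	/* assumed: hz = 100 */

/* mirrors timeout_add_nsec(): round up to the next tick, clamp at MAXTSLP */
static uint64_t
interval_from_nsec(uint64_t nsecs)
{
	if (nsecs > 0) {
		if (nsecs >= (MAXTSLP - tick_nsec))
			return MAXTSLP;
		nsecs += tick_nsec - 1;		/* never fire early */
	}
	return nsecs;
}

/* mirrors timeout_add_msec(): guard the multiply, then convert */
static uint64_t
interval_from_msec(uint64_t msecs)
{
	if (msecs >= (UINT64_MAX / 1000000))
		return MAXTSLP;
	return interval_from_nsec(msecs * 1000000);
}

int
main(void)
{
	printf("100ms -> %llu nsec\n",
	    (unsigned long long)interval_from_msec(100));
	printf("1ns   -> %llu nsec (pushed into the next tick)\n",
	    (unsigned long long)interval_from_nsec(1));
	printf("huge  -> %llu nsec (clamped to MAXTSLP)\n",
	    (unsigned long long)interval_from_msec(UINT64_MAX / 1000000));
	return 0;
}

The deadline stored in to_time is then getnsecuptime() plus this interval, so
tick-based and absolute timeouts end up on the same uint64_t timescale and a
single heap ordering works for both.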
@@ -762,44 +646,63 @@ softclock_process_tick_timeout(struct ti
 void
 softclock(void *arg)
 {
-	struct timeout *first_new, *to;
-	int needsproc, new;
+	struct timeout now[KCLOCK_MAX];
+	const struct kclock *kc;
+	struct timeout_heap *theap;
+	struct timeout *to;
+	struct timeout_ctx *tctx;
+	int needsproc;
 #ifdef MULTIPROCESSOR
-	int need_proc_mp;
+	int needsproc_mp;
 #endif
+	int i;
 
-	first_new = NULL;
-	new = 0;
+	for (i = 0; i < KCLOCK_MAX; i++) {
+		kc = &timeout_kclocks[i];
+		now[i].to_time = kc->kc_gettime();
+	}
 
 	mtx_enter(&timeout_mutex);
-	if (!CIRCQ_EMPTY(&timeout_new))
-		first_new = timeout_from_circq(CIRCQ_FIRST(&timeout_new));
-	CIRCQ_CONCAT(&timeout_todo, &timeout_new);
+	tostat.tos_softclocks++;
+
+	for (i = 0; i < KCLOCK_MAX; i++) {
+		kc = &timeout_kclocks[i];
+		theap = kc->kc_heap;
+
+		while ((to = timeout_heap_extract(theap, &now[i])) != NULL) {
+			tctx = timeout_context(to);
+			timeout_list_insert(tctx->tctx_todo, to);
+		}
+	}
+
+	while (!CIRCQ_EMPTY(&timeout_new)) {
+		to = timeout_from_circq(CIRCQ_FIRST(&timeout_new));
+		CIRCQ_REMOVE(&to->to_entry.to_list);
+
+		if (timeout_cmp(&now[timeout_get_kclock(to)], to) >= 0) {
+			tctx = timeout_context(to);
+			timeout_list_insert(tctx->tctx_todo, to);
+		} else {
+			timeout_heap_insert(timeout_get_heap(to), to);
+		}
+	}
+
 	while (!CIRCQ_EMPTY(&timeout_todo)) {
 		to = timeout_from_circq(CIRCQ_FIRST(&timeout_todo));
-		CIRCQ_REMOVE(&to->to_list);
-		if (to == first_new)
-			new = 1;
-		if (to->to_kclock == KCLOCK_NONE)
-			softclock_process_tick_timeout(to, new);
-		else if (to->to_kclock == KCLOCK_UPTIME)
-			softclock_process_kclock_timeout(to, new);
-		else {
-			panic("%s: invalid to_clock: %d",
-			    __func__, to->to_kclock);
-		}
+		CIRCQ_REMOVE(&to->to_entry.to_list);
+		timeout_run(&timeout_ctx_si, to);
+		tostat.tos_run_softclock++;
 	}
-	tostat.tos_softclocks++;
 	needsproc = !CIRCQ_EMPTY(&timeout_proc);
 #ifdef MULTIPROCESSOR
-	need_proc_mp = !CIRCQ_EMPTY(&timeout_proc_mp);
+	needsproc_mp = !CIRCQ_EMPTY(&timeout_proc_mp);
 #endif
 	mtx_leave(&timeout_mutex);
 
 	if (needsproc)
 		wakeup(&timeout_proc);
 #ifdef MULTIPROCESSOR
-	if (need_proc_mp)
+	if (needsproc_mp)
 		wakeup(&timeout_proc_mp);
 #endif
 }
@@ -833,7 +736,7 @@ softclock_thread_run(struct timeout_ctx 
 		tostat.tos_thread_wakeups++;
 		while (!CIRCQ_EMPTY(todo)) {
 			to = timeout_from_circq(CIRCQ_FIRST(todo));
-			CIRCQ_REMOVE(&to->to_list);
+			CIRCQ_REMOVE(&to->to_entry.to_list);
 			timeout_run(tctx, to);
 			tostat.tos_run_thread++;
 		}
@@ -878,31 +781,7 @@ softclock_thread_mp(void *arg)
 void
 timeout_adjust_ticks(int adj)
 {
-	struct timeout *to;
-	struct circq *p;
-	int new_ticks, b;
-
-	/* adjusting the monotonic clock backwards would be a Bad Thing */
-	if (adj <= 0)
-		return;
-
-	mtx_enter(&timeout_mutex);
-	new_ticks = ticks + adj;
-	for (b = 0; b < nitems(timeout_wheel); b++) {
-		p = CIRCQ_FIRST(&timeout_wheel[b]);
-		while (p != &timeout_wheel[b]) {
-			to = timeout_from_circq(p);
-			p = CIRCQ_FIRST(p);
-
-			/* when moving a timeout forward need to reinsert it */
-			if (to->to_time - ticks < adj)
-				to->to_time = new_ticks;
-			CIRCQ_REMOVE(&to->to_list);
-			CIRCQ_INSERT_TAIL(&timeout_todo, &to->to_list);
-		}
-	}
-	ticks = new_ticks;
-	mtx_leave(&timeout_mutex);
+	printf("%s: adj %d\n", __func__, adj);
 }
 #endif
 
@@ -927,12 +806,9 @@ const char *db_timespec(const struct tim
 const char *
 db_kclock(int kclock)
 {
-	switch (kclock) {
-	case KCLOCK_UPTIME:
-		return "uptime";
-	default:
-		return "invalid";
-	}
+	if (kclock < 0 || kclock >= KCLOCK_MAX)
+		return ("invalid");
+	return (timeout_kclocks[kclock].kc_name);
 }
 
 const char *
@@ -965,11 
+841,8 @@ db_show_callout_bucket(struct circq *buc void db_show_timeout(struct timeout *to, struct circq *bucket) { - struct timespec remaining; - struct kclock *kc; - char buf[8]; + uint64_t now = getnsecuptime(); db_expr_t offset; - struct circq *wheel; const char *name, *where; int width = sizeof(long) * 2; @@ -985,51 +858,28 @@ db_show_timeout(struct timeout *to, stru else if (bucket == &timeout_proc_mp) where = "thread-mp"; #endif - else { - if (to->to_kclock == KCLOCK_UPTIME) - wheel = timeout_wheel_kc; - else if (to->to_kclock == KCLOCK_NONE) - wheel = timeout_wheel; - else - goto invalid; - snprintf(buf, sizeof(buf), "%3ld/%1ld", - (bucket - wheel) % WHEELSIZE, - (bucket - wheel) / WHEELSIZE); - where = buf; - } - if (to->to_kclock == KCLOCK_UPTIME) { - kc = &timeout_kclock[to->to_kclock]; - timespecsub(&to->to_abstime, &kc->kc_lastscan, &remaining); - db_printf("%20s %8s %9s 0x%0*lx %s\n", - db_timespec(&remaining), db_kclock(to->to_kclock), where, - width, (ulong)to->to_arg, name); - } else if (to->to_kclock == KCLOCK_NONE) { - db_printf("%20d %8s %9s 0x%0*lx %s\n", - to->to_time - ticks, "ticks", where, - width, (ulong)to->to_arg, name); - } else - goto invalid; - return; - - invalid: - db_printf("%s: timeout 0x%p: invalid to_kclock: %d", - __func__, to, to->to_kclock); + else if (to->to_kclock == KCLOCK_UPTIME) + where = "timeout_heap_kc"; + else + where = "timeout_heap_si"; + + db_printf("%20lld %8s %9s 0x%0*lx %s\n", + to->to_time - now, "nsecs", where, + width, (ulong)to->to_arg, name); } void db_show_callout(db_expr_t addr, int haddr, db_expr_t count, char *modif) { - struct kclock *kc; int width = sizeof(long) * 2 + 2; - int b, i; db_printf("%20s %8s\n", "lastscan", "clock"); db_printf("%20d %8s\n", ticks, "ticks"); - for (i = 0; i < nitems(timeout_kclock); i++) { - kc = &timeout_kclock[i]; - db_printf("%20s %8s\n", - db_timespec(&kc->kc_lastscan), db_kclock(i)); - } +// for (i = 0; i < nitems(timeout_kclock); i++) { +// kc = &timeout_kclock[i]; +// db_printf("%20s %8s\n", +// db_timespec(&kc->kc_lastscan), db_kclock(i)); +// } db_printf("\n"); db_printf("%20s %8s %9s %*s %s\n", "remaining", "clock", "wheel", width, "arg", "func"); @@ -1039,9 +889,9 @@ db_show_callout(db_expr_t addr, int hadd #ifdef MULTIPROCESSOR db_show_callout_bucket(&timeout_proc_mp); #endif - for (b = 0; b < nitems(timeout_wheel); b++) - db_show_callout_bucket(&timeout_wheel[b]); - for (b = 0; b < nitems(timeout_wheel_kc); b++) - db_show_callout_bucket(&timeout_wheel_kc[b]); +// for (b = 0; b < nitems(timeout_wheel); b++) +// db_show_callout_bucket(&timeout_wheel[b]); +// for (b = 0; b < nitems(timeout_wheel_kc); b++) +// db_show_callout_bucket(&timeout_wheel_kc[b]); } #endif Index: kern/subr_heap.c =================================================================== RCS file: kern/subr_heap.c diff -N kern/subr_heap.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ kern/subr_heap.c 25 May 2025 00:11:08 -0000 @@ -0,0 +1,197 @@ +/* */ + +/* + * Copyright (c) 2017 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/heap.h>
+
+static inline struct _heap_entry *
+heap_n2e(const struct _heap_type *t, void *node)
+{
+	unsigned long addr = (unsigned long)node;
+
+	return ((struct _heap_entry *)(addr + t->t_offset));
+}
+
+static inline void *
+heap_e2n(const struct _heap_type *t, struct _heap_entry *he)
+{
+	unsigned long addr = (unsigned long)he;
+
+	return ((void *)(addr - t->t_offset));
+}
+
+struct _heap_entry *
+_heap_merge(const struct _heap_type *t,
+    struct _heap_entry *he1, struct _heap_entry *he2)
+{
+	struct _heap_entry *hi, *lo;
+	struct _heap_entry *child;
+
+	if (he1 == NULL)
+		return (he2);
+	if (he2 == NULL)
+		return (he1);
+
+	if (t->t_compare(heap_e2n(t, he1), heap_e2n(t, he2)) < 0) {
+		lo = he1;
+		hi = he2;
+	} else {
+		hi = he1;
+		lo = he2;
+	}
+
+	child = lo->he_child;
+
+	hi->he_left = lo;
+	hi->he_nextsibling = child;
+	if (child != NULL)
+		child->he_left = hi;
+	lo->he_child = hi;
+	lo->he_left = NULL;
+	lo->he_nextsibling = NULL;
+
+	return (lo);
+}
+
+static inline void
+_heap_sibling_remove(struct _heap_entry *he)
+{
+	if (he->he_left == NULL)
+		return;
+
+	if (he->he_left->he_child == he) {
+		if ((he->he_left->he_child = he->he_nextsibling) != NULL)
+			he->he_nextsibling->he_left = he->he_left;
+	} else {
+		if ((he->he_left->he_nextsibling = he->he_nextsibling) != NULL)
+			he->he_nextsibling->he_left = he->he_left;
+	}
+
+	he->he_left = NULL;
+	he->he_nextsibling = NULL;
+}
+
+static inline struct _heap_entry *
+_heap_2pass_merge(const struct _heap_type *t, struct _heap_entry *root)
+{
+	struct _heap_entry *node, *next = NULL;
+	struct _heap_entry *tmp, *list = NULL;
+
+	node = root->he_child;
+	if (node == NULL)
+		return (NULL);
+
+	root->he_child = NULL;
+
+	/* first pass */
+	for (next = node->he_nextsibling; next != NULL;
+	    next = (node != NULL ?
node->he_nextsibling : NULL)) { + tmp = next->he_nextsibling; + node = _heap_merge(t, node, next); + + /* insert head */ + node->he_nextsibling = list; + list = node; + node = tmp; + } + + /* odd child case */ + if (node != NULL) { + node->he_nextsibling = list; + list = node; + } + + /* second pass */ + while (list->he_nextsibling != NULL) { + tmp = list->he_nextsibling->he_nextsibling; + list = _heap_merge(t, list, list->he_nextsibling); + list->he_nextsibling = tmp; + } + + list->he_left = NULL; + list->he_nextsibling = NULL; + + return (list); +} + +void +_heap_insert(const struct _heap_type *t, struct _heap *h, void *node) +{ + struct _heap_entry *he = heap_n2e(t, node); + + he->he_left = NULL; + he->he_child = NULL; + he->he_nextsibling = NULL; + + h->h_root = _heap_merge(t, h->h_root, he); +} + +void +_heap_remove(const struct _heap_type *t, struct _heap *h, void *node) +{ + struct _heap_entry *he = heap_n2e(t, node); + + if (he->he_left == NULL) { + _heap_extract(t, h); + return; + } + + _heap_sibling_remove(he); + h->h_root = _heap_merge(t, h->h_root, _heap_2pass_merge(t, he)); +} + +void * +_heap_first(const struct _heap_type *t, struct _heap *h) +{ + struct _heap_entry *first = h->h_root; + + if (first == NULL) + return (NULL); + + return (heap_e2n(t, first)); +} + +void * +_heap_extract(const struct _heap_type *t, struct _heap *h) +{ + struct _heap_entry *first = h->h_root; + + if (first == NULL) + return (NULL); + + h->h_root = _heap_2pass_merge(t, first); + + return (heap_e2n(t, first)); +} + +void * +_heap_cextract(const struct _heap_type *t, struct _heap *h, const void *key) +{ + struct _heap_entry *first = h->h_root; + void *node; + + if (first == NULL) + return (NULL); + + node = heap_e2n(t, first); + if (t->t_compare(key, node) < 0) + return (NULL); + + h->h_root = _heap_2pass_merge(t, first); + + return (node); +} Index: conf/files =================================================================== RCS file: /cvs/src/sys/conf/files,v diff -u -p -r1.745 files --- conf/files 23 Apr 2025 21:40:22 -0000 1.745 +++ conf/files 25 May 2025 00:11:08 -0000 @@ -764,6 +764,7 @@ file kern/subr_log.c file kern/subr_percpu.c file kern/subr_poison.c diagnostic file kern/subr_pool.c +file kern/subr_heap.c file kern/subr_tree.c file kern/dma_alloc.c file kern/subr_prf.c Index: sys/heap.h =================================================================== RCS file: sys/heap.h diff -N sys/heap.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/heap.h 25 May 2025 00:11:08 -0000 @@ -0,0 +1,143 @@ +/* */ + +/* + * Copyright (c) 2017 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#ifndef _HEAP_H_ +#define _HEAP_H_ + +#include + +struct _heap_type { + int (*t_compare)(const void *, const void *); + unsigned int t_offset; /* offset of heap_entry in type */ +}; + +struct _heap_entry { + struct _heap_entry *he_left; + struct _heap_entry *he_child; + struct _heap_entry *he_nextsibling; +}; +#define HEAP_ENTRY(_entry) struct _heap_entry + +struct _heap { + struct _heap_entry *h_root; +}; + +#define HEAP_HEAD(_name) \ +struct _name { \ + struct _heap heap; \ +} + +static inline void +_heap_init(struct _heap *h) +{ + h->h_root = NULL; +} + +static inline int +_heap_empty(struct _heap *h) +{ + return (h->h_root == NULL); +} + +void _heap_insert(const struct _heap_type *, struct _heap *, void *); +void _heap_remove(const struct _heap_type *, struct _heap *, void *); +void *_heap_first(const struct _heap_type *, struct _heap *); +struct _heap_entry * + _heap_merge(const struct _heap_type *, + struct _heap_entry *, struct _heap_entry *); +void *_heap_extract(const struct _heap_type *, struct _heap *); +void *_heap_cextract(const struct _heap_type *, struct _heap *, + const void *); + +#define HEAP_INITIALIZER(_head) { { NULL } } + +#define HEAP_PROTOTYPE(_name, _type) \ +extern const struct _heap_type *const _name##_HEAP_TYPE; \ + \ +static __unused inline void \ +_name##_HEAP_INIT(struct _name *head) \ +{ \ + _heap_init(&head->heap); \ +} \ + \ +static __unused inline void \ +_name##_HEAP_INSERT(struct _name *head, struct _type *elm) \ +{ \ + _heap_insert(_name##_HEAP_TYPE, &head->heap, elm); \ +} \ + \ +static __unused inline void \ +_name##_HEAP_REMOVE(struct _name *head, struct _type *elm) \ +{ \ + _heap_remove(_name##_HEAP_TYPE, &head->heap, elm); \ +} \ + \ +static __unused inline struct _type * \ +_name##_HEAP_FIRST(struct _name *head) \ +{ \ + return _heap_first(_name##_HEAP_TYPE, &head->heap); \ +} \ + \ +static __unused inline void \ +_name##_HEAP_MERGE(struct _name *head1, struct _name *head2) \ +{ \ + head1->heap.h_root = _heap_merge(_name##_HEAP_TYPE, \ + head1->heap.h_root, head2->heap.h_root); \ +} \ + \ +static __unused inline struct _type * \ +_name##_HEAP_EXTRACT(struct _name *head) \ +{ \ + return _heap_extract(_name##_HEAP_TYPE, &head->heap); \ +} \ + \ +static __unused inline struct _type * \ +_name##_HEAP_CEXTRACT(struct _name *head, const struct _type *key) \ +{ \ + return _heap_cextract(_name##_HEAP_TYPE, &head->heap, key); \ +} \ + \ +static __unused inline int \ +_name##_HEAP_EMPTY(struct _name *head) \ +{ \ + return _heap_empty(&head->heap); \ +} + +#define HEAP_GENERATE(_name, _type, _field, _cmp) \ +static int \ +_name##_HEAP_COMPARE(const void *lptr, const void *rptr) \ +{ \ + const struct _type *l = lptr, *r = rptr; \ + return _cmp(l, r); \ +} \ +static const struct _heap_type _name##_HEAP_INFO = { \ + _name##_HEAP_COMPARE, \ + offsetof(struct _type, _field), \ +}; \ +const struct _heap_type *const _name##_HEAP_TYPE = &_name##_HEAP_INFO + +#define HEAP_INIT(_name, _h) _name##_HEAP_INIT((_h)) +#define HEAP_INSERT(_name, _h, _e) _name##_HEAP_INSERT((_h), (_e)) +#define HEAP_REMOVE(_name, _h, _e) _name##_HEAP_REMOVE((_h), (_e)) +#define HEAP_FIRST(_name, _h) _name##_HEAP_FIRST((_h)) +#define HEAP_MERGE(_name, _h1, _h2) _name##_HEAP_MERGE((_h1), (_h2)) +#define HEAP_EXTRACT(_name, _h) _name##_HEAP_EXTRACT((_h)) +#define HEAP_CEXTRACT(_name, _h, _k) _name##_HEAP_CEXTRACT((_h), (_k)) +#define HEAP_EMPTY(_name, _h) _name##_HEAP_EMPTY((_h)) + +#endif /* _HEAP_H_ */ Index: sys/systm.h 
===================================================================
RCS file: /cvs/src/sys/sys/systm.h,v
diff -u -p -r1.171 systm.h
--- sys/systm.h	28 May 2024 12:50:23 -0000	1.171
+++ sys/systm.h	25 May 2025 00:11:08 -0000
@@ -265,7 +265,7 @@ void	cond_wait(struct cond *, const char
 void	cond_signal(struct cond *);
 
 #define INFSLP	UINT64_MAX
-#define MAXTSLP	(UINT64_MAX - 1)
+#define MAXTSLP	INT64_MAX
 
 struct mutex;
 struct rwlock;
Index: sys/timeout.h
===================================================================
RCS file: /cvs/src/sys/sys/timeout.h,v
diff -u -p -r1.51 timeout.h
--- sys/timeout.h	23 May 2025 23:56:14 -0000	1.51
+++ sys/timeout.h	25 May 2025 00:11:08 -0000
@@ -28,6 +28,7 @@
 #define _SYS_TIMEOUT_H_
 
 #include <sys/time.h>
+#include <sys/heap.h>
 
 struct circq {
 	struct circq *next;		/* next element */
@@ -35,14 +36,16 @@ struct circq {
 };
 
 struct timeout {
-	struct circq to_list;			/* timeout queue, don't move */
-	struct timespec to_abstime;		/* absolute time to run at */
+	union {
+		HEAP_ENTRY(timeout) to_heap;
+		struct circq to_list;
+	} to_entry;				/* timeout queue, don't move */
+	uint64_t to_time;			/* when to fire event */
 	void (*to_func)(void *);		/* function to call */
 	void *to_arg;				/* function argument */
 #if 1 /* NKCOV > 0 */
 	struct process *to_process;		/* kcov identifier */
 #endif
-	int to_time;				/* ticks on event */
 	int to_flags;				/* misc flags */
 	int to_kclock;				/* abstime's kernel clock */
 };
@@ -51,10 +54,11 @@ struct timeout {
  * flags in the to_flags field.
  */
 #define TIMEOUT_PROC		0x01	/* needs a process context */
-#define TIMEOUT_ONQUEUE		0x02	/* on any timeout queue */
-#define TIMEOUT_INITIALIZED	0x04	/* initialized */
-#define TIMEOUT_TRIGGERED	0x08	/* running or ran */
-#define TIMEOUT_MPSAFE		0x10	/* run without kernel lock */
+#define TIMEOUT_MPSAFE		0x02	/* run without kernel lock */
+#define TIMEOUT_ONQUEUE		0x04	/* on any timeout queue */
+#define TIMEOUT_ONHEAP		0x08	/* on a kclock timeout heap */
+#define TIMEOUT_INITIALIZED	0x10	/* initialized */
+#define TIMEOUT_TRIGGERED	0x20	/* running or ran */
 
 struct timeoutstat {
 	uint64_t tos_added;		/* timeout_add*(9) calls */
@@ -89,8 +93,6 @@ int timeout_sysctl(void *, size_t *, voi
 #define KCLOCK_MAX	1
 
 #define TIMEOUT_INITIALIZER_FLAGS(_fn, _arg, _kclock, _flags) {		\
-    .to_list = { NULL, NULL },						\
-    .to_abstime = { .tv_sec = 0, .tv_nsec = 0 },			\
    .to_func = (_fn),							\
    .to_arg = (_arg),							\
    .to_time = 0,							\
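For reviewers unfamiliar with the sys/heap.h API added by this diff: it
follows the sys/tree.h convention of a type-erased core (subr_heap.c) plus
macro-generated typed wrappers. The sketch below shows the intended
instantiation pattern, mirroring what kern_timeout.c does with timeout_heap;
the struct foo type, its fields, and foo_cmp() are hypothetical, invented
purely for illustration:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/heap.h>

struct foo {
	HEAP_ENTRY(foo) foo_entry;	/* embedded pairing-heap linkage */
	uint64_t foo_deadline;		/* the ordering key */
};

HEAP_HEAD(foo_heap);			/* declares struct foo_heap */
HEAP_PROTOTYPE(foo_heap, foo);		/* declares the typed wrappers */

static inline int
foo_cmp(const struct foo *a, const struct foo *b)
{
	if (a->foo_deadline < b->foo_deadline)
		return (-1);
	if (a->foo_deadline > b->foo_deadline)
		return (1);
	return (0);
}

HEAP_GENERATE(foo_heap, foo, foo_entry, foo_cmp);

struct foo_heap foos = HEAP_INITIALIZER(&foos);

/* insert one element, then drain everything that is due at "now" */
void
foo_example(struct foo *f, const struct foo *now)
{
	struct foo *due;

	HEAP_INSERT(foo_heap, &foos, f);
	while ((due = HEAP_CEXTRACT(foo_heap, &foos, now)) != NULL) {
		printf("deadline %llu is due\n",
		    (unsigned long long)due->foo_deadline);
	}
}

HEAP_CEXTRACT() is the operation softclock() leans on: it only hands back the
minimum element while the comparison says it is at or past the key, so
draining due work never disturbs entries that are still in the future. Insert
is O(1) and extract-min is amortized O(log n) in a pairing heap, which suits a
timeout queue where most timeouts are added and deleted without ever firing.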