Index: arch/amd64/amd64/cpu.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v retrieving revision 1.175 diff -u -p -r1.175 cpu.c --- arch/amd64/amd64/cpu.c 31 Jul 2023 04:01:07 -0000 1.175 +++ arch/amd64/amd64/cpu.c 28 Aug 2023 02:55:00 -0000 @@ -1075,7 +1075,7 @@ cpu_hatch(void *v) lcr8(0); intr_enable(); - nanouptime(&ci->ci_schedstate.spc_runtime); + ci->ci_schedstate.spc_runtime = nsecuptime(); splx(s); lapic_startclock(); Index: arch/amd64/include/cpu_full.h =================================================================== RCS file: /cvs/src/sys/arch/amd64/include/cpu_full.h,v retrieving revision 1.5 diff -u -p -r1.5 cpu_full.h --- arch/amd64/include/cpu_full.h 17 May 2019 19:07:47 -0000 1.5 +++ arch/amd64/include/cpu_full.h 28 Aug 2023 02:55:00 -0000 @@ -58,6 +58,7 @@ CTASSERT(_ALIGN(sizeof(struct x86_64_tss /* verify expected alignment */ CTASSERT(offsetof(struct cpu_info_full, cif_cpu.ci_PAGEALIGN) % PAGE_SIZE == 0); +CTASSERT(offsetof(struct cpu_info_full, cif_cpu.ci_mds_tmp) % 32 == 0); /* verify total size is multiple of page size */ CTASSERT(sizeof(struct cpu_info_full) % PAGE_SIZE == 0); Index: kern/init_main.c =================================================================== RCS file: /cvs/src/sys/kern/init_main.c,v retrieving revision 1.321 diff -u -p -r1.321 init_main.c --- kern/init_main.c 15 Jun 2023 22:18:06 -0000 1.321 +++ kern/init_main.c 28 Aug 2023 02:55:04 -0000 @@ -481,15 +481,13 @@ main(void *framep) /* * Now can look at time, having had a chance to verify the time - * from the file system. Reset p->p_rtime as it may have been + * from the file system. Reset proc runtime as it may have been * munched in mi_switch() after the time got set. 
*/ LIST_FOREACH(pr, &allprocess, ps_list) { nanouptime(&pr->ps_start); - TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) { - nanouptime(&p->p_cpu->ci_schedstate.spc_runtime); - timespecclear(&p->p_rtime); - } + TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) + p->p_cpu->ci_schedstate.spc_runtime = nsecuptime(); } uvm_swap_init(); Index: kern/kern_acct.c =================================================================== RCS file: /cvs/src/sys/kern/kern_acct.c,v retrieving revision 1.47 diff -u -p -r1.47 kern_acct.c --- kern/kern_acct.c 14 Aug 2022 01:58:27 -0000 1.47 +++ kern/kern_acct.c 28 Aug 2023 02:55:04 -0000 @@ -70,7 +70,7 @@ /* * Internal accounting functions. */ -comp_t encode_comp_t(u_long, u_long); +comp_t encode_comp_t(uint64_t); int acct_start(void); void acct_thread(void *); void acct_shutdown(void); @@ -164,13 +164,11 @@ out: * "acct.h" header file.) */ int -acct_process(struct proc *p) +acct_process(struct proc *p, struct process *pr) { struct acct acct; - struct process *pr = p->p_p; - struct rusage *r; - struct timespec booted, elapsed, realstart, st, tmp, uptime, ut; - int t; + struct krusage *ru = &pr->ps_ru; + struct timespec booted, elapsed, realstart, uptime; struct vnode *vp; int error = 0; @@ -196,9 +194,10 @@ acct_process(struct proc *p) memcpy(acct.ac_comm, pr->ps_comm, sizeof acct.ac_comm); /* (2) The amount of user and system time that was used */ - calctsru(&pr->ps_tu, &ut, &st, NULL); - acct.ac_utime = encode_comp_t(ut.tv_sec, ut.tv_nsec); - acct.ac_stime = encode_comp_t(st.tv_sec, st.tv_nsec); + acct.ac_utime = + encode_comp_t(USEC_TO_NSEC(ru->kru_counters[kru_uticks] * tick)); + acct.ac_stime = + encode_comp_t(USEC_TO_NSEC(ru->kru_counters[kru_sticks] * tick)); /* (3) The elapsed time the command ran (and its starting time) */ nanouptime(&uptime); @@ -206,19 +205,14 @@ acct_process(struct proc *p) timespecadd(&booted, &pr->ps_start, &realstart); acct.ac_btime = realstart.tv_sec; timespecsub(&uptime, &pr->ps_start, &elapsed); - 
acct.ac_etime = encode_comp_t(elapsed.tv_sec, elapsed.tv_nsec); + acct.ac_etime = encode_comp_t(TIMESPEC_TO_NSEC(&elapsed)); /* (4) The average amount of memory used */ - r = &p->p_ru; - timespecadd(&ut, &st, &tmp); - t = tmp.tv_sec * hz + tmp.tv_nsec / (1000 * tick); - if (t) - acct.ac_mem = (r->ru_ixrss + r->ru_idrss + r->ru_isrss) / t; - else - acct.ac_mem = 0; + acct.ac_mem = 0; /* XXX we don't track memory usage */ /* (5) The number of disk I/O operations done */ - acct.ac_io = encode_comp_t(r->ru_inblock + r->ru_oublock, 0); + acct.ac_io = encode_comp_t(1000000000 * + (ru->kru_counters[kru_inblock] + ru->kru_counters[kru_oublock])); /* (6) The UID and GID of the process */ acct.ac_uid = pr->ps_ucred->cr_ruid; @@ -260,17 +254,17 @@ out: #define MAXFRACT ((1 << MANTSIZE) - 1) /* Maximum fractional value. */ comp_t -encode_comp_t(u_long s, u_long ns) +encode_comp_t(uint64_t ns) { int exp, rnd; + uint64_t s; exp = 0; rnd = 0; - s *= AHZ; - s += ns / (1000000000 / AHZ); /* Maximize precision. */ + s = ns / (1000000000 / AHZ); /* Maximize precision. */ while (s > MAXFRACT) { - rnd = s & (1 << (EXPSIZE - 1)); /* Round up? */ + rnd = s & (1 << (EXPSIZE - 1)); /* Round up? */ s >>= EXPSIZE; /* Base 8 exponent == 3 bit shift. 
*/ exp++; } Index: kern/kern_exec.c =================================================================== RCS file: /cvs/src/sys/kern/kern_exec.c,v retrieving revision 1.250 diff -u -p -r1.250 kern_exec.c --- kern/kern_exec.c 10 Jul 2023 03:31:57 -0000 1.250 +++ kern/kern_exec.c 28 Aug 2023 02:55:04 -0000 @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -273,6 +274,7 @@ sys_execve(struct proc *p, void *v, regi struct ps_strings arginfo; struct vmspace *vm = p->p_vmspace; struct vnode *otvp; + unsigned int gen; if (vm->vm_execve && (pc >= vm->vm_execve_end || pc < vm->vm_execve)) { @@ -524,6 +526,8 @@ sys_execve(struct proc *p, void *v, regi strlcpy(pr->ps_comm, nid.ni_cnd.cn_nameptr, sizeof(pr->ps_comm)); pr->ps_acflag &= ~AFORK; + LLTRACE(lltrace_pidname, p); + /* record proc's vnode, for use by sysctl */ otvp = pr->ps_textvp; vref(pack.ep_vp); @@ -678,8 +682,12 @@ sys_execve(struct proc *p, void *v, regi } /* reset CPU time usage for the thread, but not the process */ - timespecclear(&p->p_tu.tu_runtime); - p->p_tu.tu_uticks = p->p_tu.tu_sticks = p->p_tu.tu_iticks = 0; + gen = pc_mprod_enter(&p->p_ru_lock); + p->p_ru.kru_counters[kru_runtime] = 0; + p->p_ru.kru_counters[kru_uticks] = 0; + p->p_ru.kru_counters[kru_sticks] = 0; + p->p_ru.kru_counters[kru_iticks] = 0; + pc_mprod_leave(&p->p_ru_lock, gen); memset(p->p_name, 0, sizeof p->p_name); Index: kern/kern_exit.c =================================================================== RCS file: /cvs/src/sys/kern/kern_exit.c,v retrieving revision 1.211 diff -u -p -r1.211 kern_exit.c --- kern/kern_exit.c 25 Apr 2023 18:14:06 -0000 1.211 +++ kern/kern_exit.c 28 Aug 2023 02:55:04 -0000 @@ -117,7 +117,6 @@ void exit1(struct proc *p, int xexit, int xsig, int flags) { struct process *pr, *qr, *nqr; - struct rusage *rup; int s; atomic_setbits_int(&p->p_flag, P_WEXIT); @@ -168,16 +167,6 @@ exit1(struct proc *p, int xexit, int xsi stopprofclock(pr); } - rup = pr->ps_ru; - if (rup == NULL) { - 
rup = pool_get(&rusage_pool, PR_WAITOK | PR_ZERO); - if (pr->ps_ru == NULL) { - pr->ps_ru = rup; - } else { - pool_put(&rusage_pool, rup); - rup = pr->ps_ru; - } - } p->p_siglist = 0; if ((p->p_flag & P_THREAD) == 0) pr->ps_siglist = 0; @@ -201,9 +190,6 @@ exit1(struct proc *p, int xexit, int xsi semexit(pr); #endif killjobc(pr); -#ifdef ACCOUNTING - acct_process(p); -#endif #ifdef KTRACE /* release trace file */ @@ -296,10 +282,6 @@ exit1(struct proc *p, int xexit, int xsi } } - /* add thread's accumulated rusage into the process's total */ - ruadd(rup, &p->p_ru); - tuagg(pr, p); - /* * clear %cpu usage during swap */ @@ -307,13 +289,6 @@ exit1(struct proc *p, int xexit, int xsi if ((p->p_flag & P_THREAD) == 0) { /* - * Final thread has died, so add on our children's rusage - * and calculate the total times - */ - calcru(&pr->ps_tu, &rup->ru_utime, &rup->ru_stime, NULL); - ruadd(rup, &pr->ps_cru); - - /* * Notify parent that we're gone. If we're not going to * become a zombie, reparent to process 1 (init) so that * we can wake our original parent to possibly unblock @@ -443,6 +418,10 @@ reaper(void *arg) } else { struct process *pr = p->p_p; +#ifdef ACCOUNTING + acct_process(p, pr); +#endif + /* Release the rest of the process's vmspace */ uvm_exit(pr); @@ -512,8 +491,10 @@ loop: if (statusp != NULL) *statusp = W_EXITCODE(pr->ps_xexit, pr->ps_xsig); - if (rusage != NULL) - memcpy(rusage, pr->ps_ru, sizeof(*rusage)); + if (rusage != NULL) { + krusage_export(rusage, + pr->ps_ru_maxrss, &pr->ps_ru); + } if ((options & WNOWAIT) == 0) proc_finish_wait(q, p); return (0); @@ -708,8 +689,9 @@ sys_waitid(struct proc *q, void *v, regi void proc_finish_wait(struct proc *waiter, struct proc *p) { - struct process *pr, *tr; - struct rusage *rup; + struct process *pr, *tr, *ppr; + unsigned int gen; + size_t i; /* * If we got the child via a ptrace 'attach', @@ -725,9 +707,18 @@ proc_finish_wait(struct proc *waiter, st wakeup(tr); } else { scheduler_wait_hook(waiter, p); - rup = 
&waiter->p_p->ps_cru; - ruadd(rup, pr->ps_ru); LIST_REMOVE(pr, ps_list); /* off zombprocess */ + + ppr = waiter->p_p; + gen = pc_mprod_enter(&ppr->ps_cru_lock); + if (ppr->ps_cru_maxrss < pr->ps_ru_maxrss) + ppr->ps_cru_maxrss = pr->ps_ru_maxrss; + for (i = 0; i < kru_ncounters; i++) { + ppr->ps_cru.kru_counters[i] += + pr->ps_ru.kru_counters[i]; + } + pc_mprod_leave(&ppr->ps_cru_lock, gen); + freepid(pr->ps_pid); process_zap(pr); } @@ -818,7 +809,6 @@ process_zap(struct process *pr) KASSERT(pr->ps_threadcnt == 1); if (pr->ps_ptstat != NULL) free(pr->ps_ptstat, M_SUBPROC, sizeof(*pr->ps_ptstat)); - pool_put(&rusage_pool, pr->ps_ru); KASSERT(TAILQ_EMPTY(&pr->ps_threads)); sigactsfree(pr->ps_sigacts); lim_free(pr->ps_limit); Index: kern/kern_lock.c =================================================================== RCS file: /cvs/src/sys/kern/kern_lock.c,v retrieving revision 1.72 diff -u -p -r1.72 kern_lock.c --- kern/kern_lock.c 26 Apr 2022 15:31:14 -0000 1.72 +++ kern/kern_lock.c 28 Aug 2023 02:55:04 -0000 @@ -24,6 +24,8 @@ #include #include #include +#include +#include #include @@ -443,3 +452,90 @@ _mtx_init_flags(struct mutex *m, int ipl _mtx_init(m, ipl); } #endif /* WITNESS */ + +void +pc_lock_init(struct pc_lock *pcl) +{ + pcl->pcl_gen = 0; +} + +unsigned int +pc_mprod_enter(struct pc_lock *pcl) +{ + unsigned int gen, ngen, ogen; + + gen = pcl->pcl_gen; + for (;;) { + while (gen & 1) { + CPU_BUSY_CYCLE(); + gen = pcl->pcl_gen; + } + + ngen = 1 + gen; + ogen = atomic_cas_uint(&pcl->pcl_gen, gen, ngen); + if (gen == ogen) + break; + + CPU_BUSY_CYCLE(); + gen = ogen; + } + + membar_enter_after_atomic(); + return (ngen); +} + +void +pc_mprod_leave(struct pc_lock *pcl, unsigned int gen) +{ + membar_exit(); + pcl->pcl_gen = ++gen; +} + +unsigned int +pc_sprod_enter(struct pc_lock *pcl) +{ + unsigned int gen; + + gen = pcl->pcl_gen; + pcl->pcl_gen = ++gen; + membar_producer(); + + return (gen); +} + +void +pc_sprod_leave(struct pc_lock *pcl, unsigned int gen) +{ + 
membar_producer(); + pcl->pcl_gen = ++gen; +} + +void +pc_cons_enter(struct pc_lock *pcl, unsigned int *genp) +{ + unsigned int gen; + + gen = pcl->pcl_gen; + while (gen & 1) { + CPU_BUSY_CYCLE(); + gen = pcl->pcl_gen; + } + + membar_consumer(); + *genp = gen; +} + +int +pc_cons_leave(struct pc_lock *pcl, unsigned int *genp) +{ + unsigned int gen; + + membar_consumer(); + + gen = pcl->pcl_gen; + if (gen == *genp) + return (0); + + *genp = gen; + return (EBUSY); +} Index: kern/kern_proc.c =================================================================== RCS file: /cvs/src/sys/kern/kern_proc.c,v retrieving revision 1.94 diff -u -p -r1.94 kern_proc.c --- kern/kern_proc.c 2 Jan 2023 23:09:48 -0000 1.94 +++ kern/kern_proc.c 28 Aug 2023 02:55:04 -0000 @@ -69,7 +69,6 @@ struct proclist allproc; struct pool proc_pool; struct pool process_pool; -struct pool rusage_pool; struct pool ucred_pool; struct pool pgrp_pool; struct pool session_pool; @@ -105,8 +104,6 @@ procinit(void) PR_WAITOK, "procpl", NULL); pool_init(&process_pool, sizeof(struct process), 0, IPL_NONE, PR_WAITOK, "processpl", NULL); - pool_init(&rusage_pool, sizeof(struct rusage), 0, IPL_NONE, - PR_WAITOK, "zombiepl", NULL); pool_init(&ucred_pool, sizeof(struct ucred), 0, IPL_MPFLOOR, 0, "ucredpl", NULL); pool_init(&pgrp_pool, sizeof(struct pgrp), 0, IPL_NONE, Index: kern/kern_resource.c =================================================================== RCS file: /cvs/src/sys/kern/kern_resource.c,v retrieving revision 1.77 diff -u -p -r1.77 kern_resource.c --- kern/kern_resource.c 4 Feb 2023 19:33:03 -0000 1.77 +++ kern/kern_resource.c 28 Aug 2023 02:55:04 -0000 @@ -367,86 +367,86 @@ sys_getrlimit(struct proc *p, void *v, r return (error); } -void -tuagg_sub(struct tusage *tup, struct proc *p) +static inline void +krusage_counters_proc(struct krusage *kru, struct proc *p, uint64_t runtime) { - timespecadd(&tup->tu_runtime, &p->p_rtime, &tup->tu_runtime); - tup->tu_uticks += p->p_uticks; - tup->tu_sticks += 
p->p_sticks; - tup->tu_iticks += p->p_iticks; + kru->kru_counters[kru_runtime] += runtime; + kru->kru_counters[kru_uticks] += p->p_uticks; + kru->kru_counters[kru_sticks] += p->p_sticks; + kru->kru_counters[kru_iticks] += p->p_iticks; } -/* - * Aggregate a single thread's immediate time counts into the running - * totals for the thread and process - */ void -tuagg_unlocked(struct process *pr, struct proc *p) +krusage_proc(struct proc *p, uint64_t runtime) { - tuagg_sub(&pr->ps_tu, p); - tuagg_sub(&p->p_tu, p); - timespecclear(&p->p_rtime); + struct process *ps = p->p_p; + unsigned int gen; + + gen = pc_mprod_enter(&p->p_ru_lock); + krusage_counters_proc(&p->p_ru, p, runtime); + pc_mprod_leave(&p->p_ru_lock, gen); + + gen = pc_mprod_enter(&ps->ps_ru_lock); + krusage_counters_proc(&ps->ps_ru, p, runtime); + pc_mprod_leave(&ps->ps_ru_lock, gen); + p->p_uticks = 0; p->p_sticks = 0; p->p_iticks = 0; } void -tuagg(struct process *pr, struct proc *p) +krusage_inc(struct proc *p, enum krusage_counter c) { - int s; - - SCHED_LOCK(s); - tuagg_unlocked(pr, p); - SCHED_UNLOCK(s); -} - -/* - * Transform the running time and tick information in a struct tusage - * into user, system, and interrupt time usage. 
- */ -void -calctsru(struct tusage *tup, struct timespec *up, struct timespec *sp, - struct timespec *ip) -{ - u_quad_t st, ut, it; - - st = tup->tu_sticks; - ut = tup->tu_uticks; - it = tup->tu_iticks; - - if (st + ut + it == 0) { - timespecclear(up); - timespecclear(sp); - if (ip != NULL) - timespecclear(ip); - return; - } + struct process *ps = p->p_p; + unsigned int gen; - st = st * 1000000000 / stathz; - sp->tv_sec = st / 1000000000; - sp->tv_nsec = st % 1000000000; - ut = ut * 1000000000 / stathz; - up->tv_sec = ut / 1000000000; - up->tv_nsec = ut % 1000000000; - if (ip != NULL) { - it = it * 1000000000 / stathz; - ip->tv_sec = it / 1000000000; - ip->tv_nsec = it % 1000000000; - } + gen = pc_mprod_enter(&p->p_ru_lock); + p->p_ru.kru_counters[c]++; + pc_mprod_leave(&p->p_ru_lock, gen); + + gen = pc_mprod_enter(&ps->ps_ru_lock); + ps->ps_ru.kru_counters[c]++; + pc_mprod_leave(&ps->ps_ru_lock, gen); +} + +uint64_t +krusage_runtime(struct krusage *kru, struct pc_lock *pcl) +{ +#if 0 && defined(__LP64__) + return (kru->kru_counters[kru_runtime]); +#else + uint64_t runtime; + unsigned int gen; + + pc_cons_enter(pcl, &gen); + do { + runtime = kru->kru_counters[kru_runtime]; + } while (pc_cons_leave(pcl, &gen) != 0); + + return (runtime); +#endif /* defined(__LP64__) */ } void -calcru(struct tusage *tup, struct timeval *up, struct timeval *sp, - struct timeval *ip) -{ - struct timespec u, s, i; - - calctsru(tup, &u, &s, ip != NULL ? 
&i : NULL); - TIMESPEC_TO_TIMEVAL(up, &u); - TIMESPEC_TO_TIMEVAL(sp, &s); - if (ip != NULL) - TIMESPEC_TO_TIMEVAL(ip, &i); +krusage_export(struct rusage *rup, uint64_t maxrss, const struct krusage *kru) +{ + USEC_TO_TIMEVAL(kru->kru_counters[kru_uticks] * tick, &rup->ru_utime); + USEC_TO_TIMEVAL(kru->kru_counters[kru_sticks] * tick, &rup->ru_stime); + rup->ru_maxrss = maxrss; + rup->ru_ixrss = 0; + rup->ru_idrss = 0; + rup->ru_isrss = 0; + rup->ru_minflt = kru->kru_counters[kru_minflt]; + rup->ru_majflt = kru->kru_counters[kru_majflt]; + rup->ru_nswap = kru->kru_counters[kru_nswap]; + rup->ru_inblock = kru->kru_counters[kru_inblock]; + rup->ru_oublock = kru->kru_counters[kru_oublock]; + rup->ru_msgsnd = kru->kru_counters[kru_msgsnd]; + rup->ru_msgrcv = kru->kru_counters[kru_msgrcv]; + rup->ru_nsignals = kru->kru_counters[kru_nsignals]; + rup->ru_nvcsw = kru->kru_counters[kru_nvcsw]; + rup->ru_nivcsw = kru->kru_counters[kru_nivcsw]; } int @@ -473,55 +473,42 @@ sys_getrusage(struct proc *p, void *v, r int dogetrusage(struct proc *p, int who, struct rusage *rup) { + unsigned long maxrss; struct process *pr = p->p_p; - struct proc *q; + unsigned int gen; switch (who) { - case RUSAGE_SELF: - /* start with the sum of dead threads, if any */ - if (pr->ps_ru != NULL) - *rup = *pr->ps_ru; - else - memset(rup, 0, sizeof(*rup)); - - /* add on all living threads */ - TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) { - ruadd(rup, &q->p_ru); - tuagg(pr, q); - } - - calcru(&pr->ps_tu, &rup->ru_utime, &rup->ru_stime, NULL); + pc_cons_enter(&pr->ps_ru_lock, &gen); + do { + krusage_export(rup, pr->ps_ru_maxrss, &pr->ps_ru); + } while (pc_cons_leave(&pr->ps_ru_lock, &gen) != 0); break; case RUSAGE_THREAD: - *rup = p->p_ru; - calcru(&p->p_tu, &rup->ru_utime, &rup->ru_stime, NULL); + pc_cons_enter(&pr->ps_ru_lock, &gen); + do { + maxrss = pr->ps_ru_maxrss; + } while (pc_cons_leave(&pr->ps_ru_lock, &gen) != 0); + + pc_cons_enter(&p->p_ru_lock, &gen); + do { + krusage_export(rup, maxrss, 
&p->p_ru); + } while (pc_cons_leave(&p->p_ru_lock, &gen) != 0); break; case RUSAGE_CHILDREN: - *rup = pr->ps_cru; + pc_cons_enter(&pr->ps_cru_lock, &gen); + do { + krusage_export(rup, pr->ps_cru_maxrss, &pr->ps_cru); + } while (pc_cons_leave(&pr->ps_cru_lock, &gen) != 0); break; default: return (EINVAL); } - return (0); -} -void -ruadd(struct rusage *ru, struct rusage *ru2) -{ - long *ip, *ip2; - int i; - - timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime); - timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime); - if (ru->ru_maxrss < ru2->ru_maxrss) - ru->ru_maxrss = ru2->ru_maxrss; - ip = &ru->ru_first; ip2 = &ru2->ru_first; - for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--) - *ip++ += *ip2++; + return (0); } /* @@ -533,18 +520,15 @@ rucheck(void *arg) { struct rlimit rlim; struct process *pr = arg; - time_t runtime; - int s; + uint64_t runtime; - KERNEL_ASSERT_LOCKED(); - - SCHED_LOCK(s); - runtime = pr->ps_tu.tu_runtime.tv_sec; - SCHED_UNLOCK(s); + runtime = krusage_runtime(&pr->ps_ru, &pr->ps_ru_lock); mtx_enter(&pr->ps_mtx); rlim = pr->ps_limit->pl_rlimit[RLIMIT_CPU]; mtx_leave(&pr->ps_mtx); + + KERNEL_ASSERT_LOCKED(); if ((rlim_t)runtime >= rlim.rlim_cur) { if ((rlim_t)runtime >= rlim.rlim_max) { Index: kern/kern_sched.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sched.c,v retrieving revision 1.86 diff -u -p -r1.86 kern_sched.c --- kern/kern_sched.c 14 Aug 2023 08:33:24 -0000 1.86 +++ kern/kern_sched.c 28 Aug 2023 02:55:04 -0000 @@ -230,13 +232,10 @@ void sched_exit(struct proc *p) { struct schedstate_percpu *spc = &curcpu()->ci_schedstate; - struct timespec ts; struct proc *idle; int s; - nanouptime(&ts); - timespecsub(&ts, &spc->spc_runtime, &ts); - timespecadd(&p->p_rtime, &ts, &p->p_rtime); + krusage_proc(p, nsecuptime() - spc->spc_runtime); if (ISSET(spc->spc_schedflags, SPCF_ITIMER)) { atomic_clearbits_int(&spc->spc_schedflags, SPCF_ITIMER); @@ -645,10 +644,11 @@ 
sched_peg_curproc(struct cpu_info *ci) struct proc *p = curproc; int s; + krusage_inc(p, kru_nvcsw); + SCHED_LOCK(s); atomic_setbits_int(&p->p_flag, P_CPUPEG); setrunqueue(ci, p, p->p_usrpri); - p->p_ru.ru_nvcsw++; mi_switch(); SCHED_UNLOCK(s); } Index: kern/kern_sig.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sig.c,v retrieving revision 1.313 diff -u -p -r1.313 kern_sig.c --- kern/kern_sig.c 16 Aug 2023 07:55:52 -0000 1.313 +++ kern/kern_sig.c 28 Aug 2023 02:55:04 -0000 @@ -769,7 +769,7 @@ out: void postsig_done(struct proc *p, int signum, sigset_t catchmask, int reset) { - p->p_ru.ru_nsignals++; + krusage_inc(p, kru_nsignals); atomic_setbits_int(&p->p_sigmask, catchmask); if (reset != 0) { sigset_t mask = sigmask(signum); Index: kern/kern_synch.c =================================================================== RCS file: /cvs/src/sys/kern/kern_synch.c,v retrieving revision 1.198 diff -u -p -r1.198 kern_synch.c --- kern/kern_synch.c 16 Aug 2023 07:55:52 -0000 1.198 +++ kern/kern_synch.c 28 Aug 2023 02:55:04 -0000 @@ -49,6 +51,7 @@ #include #include #include +#include #include @@ -401,7 +419,7 @@ sleep_finish(int timo, int do_sleep) if (do_sleep) { KASSERT(p->p_stat == SSLEEP || p->p_stat == SSTOP); - p->p_ru.ru_nvcsw++; + krusage_inc(p, kru_nvcsw); mi_switch(); } else { KASSERT(p->p_stat == SONPROC || p->p_stat == SSLEEP || @@ -522,6 +540,7 @@ unsleep(struct proc *p) p->p_wchan = NULL; TRACEPOINT(sched, unsleep, p->p_tid + THREAD_PID_OFFSET, p->p_p->ps_pid); + LLTRACE(lltrace_runnable, p); } } @@ -566,6 +585,8 @@ sys_sched_yield(struct proc *p, void *v, uint8_t newprio; int s; + krusage_inc(p, kru_nvcsw); + SCHED_LOCK(s); /* * If one of the threads of a multi-threaded process called @@ -576,7 +597,6 @@ sys_sched_yield(struct proc *p, void *v, TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link) newprio = max(newprio, q->p_runpri); setrunqueue(p->p_cpu, p, newprio); - p->p_ru.ru_nvcsw++; mi_switch(); 
SCHED_UNLOCK(s); Index: kern/kern_sysctl.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sysctl.c,v retrieving revision 1.418 diff -u -p -r1.418 kern_sysctl.c --- kern/kern_sysctl.c 16 Jul 2023 03:01:31 -0000 1.418 +++ kern/kern_sysctl.c 28 Aug 2023 02:55:04 -0000 @@ -1787,7 +1787,7 @@ fill_kproc(struct process *pr, struct ki if ((pr->ps_flags & PS_ZOMBIE) == 0) { if ((pr->ps_flags & PS_EMBRYO) == 0 && vm != NULL) ki->p_vm_rssize = vm_resident_count(vm); - calctsru(isthread ? &p->p_tu : &pr->ps_tu, &ut, &st, NULL); + timespecclear(&ut); timespecclear(&st); /* XXX not converted to krusage yet */ ki->p_uutime_sec = ut.tv_sec; ki->p_uutime_usec = ut.tv_nsec/1000; ki->p_ustime_sec = st.tv_sec; Index: kern/kern_time.c =================================================================== RCS file: /cvs/src/sys/kern/kern_time.c,v retrieving revision 1.164 diff -u -p -r1.164 kern_time.c --- kern/kern_time.c 5 Aug 2023 20:07:55 -0000 1.164 +++ kern/kern_time.c 28 Aug 2023 02:55:04 -0000 @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -112,7 +113,9 @@ settime(const struct timespec *ts) int clock_gettime(struct proc *p, clockid_t clock_id, struct timespec *tp) { + struct process *ps; struct proc *q; + uint64_t nsec; int error = 0; switch (clock_id) { @@ -127,16 +130,13 @@ clock_gettime(struct proc *p, clockid_t nanouptime(tp); break; case CLOCK_PROCESS_CPUTIME_ID: - nanouptime(tp); - timespecsub(tp, &curcpu()->ci_schedstate.spc_runtime, tp); - timespecadd(tp, &p->p_p->ps_tu.tu_runtime, tp); - timespecadd(tp, &p->p_rtime, tp); + ps = p->p_p; + nsec = krusage_runtime(&ps->ps_ru, &ps->ps_ru_lock); + NSEC_TO_TIMESPEC(nsec, tp); break; case CLOCK_THREAD_CPUTIME_ID: - nanouptime(tp); - timespecsub(tp, &curcpu()->ci_schedstate.spc_runtime, tp); - timespecadd(tp, &p->p_tu.tu_runtime, tp); - timespecadd(tp, &p->p_rtime, tp); + nsec = krusage_runtime(&p->p_ru, &p->p_ru_lock); + NSEC_TO_TIMESPEC(nsec, tp); break; default:
/* check for clock from pthread_getcpuclockid() */ @@ -145,8 +145,10 @@ clock_gettime(struct proc *p, clockid_t q = tfind_user(__CLOCK_PTID(clock_id), p->p_p); if (q == NULL) error = ESRCH; - else - *tp = q->p_tu.tu_runtime; + else { + nsec = krusage_runtime(&q->p_ru, &q->p_ru_lock); + NSEC_TO_TIMESPEC(nsec, tp); + } KERNEL_UNLOCK(); } else error = EINVAL; Index: kern/sched_bsd.c =================================================================== RCS file: /cvs/src/sys/kern/sched_bsd.c,v retrieving revision 1.83 diff -u -p -r1.83 sched_bsd.c --- kern/sched_bsd.c 19 Aug 2023 11:14:11 -0000 1.83 +++ kern/sched_bsd.c 28 Aug 2023 02:55:04 -0000 @@ -317,9 +317,10 @@ yield(void) struct proc *p = curproc; int s; + krusage_inc(p, kru_nvcsw); + SCHED_LOCK(s); setrunqueue(p->p_cpu, p, p->p_usrpri); - p->p_ru.ru_nvcsw++; mi_switch(); SCHED_UNLOCK(s); } @@ -336,9 +337,10 @@ preempt(void) struct proc *p = curproc; int s; + krusage_inc(p, kru_nivcsw); + SCHED_LOCK(s); setrunqueue(p->p_cpu, p, p->p_usrpri); - p->p_ru.ru_nivcsw++; mi_switch(); SCHED_UNLOCK(s); } @@ -349,13 +351,14 @@ mi_switch(void) struct schedstate_percpu *spc = &curcpu()->ci_schedstate; struct proc *p = curproc; struct proc *nextproc; - struct process *pr = p->p_p; - struct timespec ts; + uint64_t ts, runtime; #ifdef MULTIPROCESSOR int hold_count; int sched_count; #endif + //LLTRACE(lltrace_sched_enter); + assertwaitok(); KASSERT(p->p_stat != SONPROC); @@ -376,22 +379,21 @@ mi_switch(void) * Compute the amount of time during which the current * process was running, and add that to its total so far. */ - nanouptime(&ts); - if (timespeccmp(&ts, &spc->spc_runtime, <)) { + ts = nsecuptime(); + if (ts < spc->spc_runtime) { #if 0 - printf("uptime is not monotonic! " - "ts=%lld.%09lu, runtime=%lld.%09lu\n", - (long long)tv.tv_sec, tv.tv_nsec, - (long long)spc->spc_runtime.tv_sec, - spc->spc_runtime.tv_nsec); + printf("uptime is not monotonic! 
ts=%lluns, runtime=%lluns\n", + ts, spc->spc_runtime); #endif + runtime = 0; + spc->spc_runtime_nonmono++; } else { - timespecsub(&ts, &spc->spc_runtime, &ts); - timespecadd(&p->p_rtime, &ts, &p->p_rtime); + runtime = ts - spc->spc_runtime; + spc->spc_runtime = ts; } /* add the time counts for this thread to the process's total */ - tuagg_unlocked(pr, p); + krusage_proc(p, runtime); /* Stop any optional clock interrupts. */ if (ISSET(spc->spc_schedflags, SPCF_ITIMER)) { @@ -457,8 +464,6 @@ mi_switch(void) atomic_setbits_int(&spc->spc_schedflags, SPCF_PROFCLOCK); clockintr_advance(spc->spc_profclock, profclock_period); } - - nanouptime(&spc->spc_runtime); #ifdef MULTIPROCESSOR /* Index: kern/tty.c =================================================================== RCS file: /cvs/src/sys/kern/tty.c,v retrieving revision 1.176 diff -u -p -r1.176 tty.c --- kern/tty.c 14 Aug 2022 01:58:28 -0000 1.176 +++ kern/tty.c 28 Aug 2023 02:55:04 -0000 @@ -2152,7 +2152,8 @@ ttyinfo(struct tty *tp) { struct process *pr, *pickpr; struct proc *p, *pick; - struct timespec utime, stime; + uint64_t uticks, sticks; + unsigned int gen; int tmp; if (ttycheckoutq(tp,0) == 0) @@ -2214,21 +2215,11 @@ update_pickpr: pickpr->ps_vmspace != NULL) rss = vm_resident_count(pickpr->ps_vmspace); - calctsru(&pickpr->ps_tu, &utime, &stime, NULL); - - /* Round up and print user time. */ - utime.tv_nsec += 5000000; - if (utime.tv_nsec >= 1000000000) { - utime.tv_sec += 1; - utime.tv_nsec -= 1000000000; - } - - /* Round up and print system time. */ - stime.tv_nsec += 5000000; - if (stime.tv_nsec >= 1000000000) { - stime.tv_sec += 1; - stime.tv_nsec -= 1000000000; - } + pc_cons_enter(&pickpr->ps_ru_lock, &gen); + do { + uticks = pickpr->ps_ru.kru_counters[kru_uticks]; + sticks = pickpr->ps_ru.kru_counters[kru_sticks]; + } while (pc_cons_leave(&pickpr->ps_ru_lock, &gen) != 0); /* * Find the most active thread: @@ -2271,10 +2262,10 @@ update_pick: pick->p_wmesg ? 
pick->p_wmesg : "iowait"; ttyprintf(tp, - " cmd: %s %d [%s] %lld.%02ldu %lld.%02lds %d%% %ldk\n", + " cmd: %s %d [%s] %llu.%02llu %lld.%02llus %d%% %ldk\n", pickpr->ps_comm, pickpr->ps_pid, state, - (long long)utime.tv_sec, utime.tv_nsec / 10000000, - (long long)stime.tv_sec, stime.tv_nsec / 10000000, + uticks / hz, ((uticks % hz) * 100) / hz, + sticks / hz, ((sticks % hz) * 100) / hz, calc_pctcpu / 100, rss); } tp->t_rocount = 0; /* so pending input will be retyped if BS */ Index: kern/uipc_socket.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_socket.c,v retrieving revision 1.309 diff -u -p -r1.309 uipc_socket.c --- kern/uipc_socket.c 8 Aug 2023 22:07:25 -0000 1.309 +++ kern/uipc_socket.c 28 Aug 2023 02:55:04 -0000 @@ -562,7 +562,7 @@ sosend(struct socket *so, struct mbuf *a return (EINVAL); } if (uio && uio->uio_procp) - uio->uio_procp->p_ru.ru_msgsnd++; + krusage_inc(uio->uio_procp, kru_msgsnd); if (control) { /* * In theory clen should be unsigned (since control->m_len is). @@ -922,7 +922,7 @@ dontblock: * corruption. */ if (uio->uio_procp) - uio->uio_procp->p_ru.ru_msgrcv++; + krusage_inc(uio->uio_procp, kru_msgrcv); KASSERT(m == so->so_rcv.sb_mb); SBLASTRECORDCHK(&so->so_rcv, "soreceive 1"); SBLASTMBUFCHK(&so->so_rcv, "soreceive 1"); Index: kern/vfs_bio.c =================================================================== RCS file: /cvs/src/sys/kern/vfs_bio.c,v retrieving revision 1.212 diff -u -p -r1.212 vfs_bio.c --- kern/vfs_bio.c 26 Apr 2023 15:13:52 -0000 1.212 +++ kern/vfs_bio.c 28 Aug 2023 02:55:04 -0000 @@ -443,7 +443,7 @@ bio_doread(struct vnode *vp, daddr_t blk bcstats.numreads++; VOP_STRATEGY(bp->b_vp, bp); /* Pay for the read. 
*/ - curproc->p_ru.ru_inblock++; /* XXX */ + krusage_inc(curproc, kru_inblock); /* XXX */ } else if (async) { brelse(bp); } @@ -673,7 +673,7 @@ bread_cluster(struct vnode *vp, daddr_t bcstats.pendingreads++; bcstats.numreads++; VOP_STRATEGY(bp->b_vp, bp); - curproc->p_ru.ru_inblock++; + krusage_inc(curproc, kru_inblock); out: return (biowait(*rbpp)); @@ -742,7 +742,7 @@ bwrite(struct buf *bp) if (wasdelayed) { reassignbuf(bp); } else - curproc->p_ru.ru_oublock++; + krusage_inc(curproc, kru_oublock); /* Initiate disk write. Make sure the appropriate party is charged. */ @@ -806,7 +806,7 @@ bdwrite(struct buf *bp) buf_flip_dma(bp); reassignbuf(bp); splx(s); - curproc->p_ru.ru_oublock++; /* XXX */ + krusage_inc(curproc, kru_oublock); /* XXX */ } /* The "write" is done, so mark and release the buffer. */ Index: nfs/nfs_vnops.c =================================================================== RCS file: /cvs/src/sys/nfs/nfs_vnops.c,v retrieving revision 1.193 diff -u -p -r1.193 nfs_vnops.c --- nfs/nfs_vnops.c 26 Apr 2023 10:00:37 -0000 1.193 +++ nfs/nfs_vnops.c 28 Aug 2023 02:55:04 -0000 @@ -3139,7 +3139,7 @@ nfs_writebp(struct buf *bp, int force) buf_undirty(bp); if ((oldflags & B_ASYNC) && !(oldflags & B_DELWRI) && p) - ++p->p_ru.ru_oublock; + krusage_inc(p, kru_oublock); bp->b_vp->v_numoutput++; splx(s); @@ -3217,7 +3217,7 @@ nfs_writebp(struct buf *bp, int force) bp->b_flags |= B_RAW; rtval = biowait(bp); if (!(oldflags & B_DELWRI) && p) { - ++p->p_ru.ru_oublock; + krusage_inc(p, kru_oublock); } brelse(bp); return (rtval); Index: sys/acct.h =================================================================== RCS file: /cvs/src/sys/sys/acct.h,v retrieving revision 1.13 diff -u -p -r1.13 acct.h --- sys/acct.h 21 Feb 2023 14:31:07 -0000 1.13 +++ sys/acct.h 28 Aug 2023 02:55:04 -0000 @@ -77,6 +77,6 @@ struct acct { #define AHZ 64 #ifdef _KERNEL -int acct_process(struct proc *p); +int acct_process(struct proc *, struct process *); void acct_shutdown(void); #endif Index: 
sys/pclock.h =================================================================== RCS file: sys/pclock.h diff -N sys/pclock.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/pclock.h 28 Aug 2023 02:55:04 -0000 @@ -0,0 +1,49 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2023 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#ifndef _SYS_PCLOCK_H +#define _SYS_PCLOCK_H + +#include + +struct pc_lock { + volatile unsigned int pcl_gen; +}; + +#ifdef _KERNEL + +#define PC_LOCK_INITIALIZER() { .pcl_gen = 0 } + +void pc_lock_init(struct pc_lock *); + +/* single (non-interlocking) producer */ +unsigned int pc_sprod_enter(struct pc_lock *); +void pc_sprod_leave(struct pc_lock *, unsigned int); + +/* multiple (interlocking) producers */ +unsigned int pc_mprod_enter(struct pc_lock *); +void pc_mprod_leave(struct pc_lock *, unsigned int); + +/* consumer */ +void pc_cons_enter(struct pc_lock *, unsigned int *); +__warn_unused_result int + pc_cons_leave(struct pc_lock *, unsigned int *); + +#endif /* _KERNEL */ + +#endif /* _SYS_PCLOCK_H */ Index: sys/proc.h =================================================================== RCS file: /cvs/src/sys/sys/proc.h,v retrieving revision 1.347 diff -u -p -r1.347 proc.h --- sys/proc.h 5 Aug 2023 20:07:56 -0000 1.347 +++ sys/proc.h 28 Aug 2023 02:55:04 -0000 @@ -47,7 +47,10 @@ #include <sys/timeout.h> /* For struct timeout */ #include <sys/event.h> /* For struct klist */ #include <sys/mutex.h> /* For struct mutex */ +#include <sys/pclock.h> /* For struct pc_lock */ +#if 0 #include <sys/resource.h> /* For struct rusage */ +#endif #include <sys/rwlock.h> /* For struct rwlock */ #include <sys/sigio.h> /* For struct sigio */ @@ -86,15 +89,31 @@ struct pgrp { }; /* - * time usage: accumulated times in ticks - * Once a second, each thread's immediate counts (p_[usi]ticks) are - * accumulated into these. - */ -struct tusage { - struct timespec tu_runtime; /* Realtime. */ - uint64_t tu_uticks; /* Statclock hits in user mode. */ - uint64_t tu_sticks; /* Statclock hits in system mode. */ - uint64_t tu_iticks; /* Statclock hits processing intr. 
*/ + + kru_minflt, /* page reclaims */ + kru_majflt, /* page faults */ + kru_nswap, /* swaps */ + kru_inblock, /* block input operations */ + kru_oublock, /* block output operations */ + kru_msgsnd, /* messages sent */ + kru_msgrcv, /* messages received */ + kru_nsignals, /* signals received */ + kru_nvcsw, /* voluntary context switches */ + kru_nivcsw, /* involuntary " */ + + kru_ncounters +}; + +struct krusage { + uint64_t kru_counters[kru_ncounters]; }; /* @@ -188,9 +207,14 @@ struct process { int ps_ptmask; /* Ptrace event mask */ struct ptrace_state *ps_ptstat;/* Ptrace state */ - struct rusage *ps_ru; /* sum of stats for dead threads. */ - struct tusage ps_tu; /* accumulated times. */ - struct rusage ps_cru; /* sum of stats for reaped children */ + struct pc_lock ps_ru_lock; + unsigned long ps_ru_maxrss; /* max resident set size */ + struct krusage ps_ru; /* sum of stats for dead threads. */ + + struct pc_lock ps_cru_lock; + unsigned long ps_cru_maxrss; /* max resident set size */ + struct krusage ps_cru; /* sum of stats for reaped children */ + struct itimerspec ps_timer[3]; /* [m] ITIMER_REAL timer */ /* [T] ITIMER_{VIRTUAL,PROF} timers */ struct timeout ps_rucheck_to; /* [] resource limit check timer */ @@ -352,9 +376,8 @@ struct proc { u_int p_iticks; /* Statclock hits processing intr. */ struct cpu_info * volatile p_cpu; /* [S] CPU we're running on. */ - struct rusage p_ru; /* Statistics */ - struct tusage p_tu; /* accumulated times. */ - struct timespec p_rtime; /* Real time. */ + struct pc_lock p_ru_lock; + struct krusage p_ru; /* Statistics */ struct plimit *p_limit; /* [l] read ref. 
of p_p->ps_limit */ struct kcov_dev *p_kd; /* kcov device handle */ @@ -518,7 +541,6 @@ extern struct proc *syncerproc; /* file extern struct pool process_pool; /* memory pool for processes */ extern struct pool proc_pool; /* memory pool for procs */ -extern struct pool rusage_pool; /* memory pool for zombies */ extern struct pool ucred_pool; /* memory pool for ucreds */ extern struct pool session_pool; /* memory pool for sessions */ extern struct pool pgrp_pool; /* memory pool for pgrps */ @@ -621,6 +643,12 @@ void cpuset_intersection(struct cpuset * void cpuset_complement(struct cpuset *, struct cpuset *, struct cpuset *); int cpuset_cardinality(struct cpuset *); struct cpu_info *cpuset_first(struct cpuset *); + +void krusage_inc(struct proc *, enum krusage_counter); +void krusage_export(struct rusage *, + uint64_t, const struct krusage *); +void krusage_proc(struct proc *, uint64_t); +uint64_t krusage_runtime(struct krusage *, struct pc_lock *); #endif /* _KERNEL */ #endif /* !_SYS_PROC_H_ */ Index: sys/resource.h =================================================================== RCS file: /cvs/src/sys/sys/resource.h,v retrieving revision 1.14 diff -u -p -r1.14 resource.h --- sys/resource.h 25 Oct 2013 04:42:48 -0000 1.14 +++ sys/resource.h 28 Aug 2023 02:55:04 -0000 @@ -59,7 +59,6 @@ struct rusage { struct timeval ru_utime; /* user time used */ struct timeval ru_stime; /* system time used */ long ru_maxrss; /* max resident set size */ -#define ru_first ru_ixrss long ru_ixrss; /* integral shared text memory size */ long ru_idrss; /* integral unshared data " */ long ru_isrss; /* integral unshared stack " */ @@ -73,7 +72,6 @@ struct rusage { long ru_nsignals; /* signals received */ long ru_nvcsw; /* voluntary context switches */ long ru_nivcsw; /* involuntary " */ -#define ru_last ru_nivcsw }; /* Index: sys/resourcevar.h =================================================================== RCS file: /cvs/src/sys/sys/resourcevar.h,v retrieving revision 1.27 diff -u 
-p -r1.27 resourcevar.h --- sys/resourcevar.h 25 Jul 2023 18:16:19 -0000 1.27 +++ sys/resourcevar.h 28 Aug 2023 02:55:04 -0000 @@ -67,8 +67,7 @@ extern uint32_t profclock_period; void addupc_intr(struct proc *, u_long, u_long); void addupc_task(struct proc *, u_long, u_int); void profclock(struct clockintr *, void *); -void tuagg_unlocked(struct process *, struct proc *); -void tuagg(struct process *, struct proc *); +void tuagg_proc(struct proc *, uint64_t); struct tusage; void calctsru(struct tusage *, struct timespec *, struct timespec *, struct timespec *); @@ -107,7 +106,7 @@ lim_cur(int which) rlim_t lim_cur_proc(struct proc *, int); -void ruadd(struct rusage *, struct rusage *); void rucheck(void *); + #endif #endif /* !_SYS_RESOURCEVAR_H_ */ Index: sys/sched.h =================================================================== RCS file: /cvs/src/sys/sys/sched.h,v retrieving revision 1.61 diff -u -p -r1.61 sched.h --- sys/sched.h 11 Aug 2023 22:02:50 -0000 1.61 +++ sys/sched.h 28 Aug 2023 02:55:04 -0000 @@ -92,15 +92,18 @@ struct clockintr; struct smr_entry; +struct lltrace_cpu; /* * Per-CPU scheduler state. 
*/ struct schedstate_percpu { + struct lltrace_cpu *spc_lltrace; struct proc *spc_idleproc; /* idle proc for this cpu */ TAILQ_HEAD(prochead, proc) spc_qs[SCHED_NQS]; LIST_HEAD(,proc) spc_deadproc; - struct timespec spc_runtime; /* time curproc started running */ + uint64_t spc_runtime; /* time curproc started running (ns) */ + uint64_t spc_runtime_nonmono; /* count when time went backwards */ volatile int spc_schedflags; /* flags; see below */ u_int spc_schedticks; /* ticks for schedclock() */ u_int64_t spc_cp_time[CPUSTATES]; /* CPU state statistics */ Index: sys/sysctl.h =================================================================== RCS file: /cvs/src/sys/sys/sysctl.h,v retrieving revision 1.234 diff -u -p -r1.234 sysctl.h --- sys/sysctl.h 4 Jul 2023 11:14:00 -0000 1.234 +++ sys/sysctl.h 28 Aug 2023 02:55:04 -0000 @@ -591,6 +591,7 @@ struct kinfo_vmentry { #define FILL_KPROC(kp, copy_str, p, pr, uc, pg, paddr, \ praddr, sess, vm, lim, sa, isthread, show_addresses) \ do { \ + struct timeval __tv; \ memset((kp), 0, sizeof(*(kp))); \ \ if (show_addresses) { \ @@ -600,7 +601,6 @@ do { \ (kp)->p_vmspace = PTRTOINT64((pr)->ps_vmspace); \ (kp)->p_sigacts = PTRTOINT64((pr)->ps_sigacts); \ (kp)->p_sess = PTRTOINT64((pg)->pg_session); \ - (kp)->p_ru = PTRTOINT64((pr)->ps_ru); \ } \ (kp)->p_stats = 0; \ (kp)->p_exitsig = 0; \ @@ -625,20 +625,24 @@ do { \ \ (kp)->p_estcpu = (p)->p_estcpu; \ if (isthread) { \ - (kp)->p_rtime_sec = (p)->p_tu.tu_runtime.tv_sec; \ - (kp)->p_rtime_usec = (p)->p_tu.tu_runtime.tv_nsec/1000; \ + NSEC_TO_TIMEVAL((p)->p_ru.kru_counters[kru_runtime], \ + &__tv); \ + (kp)->p_rtime_sec = __tv.tv_sec; \ + (kp)->p_rtime_usec = __tv.tv_usec; \ (kp)->p_tid = (p)->p_tid + THREAD_PID_OFFSET; \ - (kp)->p_uticks = (p)->p_tu.tu_uticks; \ - (kp)->p_sticks = (p)->p_tu.tu_sticks; \ - (kp)->p_iticks = (p)->p_tu.tu_iticks; \ + (kp)->p_uticks = (p)->p_ru.kru_counters[kru_uticks]; \ + (kp)->p_sticks = (p)->p_ru.kru_counters[kru_sticks]; \ + (kp)->p_iticks = 
(p)->p_ru.kru_counters[kru_iticks]; \ strlcpy((kp)->p_name, (p)->p_name, sizeof((kp)->p_name)); \ } else { \ - (kp)->p_rtime_sec = (pr)->ps_tu.tu_runtime.tv_sec; \ - (kp)->p_rtime_usec = (pr)->ps_tu.tu_runtime.tv_nsec/1000; \ + NSEC_TO_TIMEVAL((pr)->ps_ru.kru_counters[kru_runtime], \ + &__tv); \ + (kp)->p_rtime_sec = __tv.tv_sec; \ + (kp)->p_rtime_usec = __tv.tv_usec; \ (kp)->p_tid = -1; \ - (kp)->p_uticks = (pr)->ps_tu.tu_uticks; \ - (kp)->p_sticks = (pr)->ps_tu.tu_sticks; \ - (kp)->p_iticks = (pr)->ps_tu.tu_iticks; \ + (kp)->p_uticks = (pr)->ps_ru.kru_counters[kru_uticks]; \ + (kp)->p_sticks = (pr)->ps_ru.kru_counters[kru_sticks]; \ + (kp)->p_iticks = (pr)->ps_ru.kru_counters[kru_iticks]; \ } \ (kp)->p_cpticks = (p)->p_cpticks; \ \ @@ -701,26 +705,46 @@ do { \ \ if (((pr)->ps_flags & PS_ZOMBIE) == 0) { \ struct timeval __tv; \ + unsigned int __g; \ + uint64_t __nsec; \ \ (kp)->p_uvalid = 1; \ \ - (kp)->p_uru_maxrss = (p)->p_ru.ru_maxrss; \ - (kp)->p_uru_ixrss = (p)->p_ru.ru_ixrss; \ - (kp)->p_uru_idrss = (p)->p_ru.ru_idrss; \ - (kp)->p_uru_isrss = (p)->p_ru.ru_isrss; \ - (kp)->p_uru_minflt = (p)->p_ru.ru_minflt; \ - (kp)->p_uru_majflt = (p)->p_ru.ru_majflt; \ - (kp)->p_uru_nswap = (p)->p_ru.ru_nswap; \ - (kp)->p_uru_inblock = (p)->p_ru.ru_inblock; \ - (kp)->p_uru_oublock = (p)->p_ru.ru_oublock; \ - (kp)->p_uru_msgsnd = (p)->p_ru.ru_msgsnd; \ - (kp)->p_uru_msgrcv = (p)->p_ru.ru_msgrcv; \ - (kp)->p_uru_nsignals = (p)->p_ru.ru_nsignals; \ - (kp)->p_uru_nvcsw = (p)->p_ru.ru_nvcsw; \ - (kp)->p_uru_nivcsw = (p)->p_ru.ru_nivcsw; \ + pc_cons_enter(&(pr)->ps_ru_lock, &__g); \ + do { \ + (kp)->p_uru_maxrss = (pr)->ps_ru_maxrss; \ + } while (pc_cons_leave(&(pr)->ps_ru_lock, &__g) != 0); \ + \ + (kp)->p_uru_ixrss = 0; \ + (kp)->p_uru_idrss = 0; \ + (kp)->p_uru_isrss = 0; \ + \ + pc_cons_enter(&(p)->p_ru_lock, &__g); \ + do { \ + (kp)->p_uru_minflt = \ + (p)->p_ru.kru_counters[kru_minflt]; \ + (kp)->p_uru_majflt = \ + (p)->p_ru.kru_counters[kru_majflt]; \ + (kp)->p_uru_nswap 
= \ + (p)->p_ru.kru_counters[kru_nswap]; \ + (kp)->p_uru_inblock = \ + (p)->p_ru.kru_counters[kru_inblock]; \ + (kp)->p_uru_oublock = \ + (p)->p_ru.kru_counters[kru_oublock]; \ + (kp)->p_uru_msgsnd = \ + (p)->p_ru.kru_counters[kru_msgsnd]; \ + (kp)->p_uru_msgrcv = \ + (p)->p_ru.kru_counters[kru_msgrcv]; \ + (kp)->p_uru_nsignals = \ + (p)->p_ru.kru_counters[kru_nsignals]; \ + (kp)->p_uru_nvcsw = \ + (p)->p_ru.kru_counters[kru_nvcsw]; \ + (kp)->p_uru_nivcsw = \ + (p)->p_ru.kru_counters[kru_nivcsw]; \ + __nsec = (p)->p_ru.kru_counters[kru_runtime]; \ + } while (pc_cons_leave(&(p)->p_ru_lock, &__g) != 0); \ \ - timeradd(&(pr)->ps_cru.ru_utime, \ - &(pr)->ps_cru.ru_stime, &__tv); \ + NSEC_TO_TIMEVAL(__nsec, &__tv); \ (kp)->p_uctime_sec = __tv.tv_sec; \ (kp)->p_uctime_usec = __tv.tv_usec; \ } \ Index: ufs/ext2fs/ext2fs_bmap.c =================================================================== RCS file: /cvs/src/sys/ufs/ext2fs/ext2fs_bmap.c,v retrieving revision 1.28 diff -u -p -r1.28 ext2fs_bmap.c --- ufs/ext2fs/ext2fs_bmap.c 12 Dec 2021 09:14:59 -0000 1.28 +++ ufs/ext2fs/ext2fs_bmap.c 28 Aug 2023 02:55:04 -0000 @@ -227,7 +227,7 @@ ext2fs_bmaparray(struct vnode *vp, daddr bp->b_blkno = blkptrtodb(ump, daddr); bp->b_flags |= B_READ; VOP_STRATEGY(bp->b_vp, bp); - curproc->p_ru.ru_inblock++; /* XXX */ + krusage_inc(curproc, kru_inblock); /* XXX */ bcstats.pendingreads++; if ((error = biowait(bp)) != 0) { brelse(bp); Index: ufs/ext2fs/ext2fs_inode.c =================================================================== RCS file: /cvs/src/sys/ufs/ext2fs/ext2fs_inode.c,v retrieving revision 1.66 diff -u -p -r1.66 ext2fs_inode.c --- ufs/ext2fs/ext2fs_inode.c 12 Aug 2022 14:30:53 -0000 1.66 +++ ufs/ext2fs/ext2fs_inode.c 28 Aug 2023 02:55:04 -0000 @@ -454,7 +454,7 @@ ext2fs_indirtrunc(struct inode *ip, int3 vp = ITOV(ip); bp = getblk(vp, lbn, (int)fs->e2fs_bsize, 0, INFSLP); if (!(bp->b_flags & (B_DONE | B_DELWRI))) { - curproc->p_ru.ru_inblock++; /* pay for read */ + 
krusage_inc(curproc, kru_inblock); /* pay for read */ bcstats.pendingreads++; bcstats.numreads++; bp->b_flags |= B_READ; Index: ufs/ffs/ffs_inode.c =================================================================== RCS file: /cvs/src/sys/ufs/ffs/ffs_inode.c,v retrieving revision 1.81 diff -u -p -r1.81 ffs_inode.c --- ufs/ffs/ffs_inode.c 12 Dec 2021 09:14:59 -0000 1.81 +++ ufs/ffs/ffs_inode.c 28 Aug 2023 02:55:04 -0000 @@ -481,7 +481,7 @@ ffs_indirtrunc(struct inode *ip, daddr_t vp = ITOV(ip); bp = getblk(vp, lbn, (int)fs->fs_bsize, 0, INFSLP); if (!(bp->b_flags & (B_DONE | B_DELWRI))) { - curproc->p_ru.ru_inblock++; /* pay for read */ + krusage_inc(curproc, kru_inblock); /* pay for read */ bcstats.pendingreads++; bcstats.numreads++; bp->b_flags |= B_READ; Index: ufs/ufs/ufs_bmap.c =================================================================== RCS file: /cvs/src/sys/ufs/ufs/ufs_bmap.c,v retrieving revision 1.37 diff -u -p -r1.37 ufs_bmap.c --- ufs/ufs/ufs_bmap.c 12 Dec 2021 09:14:59 -0000 1.37 +++ ufs/ufs/ufs_bmap.c 28 Aug 2023 02:55:04 -0000 @@ -174,7 +174,7 @@ ufs_bmaparray(struct vnode *vp, daddr_t bcstats.pendingreads++; bcstats.numreads++; VOP_STRATEGY(bp->b_vp, bp); - curproc->p_ru.ru_inblock++; /* XXX */ + krusage_inc(curproc, kru_inblock); /* XXX */ if ((error = biowait(bp)) != 0) { brelse(bp); return (error); Index: uvm/uvm_fault.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_fault.c,v retrieving revision 1.133 diff -u -p -r1.133 uvm_fault.c --- uvm/uvm_fault.c 4 Nov 2022 09:36:44 -0000 1.133 +++ uvm/uvm_fault.c 28 Aug 2023 02:55:04 -0000 @@ -285,11 +285,7 @@ uvmfault_anonget(struct uvm_faultinfo *u /* Increment the counters.*/ counters_inc(uvmexp_counters, flt_anget); - if (anon->an_page) { - curproc->p_ru.ru_minflt++; - } else { - curproc->p_ru.ru_majflt++; - } + krusage_inc(curproc, anon->an_page ? 
kru_minflt : kru_majflt); error = 0; /* @@ -491,6 +487,7 @@ uvmfault_update_stats(struct uvm_faultin { struct vm_map *map; struct proc *p; + struct process *pr; vsize_t res; map = ufi->orig_map; @@ -514,8 +511,13 @@ uvmfault_update_stats(struct uvm_faultin /* Convert res from pages to kilobytes. */ res <<= (PAGE_SHIFT - 10); - if (p->p_ru.ru_maxrss < res) - p->p_ru.ru_maxrss = res; + pr = p->p_p; + if (pr->ps_ru_maxrss < res) { + unsigned int gen = pc_mprod_enter(&pr->ps_ru_lock); + if (pr->ps_ru_maxrss < res) + pr->ps_ru_maxrss = res; + pc_mprod_leave(&pr->ps_ru_lock, gen); + } } } @@ -577,6 +579,8 @@ uvm_fault(vm_map_t orig_map, vaddr_t vad struct vm_page *pages[UVM_MAXRANGE]; int error; + LLTRACE(lltrace_trap, LLTRACE_TRAP_PAGEFAULT); + counters_inc(uvmexp_counters, faults); TRACEPOINT(uvm, fault, vaddr, fault_type, access_type, NULL); @@ -641,6 +645,8 @@ uvm_fault(vm_map_t orig_map, vaddr_t vad } } + LLTRACE(lltrace_trapret, LLTRACE_TRAP_PAGEFAULT); + return error; } @@ -1270,12 +1276,12 @@ uvm_fault_lower(struct uvm_faultinfo *uf */ if (uobjpage) { /* update rusage counters */ - curproc->p_ru.ru_minflt++; + krusage_inc(curproc, kru_minflt); } else { int gotpages; /* update rusage counters */ - curproc->p_ru.ru_majflt++; + krusage_inc(curproc, kru_majflt); uvmfault_unlockall(ufi, amap, NULL);