Index: sys/arch/amd64/amd64/conf.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/conf.c,v diff -u -p -r1.81 conf.c --- sys/arch/amd64/amd64/conf.c 12 Jun 2024 12:54:54 -0000 1.81 +++ sys/arch/amd64/amd64/conf.c 1 Jul 2024 05:41:23 -0000 @@ -137,6 +137,7 @@ cdev_decl(cy); #include "bktr.h" #include "ksyms.h" #include "kstat.h" +#include "llt.h" #include "usb.h" #include "uhid.h" #include "fido.h" @@ -215,7 +216,8 @@ struct cdevsw cdevsw[] = cdev_notdef(), /* 28 was LKM */ cdev_notdef(), /* 29 */ cdev_dt_init(NDT,dt), /* 30: dynamic tracer */ - cdev_notdef(), /* 31 */ + cdev_lltrace_init(NLLT,lltrace), + /* 31: lltrace */ cdev_notdef(), /* 32 */ cdev_notdef(), /* 33 */ cdev_notdef(), /* 34 */ Index: sys/arch/amd64/amd64/intr.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/intr.c,v diff -u -p -r1.60 intr.c --- sys/arch/amd64/amd64/intr.c 15 Jun 2024 18:01:44 -0000 1.60 +++ sys/arch/amd64/amd64/intr.c 1 Jul 2024 05:41:24 -0000 @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -540,6 +541,9 @@ intr_handler(struct intrframe *frame, st if (intr_suspended && (ih->ih_flags & IPL_WAKEUP) == 0) return 0; + LLTRACE_CPU(ci, lltrace_intr_enter, LLTRACE_INTR_T_HW, + ci->ci_isources[ih->ih_slot]->is_idtvec); + #ifdef MULTIPROCESSOR if (ih->ih_flags & IPL_MPSAFE) need_lock = 0; @@ -549,14 +553,22 @@ intr_handler(struct intrframe *frame, st if (need_lock) __mp_lock(&kernel_lock); #endif + floor = ci->ci_handled_intr_level; ci->ci_handled_intr_level = ih->ih_level; + + LLTRACE_CPU(ci, lltrace_fn_enter, ih->ih_fun); rc = (*ih->ih_fun)(ih->ih_arg ? ih->ih_arg : frame); + LLTRACE_CPU(ci, lltrace_fn_leave, ih->ih_fun); + ci->ci_handled_intr_level = floor; + #ifdef MULTIPROCESSOR if (need_lock) __mp_unlock(&kernel_lock); #endif + LLTRACE_CPU(ci, lltrace_intr_leave, LLTRACE_INTR_T_HW, + ci->ci_isources[ih->ih_slot]->is_idtvec); return rc; } Index: sys/arch/amd64/amd64/ipi.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/ipi.c,v diff -u -p -r1.18 ipi.c --- sys/arch/amd64/amd64/ipi.c 10 Nov 2022 08:26:54 -0000 1.18 +++ sys/arch/amd64/amd64/ipi.c 1 Jul 2024 05:41:24 -0000 @@ -35,9 +35,10 @@ #include #include #include +#include +#include #include -#include #include #include #include @@ -45,6 +46,8 @@ void x86_send_ipi(struct cpu_info *ci, int ipimask) { + LLTRACE(lltrace_ipi, ci->ci_cpuid); + x86_atomic_setbits_u32(&ci->ci_ipis, ipimask); /* Don't send IPI to cpu which isn't (yet) running. 
*/ @@ -57,6 +60,8 @@ x86_send_ipi(struct cpu_info *ci, int ip int x86_fast_ipi(struct cpu_info *ci, int ipi) { + LLTRACE(lltrace_ipi, ci->ci_cpuid); + if (!(ci->ci_flags & CPUF_RUNNING)) return (ENOENT); @@ -72,6 +77,8 @@ x86_broadcast_ipi(int ipimask) int count = 0; CPU_INFO_ITERATOR cii; + LLTRACE_CPU(self, lltrace_ipi, ~0); + CPU_INFO_FOREACH(cii, ci) { if (ci == self) continue; @@ -95,17 +102,22 @@ x86_ipi_handler(void) int bit; int floor; + LLTRACE_CPU(ci, lltrace_intr_enter, LLTRACE_INTR_T_IPI, 0); + floor = ci->ci_handled_intr_level; ci->ci_handled_intr_level = ci->ci_ilevel; pending = atomic_swap_uint(&ci->ci_ipis, 0); for (bit = 0; bit < X86_NIPI && pending; bit++) { if (pending & (1 << bit)) { - pending &= ~(1 << bit); + LLTRACE_CPU(ci, lltrace_fn_enter, ipifunc[bit]); (*ipifunc[bit])(ci); + LLTRACE_CPU(ci, lltrace_fn_leave, ipifunc[bit]); evcount_inc(&ipi_count); } } ci->ci_handled_intr_level = floor; + + LLTRACE_CPU(ci, lltrace_intr_leave, LLTRACE_INTR_T_IPI, 0); } Index: sys/arch/amd64/amd64/lapic.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/lapic.c,v diff -u -p -r1.72 lapic.c --- sys/arch/amd64/amd64/lapic.c 3 Apr 2024 02:01:21 -0000 1.72 +++ sys/arch/amd64/amd64/lapic.c 1 Jul 2024 05:41:24 -0000 @@ -37,6 +37,7 @@ #include #include #include +#include #include @@ -476,12 +477,16 @@ lapic_clockintr(void *arg, struct intrfr struct cpu_info *ci = curcpu(); int floor; + LLTRACE_CPU(ci, lltrace_intr_enter, LLTRACE_INTR_T_CLOCK, 0); + floor = ci->ci_handled_intr_level; ci->ci_handled_intr_level = ci->ci_ilevel; clockintr_dispatch(&frame); ci->ci_handled_intr_level = floor; evcount_inc(&clk_count); + + LLTRACE_CPU(ci, lltrace_intr_leave, LLTRACE_INTR_T_CLOCK, 0); } void Index: sys/arch/amd64/amd64/softintr.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/softintr.c,v diff -u -p -r1.10 softintr.c --- sys/arch/amd64/amd64/softintr.c 11 Sep 2020 09:27:09 -0000 1.10 +++ sys/arch/amd64/amd64/softintr.c 1 Jul 2024 05:41:24 -0000 @@ -37,6 +37,7 @@ #include #include #include +#include #include @@ -82,6 +83,8 @@ softintr_dispatch(int which) struct x86_soft_intrhand *sih; int floor; + LLTRACE_CPU(ci, lltrace_intr_enter, LLTRACE_INTR_T_SW, which); + floor = ci->ci_handled_intr_level; ci->ci_handled_intr_level = ci->ci_ilevel; @@ -99,12 +102,15 @@ softintr_dispatch(int which) uvmexp.softs++; mtx_leave(&si->softintr_lock); - + LLTRACE_CPU(ci, lltrace_fn_enter, sih->sih_fn); (*sih->sih_fn)(sih->sih_arg); + LLTRACE_CPU(ci, lltrace_fn_leave, sih->sih_fn); } KERNEL_UNLOCK(); ci->ci_handled_intr_level = floor; + + LLTRACE_CPU(ci, lltrace_intr_leave, LLTRACE_INTR_T_SW, which); } /* Index: sys/arch/arm64/arm64/conf.c =================================================================== RCS file: /cvs/src/sys/arch/arm64/arm64/conf.c,v diff -u -p -r1.24 conf.c --- sys/arch/arm64/arm64/conf.c 12 Jun 2024 02:50:25 -0000 1.24 +++ sys/arch/arm64/arm64/conf.c 1 Jul 2024 05:41:24 -0000 @@ -91,6 +91,7 @@ cdev_decl(lpt); #include "bktr.h" #include "ksyms.h" #include "kstat.h" +#include "llt.h" #include "usb.h" #include "uhid.h" #include "fido.h" @@ -156,7 +157,8 @@ struct cdevsw cdevsw[] = cdev_notdef(), /* 28 was LKM */ cdev_notdef(), /* 29 */ cdev_dt_init(NDT,dt), /* 30: dynamic tracer */ - cdev_notdef(), /* 31 */ + cdev_lltrace_init(NLLT,lltrace), + /* 31: lltrace */ cdev_notdef(), /* 32 */ cdev_notdef(), /* 33 */ cdev_notdef(), /* 34 */ Index: sys/arch/arm64/dev/agintc.c 
===================================================================
RCS file: /cvs/src/sys/arch/arm64/dev/agintc.c,v
diff -u -p -r1.58 agintc.c
--- sys/arch/arm64/dev/agintc.c	23 Jun 2024 21:58:34 -0000	1.58
+++ sys/arch/arm64/dev/agintc.c	1 Jul 2024 05:41:24 -0000
@@ -27,6 +27,7 @@
 #include
 #include
 #include
+#include
 
 #include
 #include
@@ -1121,7 +1122,11 @@ agintc_run_handler(struct intrhand *ih,
 	else
 		arg = frame;
 
+	LLTRACE(lltrace_intr_enter, ih->ih_ipl == IPL_IPI ?
+	    LLTRACE_INTR_T_IPI : LLTRACE_INTR_T_HW, ih->ih_irq);
 	handled = ih->ih_func(arg);
+	LLTRACE(lltrace_intr_leave, ih->ih_ipl == IPL_IPI ?
+	    LLTRACE_INTR_T_IPI : LLTRACE_INTR_T_HW, ih->ih_irq);
 	if (handled)
 		ih->ih_count.ec_count++;
 
@@ -1466,6 +1471,8 @@ agintc_send_ipi(struct cpu_info *ci, int
 {
 	struct agintc_softc	*sc = agintc_sc;
 	uint64_t		 sendmask;
+
+	LLTRACE(lltrace_ipi, ci->ci_cpuid);
 
 	if (ci == curcpu() && id == ARM_IPI_NOP)
 		return;
Index: sys/conf/GENERIC
===================================================================
RCS file: /cvs/src/sys/conf/GENERIC,v
diff -u -p -r1.296 GENERIC
--- sys/conf/GENERIC	5 May 2024 07:26:58 -0000	1.296
+++ sys/conf/GENERIC	1 Jul 2024 05:41:24 -0000
@@ -81,6 +81,7 @@ pseudo-device endrun 1	# EndRun line dis
 pseudo-device	vnd	4	# vnode disk devices
 pseudo-device	ksyms	1	# kernel symbols device
 pseudo-device	kstat		# kernel statistics device
+pseudo-device	llt		# low-level tracing device
 
 # clonable devices
 pseudo-device	bpfilter	# packet filter
Index: sys/conf/files
===================================================================
RCS file: /cvs/src/sys/conf/files,v
diff -u -p -r1.733 files
--- sys/conf/files	12 Jun 2024 12:54:54 -0000	1.733
+++ sys/conf/files	1 Jul 2024 05:41:24 -0000
@@ -597,6 +597,9 @@ file dev/ksyms.c ksyms needs-flag
 pseudo-device kstat
 file	dev/kstat.c			kstat needs-flag
 
+pseudo-device llt
+file	dev/lltrace.c			llt needs-flag
+
 pseudo-device fuse
 file	miscfs/fuse/fuse_device.c	fuse needs-flag
 file	miscfs/fuse/fuse_file.c		fuse
Index: sys/dev/lltrace.c
===================================================================
RCS file: sys/dev/lltrace.c
diff -N sys/dev/lltrace.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ sys/dev/lltrace.c	1 Jul 2024 05:41:24 -0000
@@ -0,0 +1,1082 @@
+/*	$OpenBSD$ */
+
+/*
+ * Copyright (c) 2022 The University of Queensland
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * This code was written by David Gwynne as part
+ * of the Information Technology Infrastructure Group (ITIG) in the
+ * Faculty of Engineering, Architecture and Information Technology
+ * (EAIT).
+ *
+ * It was heavily inspired by the KUTrace (kernel/userland tracing)
+ * framework by Richard L. Sites.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+
+#if defined(__amd64__) || defined(__i386__)
+
+static inline unsigned int
+lltrace_cas(unsigned int *p, unsigned int e, unsigned int n)
+{
+	__asm volatile("cmpxchgl %2, %1"
+	    : "=a" (e), "=m" (*p)
+	    : "r" (n), "a" (e), "m" (*p));
+
+	return (e);
+}
+
+static inline uint64_t
+lltrace_ts(void)
+{
+	unsigned int hi, lo;
+
+	__asm volatile("lfence; rdtsc" : "=d" (hi), "=a" (lo));
+
+	return (lo & (LLTRACE_TS_MASK << LLTRACE_TS_SHIFT));
+}
+
+static inline uint64_t
+lltrace_ts_long(void)
+{
+	return (rdtsc_lfence() & ~LLTRACE_MASK(LLTRACE_TS_SHIFT));
+}
+
+#elif defined(__aarch64__)
+
+#define lltrace_cas(_p, _e, _n) atomic_cas_uint((_p), (_e), (_n))
+
+static inline uint64_t
+lltrace_ts_long(void)
+{
+	uint64_t ts;
+
+	__asm volatile("mrs %x0, cntvct_el0" : "=r" (ts));
+
+	return (ts << LLTRACE_TS_SHIFT);
+}
+
+static inline uint64_t
+lltrace_ts(void)
+{
+	uint64_t ts = lltrace_ts_long();
+
+	return (ts & (LLTRACE_TS_MASK << LLTRACE_TS_SHIFT));
+}
+
+#else /* not x86 or arm64 */
+
+#error not supported (yet)
+
+static unsigned int
+lltrace_cas(unsigned int *p, unsigned int e, unsigned int n)
+{
+	unsigned int o;
+	int s;
+
+	s = intr_disable();
+	o = *p;
+	if (o == e)
+		*p = n;
+	intr_restore(s);
+
+	return (o);
+}
+
+static inline uint64_t
+lltrace_ts(void)
+{
+	return (countertime());
+}
+
+static inline uint64_t
+lltrace_ts_long(void)
+{
+	return (countertime());
+}
+
+#endif
+
+#define LLTRACE_MB2NBUF(_mb) \
+	(((_mb) * (1U << 20)) / sizeof(struct lltrace_buffer))
+#define LLTRACE_NBUF2MB(_nbuf) \
+	(((_nbuf) * sizeof(struct lltrace_buffer)) / (1U << 20))
+
+#define LLTRACE_BLEN_DEFAULT	16
+
+struct lltrace_cpu {
+	SIMPLEQ_ENTRY(lltrace_cpu)
+				llt_entry;
+	struct lltrace_buffer	llt_buffer;
+	unsigned int		llt_slot;
+	unsigned int		llt_pid;
+	unsigned int		llt_tid;
+	uint64_t		llt_wakeid;
+};
+
+SIMPLEQ_HEAD(lltrace_cpu_list, lltrace_cpu);
+
+struct lltrace_softc {
+	unsigned int		sc_running;
+	unsigned int		sc_mode;
+	struct rwlock		sc_lock;
+	unsigned int		sc_nbuffers;
+
+	unsigned int		sc_free;
+	unsigned int		sc_used;
+	struct lltrace_cpu	**sc_ring;
+	struct lltrace_cpu	*sc_buffers;
+
+	unsigned int		sc_read;
+	unsigned int		sc_reading;
+	struct selinfo		sc_sel;
+
+	uint64_t		sc_boottime;
+	uint64_t		sc_monotime;
+};
+
+static int	lltrace_start(struct lltrace_softc *, struct proc *);
+static int	lltrace_stop(struct lltrace_softc *, struct proc *);
+static int	lltrace_flush(struct lltrace_softc *);
+
+static struct lltrace_softc *lltrace_sc;
+
+int
+lltattach(int num)
+{
+	return (0);
+}
+
+int
+lltraceopen(dev_t dev, int flag, int mode, struct proc *p)
+{
+	struct lltrace_softc *sc;
+	int error;
+
+	if (minor(dev) != 0)
+		return (ENXIO);
+
+	error = suser(p);
+	if (error != 0)
+		return (error);
+
+	if (lltrace_sc != NULL)
+		return (EBUSY);
+
+	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
+	if (sc == NULL)
+		return (ENOMEM);
+
+	sc->sc_running = 0;
+	sc->sc_nbuffers = LLTRACE_MB2NBUF(LLTRACE_BLEN_DEFAULT);
+
+	rw_init(&sc->sc_lock, "lltlk");
+
+	sc->sc_read = 0;
+	sc->sc_reading = 0;
+	klist_init_rwlock(&sc->sc_sel.si_note, &sc->sc_lock);
+
+	/* commit */
+	if (atomic_cas_ptr(&lltrace_sc, NULL, sc) != NULL) {
+		free(sc, M_DEVBUF, sizeof(*sc));
+		return (EBUSY);
+	}
+
+	return (0);
+}
+
+int
+lltraceclose(dev_t dev, int flag, int mode, struct proc *p)
+{
+	struct lltrace_softc *sc = lltrace_sc;
+
+	rw_enter_write(&sc->sc_lock);
+	lltrace_stop(sc, p);
+	lltrace_flush(sc);
+	rw_exit_write(&sc->sc_lock);
+
+	lltrace_sc = NULL;
+	membar_sync();
+
+	free(sc, M_DEVBUF, sizeof(*sc));
+
+	return (0);
+}
+
+static int
+lltrace_fionread(struct lltrace_softc *sc)
+{
+	int canread;
+
+	rw_enter_read(&sc->sc_lock);
+	canread = !sc->sc_running && (sc->sc_buffers != NULL) &&
+	    (sc->sc_read < sc->sc_nbuffers);
+	rw_exit_read(&sc->sc_lock);
+
+	return (canread ? sizeof(struct lltrace_buffer) : 0);
+}
+
+static void
+lltrace_cpu_init(struct lltrace_cpu *llt, struct lltrace_softc *sc,
+    struct cpu_info *ci, unsigned int pid, unsigned int tid, uint64_t wakeid)
+{
+	struct lltrace_header *llh;
+
+	llh = (struct lltrace_header *)&llt->llt_buffer;
+	llh->h_cpu = cpu_number();
+	llh->h_idletid = ci->ci_schedstate.spc_idleproc->p_tid;
+	llh->h_boottime = sc->sc_boottime;
+	llh->h_start_cy = lltrace_ts_long();
+	llh->h_start_ns = nsecuptime() - sc->sc_monotime;
+	llh->h_end_cy = 0;
+	llh->h_end_ns = 0;
+	llh->h_pid = pid;
+	llh->h_tid = tid;
+	llh->h_zero = 0;
+
+	llt->llt_pid = pid;
+	llt->llt_tid = tid;
+	llt->llt_slot = 8;
+	llt->llt_wakeid = wakeid;
+}
+
+static void
+lltrace_cpu_fini(struct lltrace_cpu *llt, struct lltrace_softc *sc)
+{
+	struct lltrace_header *llh;
+
+	llh = (struct lltrace_header *)&llt->llt_buffer;
+	llh->h_end_cy = lltrace_ts_long();
+	llh->h_end_ns = nsecuptime() - sc->sc_monotime;
+}
+
+static int
+lltrace_set_mode(struct lltrace_softc *sc, unsigned int mode)
+{
+	int error;
+
+	if (mode >= LLTRACE_MODE_COUNT)
+		return (EINVAL);
+
+	error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR);
+	if (error != 0)
+		return (error);
+
+	if (sc->sc_running)
+		error = EBUSY;
+	else
+		sc->sc_mode = mode;
+
+	rw_exit(&sc->sc_lock);
+	return (error);
+}
+
+static int
+lltrace_set_blen(struct lltrace_softc *sc, unsigned int blen)
+{
+	int error;
+	unsigned int nbuffers;
+
+	if (blen < LLTRACE_BLEN_MIN || blen > LLTRACE_BLEN_MAX)
+		return (EINVAL);
+
+	/* convert megabytes to the number of buffers */
+	nbuffers = LLTRACE_MB2NBUF(blen);
+	if (nbuffers <= ncpus)
+		return (EINVAL);
+
+	error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR);
+	if (error != 0)
+		return (error);
+
+	if (sc->sc_buffers != NULL)
+		error = EBUSY;
+	else
+		sc->sc_nbuffers = nbuffers;
+
+	rw_exit(&sc->sc_lock);
+	return (error);
+}
+
+static int
+lltrace_start(struct lltrace_softc *sc, struct proc *p)
+{
+	struct process *ps = p->p_p;
+	struct bintime boottime;
+	unsigned int i;
+	size_t sz;
+	struct lltrace_cpu_list l = SIMPLEQ_HEAD_INITIALIZER(l);
+	struct lltrace_cpu *llt;
+	struct cpu_info *ci;
+	CPU_INFO_ITERATOR cii;
+	unsigned int pid, tid;
+
+	if (sc->sc_running)
+		return (EINVAL);
+
+	if (sc->sc_nbuffers <= (ncpus * 2 + 1))
+		return (EINVAL);
+
+	lltrace_flush(sc);
+
+	sc->sc_monotime = nsecuptime();
+
+	binboottime(&boottime);
+	sc->sc_boottime = BINTIME_TO_NSEC(&boottime) + sc->sc_monotime;
+
+	sz = roundup(sc->sc_nbuffers * sizeof(*sc->sc_buffers), PAGE_SIZE);
+	sc->sc_buffers = km_alloc(sz, &kv_any, &kp_dirty, &kd_waitok);
+	if (sc->sc_buffers == NULL)
+		return (ENOMEM);
+	sc->sc_ring = mallocarray(sc->sc_nbuffers, sizeof(*sc->sc_ring),
+	    M_DEVBUF, M_WAITOK);
+	for (i = 0; i < sc->sc_nbuffers; i++) {
+		llt = &sc->sc_buffers[i];
+		llt->llt_slot = 0;
+		sc->sc_ring[i] = llt;
+	}
+
+	sc->sc_free = 0;	/* next slot to pull a free buffer from */
+	sc->sc_used = 0;	/* next slot to put a used buffer in */
+
+	CPU_INFO_FOREACH(cii, ci) {
+		i = sc->sc_free++;	/* can't wrap yet */
+
+		llt = sc->sc_ring[i];
+		sc->sc_ring[i] = NULL;
+
+		SIMPLEQ_INSERT_HEAD(&l, llt, llt_entry);
+	}
+
+	tid = p->p_tid;
+
pid = ps->ps_pid; + if (ISSET(ps->ps_flags, PS_SYSTEM)) + pid |= (1U << 31); + + CPU_INFO_FOREACH(cii, ci) { + sched_peg_curproc(ci); + + llt = SIMPLEQ_FIRST(&l); + SIMPLEQ_REMOVE_HEAD(&l, llt_entry); + + lltrace_cpu_init(llt, sc, ci, pid, tid, 0x1); + lltrace_pidname(llt, p); + + membar_producer(); + ci->ci_schedstate.spc_lltrace = llt; + } + atomic_clearbits_int(&p->p_flag, P_CPUPEG); + + sc->sc_running = 1; + + return (0); +} + +static int +lltrace_stop(struct lltrace_softc *sc, struct proc *p) +{ + struct lltrace_cpu *llt; + struct cpu_info *ci; + CPU_INFO_ITERATOR cii; + unsigned long s; + + if (!sc->sc_running) + return (EALREADY); + + sc->sc_running = 0; + + /* visit each cpu to take llt away safely */ + CPU_INFO_FOREACH(cii, ci) { + sched_peg_curproc(ci); + + s = intr_disable(); + llt = ci->ci_schedstate.spc_lltrace; + ci->ci_schedstate.spc_lltrace = NULL; + intr_restore(s); + + lltrace_cpu_fini(llt, sc); + } + atomic_clearbits_int(&p->p_flag, P_CPUPEG); + + return (0); +} + +static int +lltrace_flush(struct lltrace_softc *sc) +{ + size_t sz; + + rw_assert_wrlock(&sc->sc_lock); + if (sc->sc_running) + return (EBUSY); + + if (sc->sc_buffers == NULL) + return (0); + + sz = roundup(sc->sc_nbuffers * sizeof(*sc->sc_buffers), PAGE_SIZE); + km_free(sc->sc_buffers, sz, &kv_any, &kp_dirty); + free(sc->sc_ring, M_DEVBUF, sc->sc_nbuffers * sizeof(*sc->sc_ring)); + + sc->sc_buffers = NULL; + sc->sc_ring = NULL; + sc->sc_read = 0; + + return (0); +} + +int +lltraceioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) +{ + struct lltrace_softc *sc = lltrace_sc; + int error = 0; + + KERNEL_UNLOCK(); + + switch (cmd) { + case FIONREAD: + *(int *)data = lltrace_fionread(sc); + break; + case FIONBIO: + /* vfs tracks this for us if we let it */ + break; + + case LLTIOCSTART: + error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR); + if (error != 0) + break; + error = lltrace_start(sc, p); + rw_exit(&sc->sc_lock); + break; + case LLTIOCSTOP: + error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR); + if (error != 0) + break; + error = lltrace_stop(sc, p); + rw_exit(&sc->sc_lock); + break; + case LLTIOCFLUSH: + error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR); + if (error != 0) + break; + error = lltrace_flush(sc); + rw_exit(&sc->sc_lock); + break; + + case LLTIOCSBLEN: + error = lltrace_set_blen(sc, *(unsigned int *)data); + break; + case LLTIOCGBLEN: + *(unsigned int *)data = LLTRACE_NBUF2MB(sc->sc_nbuffers); + break; + + case LLTIOCSMODE: + error = lltrace_set_mode(sc, *(unsigned int *)data); + break; + case LLTIOCGMODE: + *(unsigned int *)data = sc->sc_mode; + break; + + default: + error = ENOTTY; + break; + } + + KERNEL_LOCK(); + + return (error); +} + +int +lltraceread(dev_t dev, struct uio *uio, int ioflag) +{ + struct lltrace_softc *sc = lltrace_sc; + struct lltrace_cpu *llt; + unsigned int slot; + int error; + + KERNEL_UNLOCK(); + + error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR); + if (error != 0) + goto lock; + + if (sc->sc_running) { + if (ISSET(ioflag, IO_NDELAY)) { + error = EWOULDBLOCK; + goto unlock; + } + + do { + sc->sc_reading++; + error = rwsleep_nsec(&sc->sc_reading, &sc->sc_lock, + PRIBIO|PCATCH, "lltread", INFSLP); + sc->sc_reading--; + if (error != 0) + goto unlock; + } while (sc->sc_running); + } + + if (sc->sc_buffers == NULL) { + error = 0; + goto unlock; + } + + slot = sc->sc_read; + for (;;) { + if (slot >= sc->sc_nbuffers) { + error = 0; + goto unlock; + } + + llt = &sc->sc_buffers[slot]; + KASSERT(llt->llt_slot <= nitems(llt->llt_buffer.llt_slots)); + if 
(llt->llt_slot > 0) + break; + + slot++; + } + + error = uiomove(&llt->llt_buffer, + llt->llt_slot * sizeof(llt->llt_buffer.llt_slots[0]), uio); + if (error != 0) + goto unlock; + + sc->sc_read = slot + 1; + +unlock: + rw_exit(&sc->sc_lock); +lock: + KERNEL_LOCK(); + return (error); +} + +static void +lltrace_filt_detach(struct knote *kn) +{ + struct lltrace_softc *sc = kn->kn_hook; + + klist_remove(&sc->sc_sel.si_note, kn); +} + +static int +lltrace_filt_event(struct knote *kn, long hint) +{ + struct lltrace_softc *sc = kn->kn_hook; + int canread; + + canread = !sc->sc_running && (sc->sc_buffers != NULL) && + (sc->sc_read < sc->sc_nbuffers); + + kn->kn_data = canread ? sizeof(struct lltrace_buffer) : 0; + + return (canread); +} + +static int +lltrace_filt_modify(struct kevent *kev, struct knote *kn) +{ + struct lltrace_softc *sc = kn->kn_hook; + int active; + + rw_enter_write(&sc->sc_lock); + active = knote_modify_fn(kev, kn, lltrace_filt_event); + rw_exit_write(&sc->sc_lock); + + return (active); +} + +static int +lltrace_filt_process(struct knote *kn, struct kevent *kev) +{ + struct lltrace_softc *sc = kn->kn_hook; + int active; + + rw_enter_write(&sc->sc_lock); + active = knote_process_fn(kn, kev, lltrace_filt_event); + rw_exit_write(&sc->sc_lock); + + return (active); +} + +static const struct filterops lltrace_filtops = { + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, + .f_attach = NULL, + .f_detach = lltrace_filt_detach, + .f_event = lltrace_filt_event, + .f_modify = lltrace_filt_modify, + .f_process = lltrace_filt_process, +}; + +int +lltracekqfilter(dev_t dev, struct knote *kn) +{ + struct lltrace_softc *sc = lltrace_sc; + struct klist *klist; + + switch (kn->kn_filter) { + case EVFILT_READ: + klist = &sc->sc_sel.si_note; + kn->kn_fop = &lltrace_filtops; + break; + default: + return (EINVAL); + } + + kn->kn_hook = sc; + klist_insert(klist, kn); + + return (0); +} + +static struct lltrace_cpu * +lltrace_next(struct lltrace_cpu *llt) +{ + struct lltrace_softc *sc = lltrace_sc; + struct cpu_info *ci = curcpu(); + struct lltrace_cpu *nllt; + unsigned int slot, oslot, nslot; + + /* check if we were preempted */ + nllt = ci->ci_schedstate.spc_lltrace; + if (nllt != llt) { + /* something preempted us and swapped buffers already */ + return (nllt); + } + + slot = sc->sc_free; + for (;;) { + nslot = slot + 1; + if (nslot > sc->sc_nbuffers) { + if (sc->sc_mode == LLTRACE_MODE_HEAD) + return (NULL); + } + + oslot = atomic_cas_uint(&sc->sc_free, slot, nslot); + if (slot == oslot) + break; + + slot = oslot; + } + + slot %= sc->sc_nbuffers; + nllt = sc->sc_ring[slot]; + sc->sc_ring[slot] = NULL; + + slot = sc->sc_used; + for (;;) { + nslot = slot + 1; + + oslot = atomic_cas_uint(&sc->sc_used, slot, nslot); + if (slot == oslot) + break; + + slot = oslot; + } + + lltrace_cpu_init(nllt, sc, ci, llt->llt_pid, llt->llt_tid, + llt->llt_wakeid); + lltrace_cpu_fini(llt, sc); + + slot %= sc->sc_nbuffers; + sc->sc_ring[slot] = llt; + + ci->ci_schedstate.spc_lltrace = nllt; + + return (nllt); +} + +static struct lltrace_cpu * +lltrace_insert_record(struct lltrace_cpu *llt, uint64_t type, uint64_t record, + const uint64_t *extra, unsigned int n) +{ + unsigned int slot, oslot, nslot; + uint64_t *slots; + + record |= type << LLTRACE_TYPE_SHIFT; + record |= n++ << LLTRACE_LEN_SHIFT; + + slot = llt->llt_slot; + for (;;) { + nslot = slot + n; + if (nslot > nitems(llt->llt_buffer.llt_slots)) { + unsigned long s; + + s = intr_disable(); + llt = lltrace_next(llt); + intr_restore(s); + + if (llt == NULL) + return 
(NULL); + + slot = llt->llt_slot; + continue; + } + + oslot = lltrace_cas(&llt->llt_slot, slot, nslot); + if (slot == oslot) + break; + + slot = oslot; + } + + slots = llt->llt_buffer.llt_slots + slot; + *slots = record; + while (n > 1) { + *(++slots) = *(extra++); + n--; + } + + return (llt); +} + +static struct lltrace_cpu * +lltrace_insert(struct lltrace_cpu *llt, uint64_t type, uint64_t record, + const uint64_t *extra, unsigned int n) +{ + record |= lltrace_ts(); + return (lltrace_insert_record(llt, type, record, extra, n)); +} + +void +lltrace_statclock(struct lltrace_cpu *llt, int usermode, unsigned long pc) +{ +#if 0 + uint64_t event = usermode ? LLTRACE_EVENT_PC_U : LLTRACE_EVENT_PC_K; + uint64_t extra[1] = { pc }; + + lltrace_insert(llt, (event | nitems(extra)) << LLTRACE_EVENT_SHIFT, + extra, nitems(extra)); +#endif +} + +void +lltrace_syscall(struct lltrace_cpu *llt, register_t code, + size_t argsize, const register_t *args) +{ + uint64_t record = LLTRACE_EVENT_PHASE_START << + LLTRACE_EVENT_PHASE_SHIFT; + record |= LLTRACE_EVENT_CLASS_SYSCALL << + LLTRACE_EVENT_CLASS_SHIFT; + record |= ((uint64_t)code & LLTRACE_SYSCALL_MASK) << + LLTRACE_SYSCALL_SHIFT; + + if (argsize > 0) + record |= (uint64_t)args[0] << LLTRACE_SYSCALL_V_SHIFT; + + lltrace_insert(llt, LLTRACE_TYPE_EVENT, record, NULL, 0); +} + +void +lltrace_sysret(struct lltrace_cpu *llt, register_t code, + int error, const register_t retvals[2]) +{ + uint64_t record; + + record = LLTRACE_EVENT_PHASE_END << + LLTRACE_EVENT_PHASE_SHIFT; + record |= LLTRACE_EVENT_CLASS_SYSCALL << + LLTRACE_EVENT_CLASS_SHIFT; + record |= ((uint64_t)code & LLTRACE_SYSCALL_MASK) << + LLTRACE_SYSCALL_SHIFT; + record |= (uint64_t)error << LLTRACE_SYSCALL_V_SHIFT; + + llt = lltrace_insert(llt, LLTRACE_TYPE_EVENT, record, NULL, 0); + if (llt == NULL) { + struct lltrace_softc *sc = lltrace_sc; + + rw_enter_write(&sc->sc_lock); + if (sc->sc_running) + lltrace_stop(sc, curproc); + + knote_locked(&sc->sc_sel.si_note, 0); + if (sc->sc_reading) + wakeup(&sc->sc_reading); + rw_exit_write(&sc->sc_lock); + } +} + +struct lltrace_cpu * +lltrace_pidname(struct lltrace_cpu *llt, struct proc *p) +{ + struct process *ps = p->p_p; + uint64_t record; + uint64_t extra[3]; + unsigned int l, n; + + CTASSERT(sizeof(extra) == sizeof(ps->ps_comm)); + + record = LLTRACE_ID_TYPE_TID << LLTRACE_ID_TYPE_SHIFT; + record |= (uint64_t)p->p_tid << LLTRACE_ID_TID_SHIFT; + record |= (uint64_t)ps->ps_pid << LLTRACE_ID_TID_PID_SHIFT; + if (ISSET(ps->ps_flags, PS_SYSTEM)) + record |= LLTRACE_ID_TID_SYSTEM; + + extra[0] = extra[1] = extra[2] = 0; /* memset */ + l = strlcpy((char *)extra, p->p_p->ps_comm, sizeof(extra)); + + /* turn the string length into the number of slots we need */ + n = howmany(l, sizeof(uint64_t)); + + return (lltrace_insert_record(llt, LLTRACE_TYPE_ID, record, extra, n)); +} + +void +lltrace_switch(struct lltrace_cpu *llt, struct proc *op, struct proc *np) +{ + struct process *nps = np->p_p; + uint64_t state; + uint64_t record; + unsigned int pid; + unsigned int wake; + + llt = lltrace_pidname(llt, np); + if (llt == NULL) + return; + + record = LLTRACE_EVENT_PHASE_INSTANT << + LLTRACE_EVENT_PHASE_SHIFT; + record |= LLTRACE_EVENT_CLASS_SCHED << + LLTRACE_EVENT_CLASS_SHIFT; + record |= (uint64_t)np->p_tid << LLTRACE_EVENT_DATA_SHIFT; + + /* record what we think the state of the outgoing thread is */ + if (op == NULL) + state = LLTRACE_SCHED_STATE_DEAD; + else if (ISSET(op->p_flag, P_WEXIT)) + state = LLTRACE_SCHED_STATE_DYING; + else if (ISSET(op->p_flag, 
P_WSLEEP)) + state = LLTRACE_SCHED_STATE_SUSPENDED; + else + state = LLTRACE_SCHED_STATE_BLOCKED; + + record |= (state << LLTRACE_SCHED_STATE_SHIFT); + + pid = nps->ps_pid; + if (ISSET(nps->ps_flags, PS_SYSTEM)) + pid |= (1U << 31); + + llt->llt_pid = pid; + llt->llt_tid = np->p_tid; + + wake = np->p_wakeid != 0; + + lltrace_insert(llt, LLTRACE_TYPE_EVENT, record, &np->p_wakeid, wake); + + if (wake) + np->p_wakeid = 0; +} + +void +lltrace_runnable(struct lltrace_cpu *llt, struct proc *p) +{ + uint64_t record; + uint64_t wakeid; + + llt = lltrace_pidname(llt, p); + if (llt == NULL) + return; + + record = LLTRACE_EVENT_PHASE_INSTANT << + LLTRACE_EVENT_PHASE_SHIFT; + record |= LLTRACE_EVENT_CLASS_WAKE << + LLTRACE_EVENT_CLASS_SHIFT; + record |= (uint64_t)p->p_tid << LLTRACE_EVENT_DATA_SHIFT; + + wakeid = (uint64_t)cpu_number() << 48; + wakeid |= (llt->llt_wakeid += 2) & LLTRACE_MASK(48); + p->p_wakeid = wakeid; + + lltrace_insert(llt, LLTRACE_TYPE_EVENT, record, &p->p_wakeid, 1); +} + +void +lltrace_sched_enter(struct lltrace_cpu *llt) +{ + uint64_t record = LLTRACE_EVENT_PHASE_START << + LLTRACE_EVENT_PHASE_SHIFT; + record |= LLTRACE_EVENT_CLASS_SCHED << + LLTRACE_EVENT_CLASS_SHIFT; + + lltrace_insert(llt, LLTRACE_TYPE_EVENT, record, NULL, 0); +} + +void +lltrace_sched_leave(struct lltrace_cpu *llt) +{ + uint64_t record = LLTRACE_EVENT_PHASE_END << + LLTRACE_EVENT_PHASE_SHIFT; + record |= LLTRACE_EVENT_CLASS_SCHED << + LLTRACE_EVENT_CLASS_SHIFT; + + lltrace_insert(llt, LLTRACE_TYPE_EVENT, record, NULL, 0); +} + +void +lltrace_idle(struct lltrace_cpu *llt, unsigned int idle) +{ + uint64_t record = + (idle ? LLTRACE_EVENT_PHASE_START : LLTRACE_EVENT_PHASE_END) << + LLTRACE_EVENT_PHASE_SHIFT; + record |= LLTRACE_EVENT_CLASS_IDLE << LLTRACE_EVENT_CLASS_SHIFT; + + lltrace_insert(llt, LLTRACE_TYPE_EVENT, record, NULL, 0); +} + +void +lltrace_event_start(struct lltrace_cpu *llt, unsigned int class) +{ + uint64_t record = LLTRACE_EVENT_PHASE_START << + LLTRACE_EVENT_PHASE_SHIFT; + record |= class << LLTRACE_EVENT_CLASS_SHIFT; + + lltrace_insert(llt, LLTRACE_TYPE_EVENT, record, NULL, 0); +} + +void +lltrace_event_end(struct lltrace_cpu *llt, unsigned int class) +{ + uint64_t record = LLTRACE_EVENT_PHASE_END << + LLTRACE_EVENT_PHASE_SHIFT; + record |= class << LLTRACE_EVENT_CLASS_SHIFT; + + lltrace_insert(llt, LLTRACE_TYPE_EVENT, record, NULL, 0); +} + +static inline void +lltrace_intr(struct lltrace_cpu *llt, uint64_t phase, + uint64_t type, uint64_t data) +{ + uint64_t record = phase << LLTRACE_EVENT_PHASE_SHIFT; + record |= LLTRACE_EVENT_CLASS_INTR << LLTRACE_EVENT_CLASS_SHIFT; + record |= type << LLTRACE_INTR_T_SHIFT; + record |= data << LLTRACE_INTR_DATA_SHIFT; + + lltrace_insert(llt, LLTRACE_TYPE_EVENT, record, NULL, 0); +} + +void +lltrace_ipi(struct lltrace_cpu *llt, unsigned int cpu) +{ + lltrace_intr(llt, LLTRACE_EVENT_PHASE_INSTANT, + LLTRACE_INTR_T_IPI, cpu); +} + +void +lltrace_intr_enter(struct lltrace_cpu *llt, unsigned int type, unsigned int vec) +{ + lltrace_intr(llt, LLTRACE_EVENT_PHASE_START, type, vec); +} + +void +lltrace_intr_leave(struct lltrace_cpu *llt, unsigned int type, unsigned int vec) +{ + lltrace_intr(llt, LLTRACE_EVENT_PHASE_END, type, vec); +} + +void +lltrace_lock(struct lltrace_cpu *llt, void *lock, + unsigned int type, unsigned int step) +{ + uint64_t record = (uint64_t)type << LLTRACE_LK_TYPE_SHIFT; + record |= (uint64_t)step << LLTRACE_LK_PHASE_SHIFT; + record |= (uint64_t)lock << LLTRACE_LK_ADDR_SHIFT; + + lltrace_insert(llt, LLTRACE_TYPE_LOCKING, record, NULL, 
0); +} + +void +lltrace_count(struct lltrace_cpu *llt, unsigned int t, unsigned int v) +{ + uint64_t record; + + record = LLTRACE_EVENT_PHASE_INSTANT << LLTRACE_EVENT_PHASE_SHIFT; + record |= LLTRACE_EVENT_CLASS_COUNT << LLTRACE_EVENT_CLASS_SHIFT; + record |= (uint64_t)t << LLTRACE_COUNT_T_SHIFT; + record |= (uint64_t)v << LLTRACE_COUNT_V_SHIFT; + + lltrace_insert(llt, LLTRACE_TYPE_EVENT, record, NULL, 0); +} + +void +lltrace_mark(struct lltrace_cpu *llt) +{ +#if 0 + uint64_t record = LLTRACE_EVENT_MARK << LLTRACE_EVENT_SHIFT; + + lltrace_insert(llt, record, NULL, 0); +#endif +} + +static void +lltrace_fn(struct lltrace_cpu *llt, unsigned int phase, void *fn) +{ + uint64_t record = (uint64_t)phase << LLTRACE_EVENT_PHASE_SHIFT; + record |= LLTRACE_EVENT_CLASS_FUNC << LLTRACE_EVENT_CLASS_SHIFT; + /* 32 bits is enough to identify most symbols */ + record |= (uint64_t)fn << LLTRACE_EVENT_DATA_SHIFT; + + lltrace_insert(llt, LLTRACE_TYPE_EVENT, record, NULL, 0); +} + +void +lltrace_fn_enter(struct lltrace_cpu *llt, void *fn) +{ + lltrace_fn(llt, LLTRACE_EVENT_PHASE_START, fn); +} + +void +lltrace_fn_leave(struct lltrace_cpu *llt, void *fn) +{ + lltrace_fn(llt, LLTRACE_EVENT_PHASE_END, fn); +} + +void +__cyg_profile_func_enter(void *fn, void *pc) +{ + struct lltrace_cpu *llt; + + llt = lltrace_enter(); + if (llt == NULL) + return; + + lltrace_fn_enter(llt, fn); +} + +void +__cyg_profile_func_exit(void *fn, void *pc) +{ + struct lltrace_cpu *llt; + + llt = lltrace_enter(); + if (llt == NULL) + return; + + lltrace_fn_leave(llt, fn); +} Index: sys/kern/kern_clockintr.c =================================================================== RCS file: /cvs/src/sys/kern/kern_clockintr.c,v diff -u -p -r1.70 kern_clockintr.c --- sys/kern/kern_clockintr.c 25 Feb 2024 19:15:50 -0000 1.70 +++ sys/kern/kern_clockintr.c 1 Jul 2024 05:41:24 -0000 @@ -30,6 +30,7 @@ #include #include #include +#include void clockintr_cancel_locked(struct clockintr *); void clockintr_hardclock(struct clockrequest *, void *, void *); @@ -209,7 +210,9 @@ clockintr_dispatch(void *frame) cq->cq_running = cl; mtx_leave(&cq->cq_mtx); + LLTRACE_CPU(ci, lltrace_fn_enter, func); func(request, frame, arg); + LLTRACE_CPU(ci, lltrace_fn_leave, func); mtx_enter(&cq->cq_mtx); cq->cq_running = NULL; Index: sys/kern/kern_exec.c =================================================================== RCS file: /cvs/src/sys/kern/kern_exec.c,v diff -u -p -r1.255 kern_exec.c --- sys/kern/kern_exec.c 2 Apr 2024 08:39:16 -0000 1.255 +++ sys/kern/kern_exec.c 1 Jul 2024 05:41:24 -0000 @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -542,6 +543,8 @@ sys_execve(struct proc *p, void *v, regi memset(pr->ps_comm, 0, sizeof(pr->ps_comm)); strlcpy(pr->ps_comm, nid.ni_cnd.cn_nameptr, sizeof(pr->ps_comm)); pr->ps_acflag &= ~AFORK; + + LLTRACE(lltrace_pidname, p); /* record proc's vnode, for use by sysctl */ otvp = pr->ps_textvp; Index: sys/kern/kern_lock.c =================================================================== RCS file: /cvs/src/sys/kern/kern_lock.c,v diff -u -p -r1.74 kern_lock.c --- sys/kern/kern_lock.c 29 May 2024 18:55:45 -0000 1.74 +++ sys/kern/kern_lock.c 1 Jul 2024 05:41:24 -0000 @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -129,6 +130,7 @@ __mp_lock(struct __mp_lock *mpl) { struct __mp_lock_cpu *cpu = &mpl->mpl_cpus[cpu_number()]; unsigned long s; + unsigned int depth; #ifdef WITNESS if (!__mp_lock_held(mpl, curcpu())) @@ -136,15 +138,22 @@ __mp_lock(struct __mp_lock *mpl) LOP_EXCLUSIVE | 
LOP_NEWORDER, NULL); #endif + s = intr_disable(); - if (cpu->mplc_depth++ == 0) + depth = cpu->mplc_depth++; + if (depth == 0) { + LLTRACE(lltrace_lock, mpl, LLTRACE_LK_K, LLTRACE_LK_A_START); cpu->mplc_ticket = atomic_inc_int_nv(&mpl->mpl_users); + } intr_restore(s); __mp_lock_spin(mpl, cpu->mplc_ticket); membar_enter_after_atomic(); WITNESS_LOCK(&mpl->mpl_lock_obj, LOP_EXCLUSIVE); + + if (depth == 0) + LLTRACE(lltrace_lock, mpl, LLTRACE_LK_K, LLTRACE_LK_A_EXCL); } void @@ -164,6 +173,7 @@ __mp_unlock(struct __mp_lock *mpl) s = intr_disable(); if (--cpu->mplc_depth == 0) { + LLTRACE(lltrace_lock, mpl, LLTRACE_LK_K, LLTRACE_LK_R_EXCL); membar_exit(); mpl->mpl_ticket++; } @@ -180,6 +190,8 @@ __mp_release_all(struct __mp_lock *mpl) int i; #endif + LLTRACE(lltrace_lock, mpl, LLTRACE_LK_K, LLTRACE_LK_R_EXCL); + s = intr_disable(); rv = cpu->mplc_depth; #ifdef WITNESS @@ -251,29 +263,60 @@ __mtx_init(struct mutex *mtx, int wantip void mtx_enter(struct mutex *mtx) { - struct schedstate_percpu *spc = &curcpu()->ci_schedstate; + struct cpu_info *owner, *ci = curcpu(); + struct schedstate_percpu *spc = &ci->ci_schedstate; + int s; #ifdef MP_LOCKDEBUG int nticks = __mp_lock_spinout; #endif +#if NLLT > 0 + unsigned int lltev = LLTRACE_LK_I_EXCL; +#endif WITNESS_CHECKORDER(MUTEX_LOCK_OBJECT(mtx), LOP_EXCLUSIVE | LOP_NEWORDER, NULL); - spc->spc_spinning++; - while (mtx_enter_try(mtx) == 0) { + if (mtx->mtx_wantipl != IPL_NONE) + s = splraise(mtx->mtx_wantipl); + + owner = atomic_cas_ptr(&mtx->mtx_owner, NULL, ci); +#ifdef DIAGNOSTIC + if (__predict_false(owner == ci)) + panic("mtx %p: locking against myself", mtx); +#endif + if (owner != NULL) { + LLTRACE_SPC(spc, lltrace_lock, mtx, LLTRACE_LK_MTX, + LLTRACE_LK_A_START); + + spc->spc_spinning++; do { - CPU_BUSY_CYCLE(); + do { + CPU_BUSY_CYCLE(); #ifdef MP_LOCKDEBUG - if (--nticks == 0) { - db_printf("%s: %p lock spun out\n", - __func__, mtx); - db_enter(); - nticks = __mp_lock_spinout; - } + if (--nticks == 0) { + db_printf("%s: %p lock spun out\n", + __func__, mtx); + db_enter(); + nticks = __mp_lock_spinout; + } +#endif + } while (mtx->mtx_owner != NULL); + } while (atomic_cas_ptr(&mtx->mtx_owner, NULL, ci) != NULL); + spc->spc_spinning--; + +#if NLLT > 0 + lltev = LLTRACE_LK_A_EXCL; #endif - } while (mtx->mtx_owner != NULL); } - spc->spc_spinning--; + + membar_enter_after_atomic(); + if (mtx->mtx_wantipl != IPL_NONE) + mtx->mtx_oldipl = s; +#ifdef DIAGNOSTIC + ci->ci_mutex_level++; +#endif + WITNESS_LOCK(MUTEX_LOCK_OBJECT(mtx), LOP_EXCLUSIVE); + LLTRACE_SPC(spc, lltrace_lock, mtx, LLTRACE_LK_MTX, lltev); } int @@ -302,12 +345,15 @@ mtx_enter_try(struct mutex *mtx) ci->ci_mutex_level++; #endif WITNESS_LOCK(MUTEX_LOCK_OBJECT(mtx), LOP_EXCLUSIVE); + LLTRACE_CPU(ci, lltrace_lock, mtx, LLTRACE_LK_MTX, + LLTRACE_LK_I_EXCL); return (1); } if (mtx->mtx_wantipl != IPL_NONE) splx(s); + LLTRACE_CPU(ci, lltrace_lock, mtx, LLTRACE_LK_MTX, LLTRACE_LK_I_FAIL); return (0); } #else @@ -337,6 +383,7 @@ mtx_enter(struct mutex *mtx) ci->ci_mutex_level++; #endif WITNESS_LOCK(MUTEX_LOCK_OBJECT(mtx), LOP_EXCLUSIVE); + LLTRACE(lltrace_lock, mtx, LLTRACE_LK_MTX, LLTRACE_LK_I_EXCL); } int @@ -357,6 +404,7 @@ mtx_leave(struct mutex *mtx) return; MUTEX_ASSERT_LOCKED(mtx); + LLTRACE(lltrace_lock, mtx, LLTRACE_LK_MTX, LLTRACE_LK_R_EXCL); WITNESS_UNLOCK(MUTEX_LOCK_OBJECT(mtx), LOP_EXCLUSIVE); #ifdef DIAGNOSTIC Index: sys/kern/kern_rwlock.c =================================================================== RCS file: /cvs/src/sys/kern/kern_rwlock.c,v diff -u -p -r1.50 
kern_rwlock.c
--- sys/kern/kern_rwlock.c	14 Jul 2023 07:07:08 -0000	1.50
+++ sys/kern/kern_rwlock.c	1 Jul 2024 05:41:24 -0000
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include
 
 void	rw_do_exit(struct rwlock *, unsigned long);
 
@@ -110,6 +111,7 @@ rw_enter_read(struct rwlock *rwl)
 		membar_enter_after_atomic();
 		WITNESS_CHECKORDER(&rwl->rwl_lock_obj, LOP_NEWORDER, NULL);
 		WITNESS_LOCK(&rwl->rwl_lock_obj, 0);
+		LLTRACE(lltrace_lock, rwl, LLTRACE_LK_RW, LLTRACE_LK_I_SHARED);
 	}
 }
 
@@ -126,6 +128,7 @@ rw_enter_write(struct rwlock *rwl)
 		WITNESS_CHECKORDER(&rwl->rwl_lock_obj,
 		    LOP_EXCLUSIVE | LOP_NEWORDER, NULL);
 		WITNESS_LOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);
+		LLTRACE(lltrace_lock, rwl, LLTRACE_LK_RW, LLTRACE_LK_I_EXCL);
 	}
 }
 
@@ -135,6 +138,7 @@ rw_exit_read(struct rwlock *rwl)
 	unsigned long owner;
 
 	rw_assert_rdlock(rwl);
+	LLTRACE(lltrace_lock, rwl, LLTRACE_LK_RW, LLTRACE_LK_R_SHARED);
 	WITNESS_UNLOCK(&rwl->rwl_lock_obj, 0);
 
 	membar_exit_before_atomic();
@@ -150,6 +154,7 @@ rw_exit_write(struct rwlock *rwl)
 	unsigned long owner;
 
 	rw_assert_wrlock(rwl);
+	LLTRACE(lltrace_lock, rwl, LLTRACE_LK_RW, LLTRACE_LK_R_EXCL);
 	WITNESS_UNLOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);
 
 	membar_exit_before_atomic();
@@ -249,6 +254,7 @@ rw_enter(struct rwlock *rwl, int flags)
 	op = &rw_ops[(flags & RW_OPMASK) - 1];
 
 	inc = op->inc + RW_PROC(curproc) * op->proc_mult;
+	LLTRACE(lltrace_lock, rwl, LLTRACE_LK_RW, LLTRACE_LK_A_START);
 retry:
 	while (__predict_false(((o = rwl->rwl_owner) & op->check) != 0)) {
 		unsigned long set = o | op->wait_set;
@@ -272,8 +278,10 @@ retry:
 
 		rw_enter_diag(rwl, flags);
 
-		if (flags & RW_NOSLEEP)
-			return (EBUSY);
+		if (flags & RW_NOSLEEP) {
+			error = EBUSY;
+			goto abort;
+		}
 
 		prio = op->wait_prio;
 		if (flags & RW_INTR)
@@ -285,15 +293,28 @@ retry:
 		error = sleep_finish(0, do_sleep);
 		if ((flags & RW_INTR) && (error != 0))
-			return (error);
-		if (flags & RW_SLEEPFAIL)
-			return (EAGAIN);
+			goto abort;
+		if (flags & RW_SLEEPFAIL) {
+			error = EAGAIN;
+			goto abort;
+		}
 	}
 
 	if (__predict_false(rw_cas(&rwl->rwl_owner, o, o + inc)))
 		goto retry;
 	membar_enter_after_atomic();
 
+	if (flags & RW_DOWNGRADE) {
+		WITNESS_DOWNGRADE(&rwl->rwl_lock_obj, lop_flags);
+		LLTRACE(lltrace_lock, rwl, LLTRACE_LK_RW,
+		    LLTRACE_LK_DOWNGRADE);
+	} else {
+		WITNESS_LOCK(&rwl->rwl_lock_obj, lop_flags);
+		LLTRACE(lltrace_lock, rwl, LLTRACE_LK_RW,
+		    ISSET(flags, RW_WRITE) ?
+		    LLTRACE_LK_A_EXCL : LLTRACE_LK_A_SHARED);
+	}
+
 	/*
 	 * If old lock had RWLOCK_WAIT and RWLOCK_WRLOCK set, it means we
 	 * downgraded a write lock and had possible read waiter, wake them
@@ -303,12 +324,10 @@ retry:
 	    (RWLOCK_WRLOCK|RWLOCK_WAIT)))
 		wakeup(rwl);
 
-	if (flags & RW_DOWNGRADE)
-		WITNESS_DOWNGRADE(&rwl->rwl_lock_obj, lop_flags);
-	else
-		WITNESS_LOCK(&rwl->rwl_lock_obj, lop_flags);
-
 	return (0);
+abort:
+	LLTRACE(lltrace_lock, rwl, LLTRACE_LK_RW, LLTRACE_LK_A_ABORT);
+	return (error);
 }
 
 void
@@ -325,6 +344,8 @@ rw_exit(struct rwlock *rwl)
 		rw_assert_wrlock(rwl);
 	else
 		rw_assert_rdlock(rwl);
+	LLTRACE(lltrace_lock, rwl, LLTRACE_LK_RW,
+	    wrlock ? LLTRACE_LK_R_EXCL : LLTRACE_LK_R_SHARED);
 	WITNESS_UNLOCK(&rwl->rwl_lock_obj, wrlock ?
LOP_EXCLUSIVE : 0); membar_exit_before_atomic(); Index: sys/kern/kern_sched.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sched.c,v diff -u -p -r1.96 kern_sched.c --- sys/kern/kern_sched.c 3 Jun 2024 12:48:25 -0000 1.96 +++ sys/kern/kern_sched.c 1 Jul 2024 05:41:24 -0000 @@ -191,7 +191,10 @@ sched_idle(void *v) wakeup(spc); } #endif + + LLTRACE(lltrace_idle, 1); cpu_idle_cycle(); + LLTRACE(lltrace_idle, 0); } cpu_idle_leave(); cpuset_del(&sched_idle_cpus, ci); @@ -250,6 +253,7 @@ sched_toidle(void) uvmexp.swtch++; TRACEPOINT(sched, off__cpu, idle->p_tid + THREAD_PID_OFFSET, idle->p_p->ps_pid); + LLTRACE(lltrace_switch, NULL, idle); cpu_switchto(NULL, idle); panic("cpu_switchto returned"); } @@ -595,6 +599,7 @@ sched_proc_to_cpu_cost(struct cpu_info * if (cpuset_isset(&sched_queued_cpus, ci)) cost += spc->spc_nrun * sched_cost_runnable; +#if 0 /* * Try to avoid the primary cpu as it handles hardware interrupts. * @@ -603,6 +608,7 @@ sched_proc_to_cpu_cost(struct cpu_info * */ if (CPU_IS_PRIMARY(ci)) cost += sched_cost_runnable; +#endif /* * If the proc is on this cpu already, lower the cost by how much Index: sys/kern/kern_sensors.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sensors.c,v diff -u -p -r1.40 kern_sensors.c --- sys/kern/kern_sensors.c 5 Dec 2022 23:18:37 -0000 1.40 +++ sys/kern/kern_sensors.c 1 Jul 2024 05:41:24 -0000 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "hotplug.h" @@ -260,8 +261,11 @@ sensor_task_work(void *xst) atomic_inc_int(&sensors_running); rw_enter_write(&st->lock); period = st->period; - if (period > 0 && !sensors_quiesced) + if (period > 0 && !sensors_quiesced) { + LLTRACE(lltrace_fn_enter, st->func); st->func(st->arg); + LLTRACE(lltrace_fn_leave, st->func); + } rw_exit_write(&st->lock); if (atomic_dec_int_nv(&sensors_running) == 0) { if (sensors_quiesced) Index: sys/kern/kern_synch.c =================================================================== RCS file: /cvs/src/sys/kern/kern_synch.c,v diff -u -p -r1.205 kern_synch.c --- sys/kern/kern_synch.c 3 Jun 2024 12:48:25 -0000 1.205 +++ sys/kern/kern_synch.c 1 Jul 2024 05:41:24 -0000 @@ -37,6 +37,8 @@ * @(#)kern_synch.c 8.6 (Berkeley) 1/21/94 */ +#include "llt.h" + #include #include #include @@ -522,6 +524,7 @@ unsleep(struct proc *p) p->p_wmesg = NULL; TRACEPOINT(sched, unsleep, p->p_tid + THREAD_PID_OFFSET, p->p_p->ps_pid); + LLTRACE(lltrace_runnable, p); } } @@ -558,6 +561,7 @@ wakeup_n(const volatile void *ident, int TAILQ_REMOVE(&wakeq, p, p_runq); TRACEPOINT(sched, unsleep, p->p_tid + THREAD_PID_OFFSET, p->p_p->ps_pid); + LLTRACE(lltrace_runnable, p); if (p->p_stat == SSLEEP) setrunnable(p); } Index: sys/kern/kern_task.c =================================================================== RCS file: /cvs/src/sys/kern/kern_task.c,v diff -u -p -r1.35 kern_task.c --- sys/kern/kern_task.c 14 May 2024 08:26:13 -0000 1.35 +++ sys/kern/kern_task.c 1 Jul 2024 05:41:24 -0000 @@ -24,6 +24,7 @@ #include #include #include +#include #include "kcov.h" #if NKCOV > 0 @@ -443,7 +444,9 @@ taskq_thread(void *xtq) #if NKCOV > 0 kcov_remote_enter(KCOV_REMOTE_COMMON, work.t_process); #endif + LLTRACE(lltrace_fn_enter, work.t_func); (*work.t_func)(work.t_arg); + LLTRACE(lltrace_fn_leave, work.t_func); #if NKCOV > 0 kcov_remote_leave(KCOV_REMOTE_COMMON, work.t_process); #endif Index: sys/kern/kern_timeout.c =================================================================== RCS file: 
/cvs/src/sys/kern/kern_timeout.c,v diff -u -p -r1.97 kern_timeout.c --- sys/kern/kern_timeout.c 23 Feb 2024 16:51:39 -0000 1.97 +++ sys/kern/kern_timeout.c 1 Jul 2024 05:41:24 -0000 @@ -35,6 +35,7 @@ #include /* _Q_INVALIDATE */ #include #include +#include #ifdef DDB #include @@ -661,7 +662,9 @@ timeout_run(struct timeout *to) #if NKCOV > 0 kcov_remote_enter(KCOV_REMOTE_COMMON, kcov_process); #endif + LLTRACE(lltrace_fn_enter, fn); fn(arg); + LLTRACE(lltrace_fn_leave, fn); #if NKCOV > 0 kcov_remote_leave(KCOV_REMOTE_COMMON, kcov_process); #endif @@ -739,6 +742,8 @@ softclock(void *arg) int need_proc_mp; #endif + //LLTRACE(lltrace_irq, LLTRACE_IRQ_BOTTOM_HALF, 0); + first_new = NULL; new = 0; @@ -773,6 +778,8 @@ softclock(void *arg) if (need_proc_mp) wakeup(&timeout_proc_mp); #endif + + //LLTRACE(lltrace_irqret, LLTRACE_IRQ_BOTTOM_HALF, 0); } void Index: sys/kern/sched_bsd.c =================================================================== RCS file: /cvs/src/sys/kern/sched_bsd.c,v diff -u -p -r1.93 sched_bsd.c --- sys/kern/sched_bsd.c 3 Jun 2024 12:48:25 -0000 1.93 +++ sys/kern/sched_bsd.c 1 Jul 2024 05:41:24 -0000 @@ -351,6 +351,8 @@ mi_switch(void) int hold_count; #endif + LLTRACE(lltrace_sched_enter); + KASSERT(p->p_stat != SONPROC); SCHED_ASSERT_LOCKED(); @@ -411,14 +413,19 @@ mi_switch(void) uvmexp.swtch++; TRACEPOINT(sched, off__cpu, nextproc->p_tid + THREAD_PID_OFFSET, nextproc->p_p->ps_pid); + LLTRACE(lltrace_switch, p, nextproc); cpu_switchto(p, nextproc); TRACEPOINT(sched, on__cpu, NULL); + + //LLTRACE(lltrace_pidname, p); } else { TRACEPOINT(sched, remain__cpu, NULL); p->p_stat = SONPROC; } clear_resched(curcpu()); + + LLTRACE(lltrace_sched_leave); SCHED_ASSERT_LOCKED(); Index: sys/sys/conf.h =================================================================== RCS file: /cvs/src/sys/sys/conf.h,v diff -u -p -r1.163 conf.h --- sys/sys/conf.h 11 Jun 2024 01:49:17 -0000 1.163 +++ sys/sys/conf.h 1 Jul 2024 05:41:24 -0000 @@ -326,6 +326,21 @@ extern struct cdevsw cdevsw[]; (dev_type_stop((*))) enodev, 0, \ (dev_type_mmap((*))) enodev } +/* open, close, read, ioctl, poll, kqfilter */ +#define cdev_lltrace_init(c,n) { \ + .d_open = dev_init(c,n,open), \ + .d_close = dev_init(c,n,close), \ + .d_read = dev_init(c,n,read), \ + .d_write = (dev_type_write((*))) enodev, \ + .d_ioctl = dev_init(c,n,ioctl), \ + .d_stop = (dev_type_stop((*))) enodev, \ + .d_tty = NULL, \ + .d_mmap = (dev_type_mmap((*))) enodev, \ + .d_type = 0, \ + .d_flags = 0, \ + .d_kqfilter = dev_init(c,n,kqfilter), \ +} + /* open, close, read, write, ioctl, stop, tty, mmap, kqfilter */ #define cdev_wsdisplay_init(c,n) { \ dev_init(c,n,open), dev_init(c,n,close), dev_init(c,n,read), \ @@ -615,6 +630,7 @@ cdev_decl(wsmux); cdev_decl(ksyms); cdev_decl(kstat); +cdev_decl(lltrace); cdev_decl(bio); cdev_decl(vscsi); Index: sys/sys/lltrace.h =================================================================== RCS file: sys/sys/lltrace.h diff -N sys/sys/lltrace.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/sys/lltrace.h 1 Jul 2024 05:41:24 -0000 @@ -0,0 +1,297 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2022 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef _SYS_LLTRACE_H_ +#define _SYS_LLTRACE_H_ + +/* + * lltrace is heavily based KUTrace (kernel/userland tracing) by + * Richard L. Sites. + */ + +#define LLTRACE_NSLOTS 8192 + +struct lltrace_buffer { + uint64_t llt_slots[LLTRACE_NSLOTS]; +}; + +#define LLTIOCSTART _IO('t',128) +#define LLTIOCSTOP _IO('t',129) +#define LLTIOCFLUSH _IO('t',130) + +/* + * trace until all the buffers are used, or trace and reuse buffers. + */ +#define LLTRACE_MODE_HEAD 0 +#define LLTRACE_MODE_TAIL 1 +#define LLTRACE_MODE_COUNT 2 + +#define LLTIOCSMODE _IOW('t', 131, unsigned int) +#define LLTIOCGMODE _IOR('t', 131, unsigned int) + +/* + * how much memory in MB to allocate for lltrace_buffer structs + * during tracing. + */ + +#define LLTRACE_BLEN_MIN 1 +#define LLTRACE_BLEN_MAX 128 + +#define LLTIOCSBLEN _IOW('t', 132, unsigned int) +#define LLTIOCGBLEN _IOR('t', 132, unsigned int) + +/* + * lltrace collects kernel events in per-CPU buffers. + */ + +/* + * The first 8 words of the per-CPU buffer are dedicated to metadata + * about the CPU and the period of time over which events were + * collected. + */ + +struct lltrace_header { + /* slots[0] */ + uint32_t h_cpu; + uint32_t h_idletid; + + /* slots[1] */ + uint64_t h_boottime; + + /* slots[2] */ + uint64_t h_start_cy; + /* slots[3] */ + uint64_t h_start_ns; + /* slots[4] */ + uint64_t h_end_cy; + /* slots[5] */ + uint64_t h_end_ns; + + /* slots[6] */ + uint32_t h_pid; + uint32_t h_tid; + /* slots[7] */ + uint64_t h_zero; +}; + +#define LLTRACE_MASK(_w) ((1ULL << (_w)) - 1) + +#define LLTRACE_TYPE_SHIFT 0 +#define LLTRACE_TYPE_WIDTH 3 +#define LLTRACE_TYPE_MASK LLTRACE_MASK(LLTRACE_TYPE_WIDTH) + +#define LLTRACE_TYPE_ID 0x0ULL +#define LLTRACE_TYPE_EVENT 0x1ULL +#define LLTRACE_TYPE_LOCKING 0x2ULL + +#define LLTRACE_LEN_SHIFT (LLTRACE_TYPE_SHIFT + LLTRACE_TYPE_WIDTH) +#define LLTRACE_LEN_WIDTH 3 +#define LLTRACE_LEN_MASK LLTRACE_MASK(LLTRACE_LEN_WIDTH) + +/* most records have a timestamp */ +#define LLTRACE_TS_TYPES ( \ + (1 << LLTRACE_TYPE_EVENT) | \ + (1 << LLTRACE_TYPE_LOCKING) \ + ) + +#define LLTRACE_TS_SHIFT (LLTRACE_LEN_SHIFT + LLTRACE_LEN_WIDTH) +#define LLTRACE_TS_WIDTH 20 +#define LLTRACE_TS_MASK LLTRACE_MASK(20) + +/* + * id records + */ + +/* tid record contains pid and kthread flag, followed by proc name */ +#define LLTRACE_ID_TYPE_SHIFT (LLTRACE_LEN_SHIFT + LLTRACE_LEN_WIDTH) +#define LLTRACE_ID_TYPE_WIDTH 6 +#define LLTRACE_ID_TYPE_MASK LLTRACE_MASK(3) +#define LLTRACE_ID_TYPE_TID 0x0 + +#define LLTRACE_ID_TID_SHIFT (LLTRACE_ID_TYPE_SHIFT + LLTRACE_ID_TYPE_WIDTH) +#define LLTRACE_ID_TID_WIDTH 20 /* >= than 19 bit TID_MASK */ +#define LLTRACE_ID_TID_MASK LLTRACE_MASK(LLTRACE_ID_TID_WIDTH) + +#define LLTRACE_ID_TID_PID_SHIFT 32 +#define LLTRACE_ID_TID_PID_WIDTH 20 /* >= whatever kernel pid range is */ +#define LLTRACE_ID_TID_PID_MASK LLTRACE_MASK(LLTRACE_ID_TID_PID_WIDTH) +#define LLTRACE_ID_TID_SYSTEM (1ULL << 63) /* kernel thread */ + +/* + * event records + */ + +#define LLTRACE_EVENT_PHASE_SHIFT (LLTRACE_TS_SHIFT + LLTRACE_TS_WIDTH) +#define LLTRACE_EVENT_PHASE_WIDTH 2 +#define LLTRACE_EVENT_PHASE_MASK LLTRACE_MASK(LLTRACE_EVENT_PHASE_WIDTH) +#define LLTRACE_EVENT_PHASE_INSTANT 0x0 +#define 
LLTRACE_EVENT_PHASE_START 0x1 +#define LLTRACE_EVENT_PHASE_STEP 0x2 +#define LLTRACE_EVENT_PHASE_END 0x3 + +#define LLTRACE_EVENT_CLASS_WIDTH 4 +#define LLTRACE_EVENT_CLASS_SHIFT \ + (LLTRACE_EVENT_PHASE_SHIFT + LLTRACE_EVENT_PHASE_WIDTH) +#define LLTRACE_EVENT_CLASS_MASK LLTRACE_MASK(LLTRACE_EVENT_CLASS_WIDTH) +#define LLTRACE_EVENT_CLASS_SYSCALL 0 +#define LLTRACE_EVENT_CLASS_IDLE 1 +#define LLTRACE_EVENT_CLASS_PAGEFAULT 2 +#define LLTRACE_EVENT_CLASS_INTR 3 +#define LLTRACE_EVENT_CLASS_SCHED 4 +#define LLTRACE_EVENT_CLASS_FUNC 5 +#define LLTRACE_EVENT_CLASS_WAKE 6 +#define LLTRACE_EVENT_CLASS_COUNT 7 + +#define LLTRACE_EVENT_DATA_SHIFT \ + (LLTRACE_EVENT_CLASS_SHIFT + LLTRACE_EVENT_CLASS_WIDTH) +#define LLTRACE_EVENT_DATA_SHIFT_CHECK 32 + +#define LLTRACE_SYSCALL_SHIFT LLTRACE_EVENT_DATA_SHIFT +#define LLTRACE_SYSCALL_WIDTH 10 +#define LLTRACE_SYSCALL_MASK LLTRACE_MASK(LLTRACE_SYSCALL_WIDTH) + +#define LLTRACE_SCHED_TID_SHIFT LLTRACE_EVENT_DATA_SHIFT +#define LLTRACE_SCHED_TID_WIDTH LLTRACE_ID_TID_WIDTH +#define LLTRACE_SCHED_TID_MASK LLTRACE_MASK(LLTRACE_SCHED_TID_WIDTH) +#define LLTRACE_SCHED_STATE_SHIFT \ + (LLTRACE_EVENT_DATA_SHIFT + LLTRACE_ID_TID_WIDTH) +#define LLTRACE_SCHED_STATE_WIDTH 4 +#define LLTRACE_SCHED_STATE_MASK LLTRACE_MASK(LLTRACE_SCHED_STATE_WIDTH) +#define LLTRACE_SCHED_STATE_NEW 0 +#define LLTRACE_SCHED_STATE_RUNNING 1 +#define LLTRACE_SCHED_STATE_SUSPENDED 2 +#define LLTRACE_SCHED_STATE_BLOCKED 3 +#define LLTRACE_SCHED_STATE_DYING 4 +#define LLTRACE_SCHED_STATE_DEAD 5 + +#define LLTRACE_SYSCALL_V_SHIFT \ + (LLTRACE_SYSCALL_SHIFT + LLTRACE_SYSCALL_WIDTH) + +#define LLTRACE_INTR_T_SHIFT LLTRACE_EVENT_DATA_SHIFT +#define LLTRACE_INTR_T_WIDTH 2 +#define LLTRACE_INTR_T_MASK LLTRACE_MASK(LLTRACE_INTR_T_WIDTH) +#define LLTRACE_INTR_T_HW 0ULL +#define LLTRACE_INTR_T_SW 1ULL +#define LLTRACE_INTR_T_IPI 2ULL +#define LLTRACE_INTR_T_CLOCK 3ULL + +#define LLTRACE_INTR_DATA_SHIFT \ + (LLTRACE_INTR_T_SHIFT + LLTRACE_INTR_T_WIDTH) + +/* record a count of something */ +#define LLTRACE_COUNT_T_SHIFT LLTRACE_EVENT_DATA_SHIFT +#define LLTRACE_COUNT_T_WIDTH 8 +#define LLTRACE_COUNT_T_MASK LLTRACE_MASK(LLTRACE_COUNT_T_WIDTH) + +#define LLTRACE_COUNT_T_PKTS_IFIQ 0 +#define LLTRACE_COUNT_T_PKTS_NETTQ 1 +#define LLTRACE_COUNT_T_PKTS_IFQ 2 +#define LLTRACE_COUNT_T_PKTS_QDROP 3 +#define LLTRACE_COUNT_T_PKTS_HDROP 4 + +#define LLTRACE_COUNT_V_SHIFT \ + (LLTRACE_COUNT_T_SHIFT + LLTRACE_COUNT_T_WIDTH) + +/* + * locking records + */ + +#define LLTRACE_LK_TYPE_SHIFT (LLTRACE_TS_SHIFT + LLTRACE_TS_WIDTH) +#define LLTRACE_LK_TYPE_WIDTH 2 +#define LLTRACE_LK_TYPE_MASK LLTRACE_MASK(LLTRACE_LK_TYPE_WIDTH) +#define LLTRACE_LK_RW 0x0 +#define LLTRACE_LK_MTX 0x1 +#define LLTRACE_LK_K 0x2 + +#define LLTRACE_LK_PHASE_SHIFT \ + (LLTRACE_LK_TYPE_SHIFT + LLTRACE_LK_TYPE_WIDTH) +#define LLTRACE_LK_PHASE_WIDTH 4 +#define LLTRACE_LK_PHASE_MASK LLTRACE_MASK(LLTRACE_LK_PHASE_WIDTH) +#define LLTRACE_LK_I_EXCL 0x0 /* instantly got wr lock */ +#define LLTRACE_LK_I_SHARED 0x1 /* instantly got rd lock */ +#define LLTRACE_LK_A_START 0x2 /* acquiring lock */ +#define LLTRACE_LK_A_EXCL 0x3 /* acquired wr lock */ +#define LLTRACE_LK_A_SHARED 0x4 /* acquired rd lock */ +#define LLTRACE_LK_A_ABORT 0x5 /* acquire aborted */ +#define LLTRACE_LK_DOWNGRADE 0x6 /* wr to rd lock */ +#define LLTRACE_LK_R_EXCL 0x7 /* released wr lock */ +#define LLTRACE_LK_R_SHARED 0x8 /* released rd lock */ +#define LLTRACE_LK_I_FAIL 0x9 /* try failed */ + +#define LLTRACE_LK_ADDR_SHIFT \ + (LLTRACE_LK_PHASE_SHIFT + LLTRACE_LK_PHASE_WIDTH) + 
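+/*
+ * A worked example, derived from the shift/width macros above and
+ * mirroring what lltrace_switch() emits: an "instant" scheduler event
+ * record with 20-bit timestamp t and data d is built up roughly as
+ *
+ *	record  = LLTRACE_TYPE_EVENT << LLTRACE_TYPE_SHIFT;	   bits 0-2
+ *	record |= nextra << LLTRACE_LEN_SHIFT;			   bits 3-5
+ *	record |= (t & LLTRACE_TS_MASK) << LLTRACE_TS_SHIFT;	   bits 6-25
+ *	record |= LLTRACE_EVENT_PHASE_INSTANT <<
+ *	    LLTRACE_EVENT_PHASE_SHIFT;				   bits 26-27
+ *	record |= LLTRACE_EVENT_CLASS_SCHED <<
+ *	    LLTRACE_EVENT_CLASS_SHIFT;				   bits 28-31
+ *	record |= d << LLTRACE_EVENT_DATA_SHIFT;		   bits 32-63
+ *
+ * where nextra is the number of extra 64-bit slots (e.g. a wake id)
+ * that follow the record in the buffer.
+ */
+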
+#ifdef _KERNEL
+
+struct lltrace_cpu;
+
+/*
+ * lltrace_enter*() return the cpu's trace buffer, or NULL when
+ * tracing is not running, so the LLTRACE* macros cost a single
+ * load and test in the disabled case.
+ */
+static inline struct lltrace_cpu *
+lltrace_enter_spc(struct schedstate_percpu *spc)
+{
+	return (READ_ONCE(spc->spc_lltrace));
+}
+
+static inline struct lltrace_cpu *
+lltrace_enter_cpu(struct cpu_info *ci)
+{
+	return lltrace_enter_spc(&ci->ci_schedstate);
+}
+
+static inline struct lltrace_cpu *
+lltrace_enter(void)
+{
+	return lltrace_enter_cpu(curcpu());
+}
+
+void	lltrace_idle(struct lltrace_cpu *, unsigned int);
+void	lltrace_statclock(struct lltrace_cpu *, int, unsigned long);
+
+void	lltrace_syscall(struct lltrace_cpu *, register_t,
+	    size_t, const register_t *);
+void	lltrace_sysret(struct lltrace_cpu *, register_t,
+	    int, const register_t [2]);
+struct lltrace_cpu *
+	lltrace_pidname(struct lltrace_cpu *, struct proc *);
+void	lltrace_switch(struct lltrace_cpu *, struct proc *, struct proc *);
+void	lltrace_sched_enter(struct lltrace_cpu *);
+void	lltrace_sched_leave(struct lltrace_cpu *);
+void	lltrace_runnable(struct lltrace_cpu *, struct proc *);
+
+void	lltrace_event_start(struct lltrace_cpu *, unsigned int);
+void	lltrace_event_end(struct lltrace_cpu *, unsigned int);
+void	lltrace_count(struct lltrace_cpu *, unsigned int, unsigned int);
+
+void	lltrace_lock(struct lltrace_cpu *, void *, unsigned int, unsigned int);
+
+void	lltrace_pkts(struct lltrace_cpu *, unsigned int, unsigned int);
+void	lltrace_mark(struct lltrace_cpu *);
+
+void	lltrace_fn_enter(struct lltrace_cpu *, void *);
+void	lltrace_fn_leave(struct lltrace_cpu *, void *);
+
+/* MD bits */
+
+void	lltrace_ipi(struct lltrace_cpu *, unsigned int);
+#define lltrace_ipi_bcast(_llt) lltrace_ipi((_llt), ~0U)
+
+void	lltrace_intr_enter(struct lltrace_cpu *, unsigned int, unsigned int);
+void	lltrace_intr_leave(struct lltrace_cpu *, unsigned int, unsigned int);
+
+#endif /* _KERNEL */
+
+#endif /* _SYS_LLTRACE_H_ */
Index: sys/sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
diff -u -p -r1.361 proc.h
--- sys/sys/proc.h	20 May 2024 10:32:20 -0000	1.361
+++ sys/sys/proc.h	1 Jul 2024 05:41:24 -0000
@@ -355,6 +355,7 @@ struct proc {
 
 	/* scheduling */
 	int	p_cpticks;	 /* Ticks of cpu time. */
+	uint64_t p_wakeid;		/* [S] */
 	const volatile void *p_wchan;	/* [S] Sleep address. */
 	struct timeout p_sleep_to;/* timeout for tsleep() */
 	const char *p_wmesg;		/* [S] Reason for sleep. */
Index: sys/sys/sched.h
===================================================================
RCS file: /cvs/src/sys/sys/sched.h,v
diff -u -p -r1.72 sched.h
--- sys/sys/sched.h	3 Jun 2024 12:48:25 -0000	1.72
+++ sys/sys/sched.h	1 Jul 2024 05:41:24 -0000
@@ -101,11 +101,13 @@ struct cpustats {
 #define SCHED_NQS	32			/* 32 run queues. */
 
 struct smr_entry;
+struct lltrace_cpu;
 
 /*
  * Per-CPU scheduler state.
  */
 struct schedstate_percpu {
+	struct lltrace_cpu *spc_lltrace;
 	struct proc *spc_idleproc;	/* idle proc for this cpu */
 	TAILQ_HEAD(prochead, proc) spc_qs[SCHED_NQS];
 	LIST_HEAD(,proc) spc_deadproc;
Index: sys/sys/syscall_mi.h
===================================================================
RCS file: /cvs/src/sys/sys/syscall_mi.h,v
diff -u -p -r1.34 syscall_mi.h
--- sys/sys/syscall_mi.h	2 Jun 2024 15:31:57 -0000	1.34
+++ sys/sys/syscall_mi.h	1 Jul 2024 05:41:24 -0000
@@ -157,6 +157,7 @@ mi_syscall(struct proc *p, register_t co
 		KERNEL_UNLOCK();
 	}
 #endif
+	LLTRACE_CPU(p->p_cpu, lltrace_syscall, code, callp->sy_argsize, argp);
 
 	/* SP must be within MAP_STACK space */
 	if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
@@ -190,6 +191,7 @@ static inline void
 mi_syscall_return(struct proc *p, register_t code, int error,
     const register_t retval[2])
 {
+	LLTRACE_CPU(p->p_cpu, lltrace_sysret, code, error, retval);
 #ifdef SYSCALL_DEBUG
 	KERNEL_LOCK();
 	scdebug_ret(p, code, error, retval);
@@ -217,12 +219,13 @@ mi_syscall_return(struct proc *p, regist
 static inline void
 mi_child_return(struct proc *p)
 {
-#if defined(SYSCALL_DEBUG) || defined(KTRACE) || NDT > 0
+#if defined(SYSCALL_DEBUG) || defined(KTRACE) || NDT > 0 || NLLT > 0
 	int code = (p->p_flag & P_THREAD) ? SYS___tfork :
 	    (p->p_p->ps_flags & PS_PPWAIT) ? SYS_vfork : SYS_fork;
 	const register_t child_retval[2] = { 0, 1 };
 #endif
+	LLTRACE_CPU(p->p_cpu, lltrace_sysret, code, 0, child_retval);
 
 	TRACEPOINT(sched, on__cpu, NULL);
 
 #ifdef SYSCALL_DEBUG
Index: sys/sys/tracepoint.h
===================================================================
RCS file: /cvs/src/sys/sys/tracepoint.h,v
diff -u -p -r1.2 tracepoint.h
--- sys/sys/tracepoint.h	28 Jun 2022 09:32:28 -0000	1.2
+++ sys/sys/tracepoint.h	1 Jul 2024 05:41:24 -0000
@@ -34,5 +34,36 @@
 #define TRACEINDEX(func, index, args...)
 
 #endif /* NDT > 0 */
+
+#include "llt.h"
+#if NLLT > 0
+#include <sys/lltrace.h>
+
+#define LLTRACE_SPC(_spc, _fn, ...) do {				\
+	struct lltrace_cpu *_llt = lltrace_enter_spc((_spc));		\
+	if (_llt != NULL)						\
+		(_fn)(_llt __VA_OPT__(,) __VA_ARGS__);			\
+} while (0)
+
+#define LLTRACE_CPU(_ci, _fn, ...) do {					\
+	struct lltrace_cpu *_llt = lltrace_enter_cpu((_ci));		\
+	if (_llt != NULL)						\
+		(_fn)(_llt __VA_OPT__(,) __VA_ARGS__);			\
+} while (0)
+
+#define LLTRACE(_fn, ...) do {						\
+	struct lltrace_cpu *_llt = lltrace_enter();			\
+	if (_llt != NULL)						\
+		(_fn)(_llt __VA_OPT__(,) __VA_ARGS__);			\
+} while (0)
+
+#else /* NLLT > 0 */
+
+/* compile away without evaluating the macro arguments */
+#define LLTRACE_SPC(_spc, _fn, ...)
+#define LLTRACE_CPU(_ci, _fn, ...)
+#define LLTRACE(_fn, ...)
+ +#endif /* NLLT > 0 */ #endif /* _KERNEL */ #endif /* _SYS_TRACEPOINT_H_ */ Index: sys/uvm/uvm_fault.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_fault.c,v diff -u -p -r1.135 uvm_fault.c --- sys/uvm/uvm_fault.c 5 Sep 2023 05:08:26 -0000 1.135 +++ sys/uvm/uvm_fault.c 1 Jul 2024 05:41:24 -0000 @@ -576,6 +576,8 @@ uvm_fault(vm_map_t orig_map, vaddr_t vad struct vm_page *pages[UVM_MAXRANGE]; int error; + LLTRACE(lltrace_event_start, LLTRACE_EVENT_CLASS_PAGEFAULT); + counters_inc(uvmexp_counters, faults); TRACEPOINT(uvm, fault, vaddr, fault_type, access_type, NULL); @@ -639,6 +641,8 @@ uvm_fault(vm_map_t orig_map, vaddr_t vad } } } + + LLTRACE(lltrace_event_end, LLTRACE_EVENT_CLASS_PAGEFAULT); return error; } Index: usr.bin/lltextract/Makefile =================================================================== RCS file: usr.bin/lltextract/Makefile diff -N usr.bin/lltextract/Makefile --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ usr.bin/lltextract/Makefile 1 Jul 2024 05:41:24 -0000 @@ -0,0 +1,12 @@ +PROG= lltextract +SRCS= lltextract.c syscallnames.c names.c +SRCS+= heap.c +MAN= + +SYS_DIR= ${.CURDIR}/../../sys +CFLAGS+= -I${SYS_DIR} + +DEBUG= -g +WARNINGS= Yes + +.include Index: usr.bin/lltextract/fxt.h =================================================================== RCS file: usr.bin/lltextract/fxt.h diff -N usr.bin/lltextract/fxt.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ usr.bin/lltextract/fxt.h 1 Jul 2024 05:41:24 -0000 @@ -0,0 +1,30 @@ +#define FXT_T_METADATA 0 /* Metadata */ +#define FXT_T_INIT 1 /* Initialization */ +#define FXT_T_STRING 2 /* String */ +#define FXT_T_THREAD 3 /* Thread */ +#define FXT_T_EVENT 4 /* Event */ +#define FXT_T_BLOB 5 /* Blob */ +#define FXT_T_UOBJ 6 /* Userspace object */ +#define FXT_T_KOBJ 7 /* Kernel object */ +#define FXT_T_SCHED 8 /* Scheduling */ +#define FXT_T_LBLOB 15 /* Large BLOB */ + +#define FXT_H_TYPE_SHIFT 0 +#define FXT_H_TYPE_BITS 4 +#define FXT_H_SIZE_SHIFT 4 +#define FXT_H_SIZE_BITS 12 + +#define FXT_MAX_WORDS (1ULL << 12) + +#define FXT_RECORD(_type, _size) \ + (((_type) << FXT_H_TYPE_SHIFT) | ((_size) << FXT_H_SIZE_SHIFT)) + +#define FXT_H_METADATA_TYPE_SHIFT 16 +#define FXT_H_METADATA_TYPE_BITS 4 + +#define FXT_MD_RECORD(_size, _mdtype) (FXT_RECORD(FXT_T_METADATA, (_size)) | \ + ((_mdtype) << FXT_H_METADATA_TYPE_SHIFT)) + +#define FXT_INIT_MAGIC 0x0016547846040010 +#define FXT_INIT_RECORD(_f) FXT_RECORD(FXT_T_INIT, 2), (_f) + Index: usr.bin/lltextract/heap.c =================================================================== RCS file: usr.bin/lltextract/heap.c diff -N usr.bin/lltextract/heap.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ usr.bin/lltextract/heap.c 1 Jul 2024 05:41:24 -0000 @@ -0,0 +1,204 @@ +/* */ + +/* + * Copyright (c) 2017 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "heap.h" +#include + +static inline struct _heap_entry * +heap_n2e(const struct _heap_type *t, void *node) +{ + unsigned long addr = (unsigned long)node; + + return ((struct _heap_entry *)(addr + t->t_offset)); +} + +static inline void * +heap_e2n(const struct _heap_type *t, struct _heap_entry *he) +{ + unsigned long addr = (unsigned long)he; + + return ((void *)(addr - t->t_offset)); +} + +static struct _heap_entry * +_heap_entry_merge(const struct _heap_type *t, + struct _heap_entry *he1, struct _heap_entry *he2) +{ + struct _heap_entry *hi, *lo; + struct _heap_entry *child; + + if (he1 == NULL) + return (he2); + if (he2 == NULL) + return (he1); + + if (t->t_compare(heap_e2n(t, he1), heap_e2n(t, he2)) >= 0) { + hi = he1; + lo = he2; + } else { + lo = he1; + hi = he2; + } + + child = lo->he_child; + + hi->he_left = lo; + hi->he_nextsibling = child; + if (child != NULL) + child->he_left = hi; + lo->he_child = hi; + lo->he_left = NULL; + lo->he_nextsibling = NULL; + + return (lo); +} + +static inline void +_heap_sibling_remove(struct _heap_entry *he) +{ + if (he->he_left == NULL) + return; + + if (he->he_left->he_child == he) { + if ((he->he_left->he_child = he->he_nextsibling) != NULL) + he->he_nextsibling->he_left = he->he_left; + } else { + if ((he->he_left->he_nextsibling = he->he_nextsibling) != NULL) + he->he_nextsibling->he_left = he->he_left; + } + + he->he_left = NULL; + he->he_nextsibling = NULL; +} + +static inline struct _heap_entry * +_heap_2pass_merge(const struct _heap_type *t, struct _heap_entry *root) +{ + struct _heap_entry *node, *next = NULL; + struct _heap_entry *tmp, *list = NULL; + + node = root->he_child; + if (node == NULL) + return (NULL); + + root->he_child = NULL; + + /* first pass */ + for (next = node->he_nextsibling; next != NULL; + next = (node != NULL ? 
node->he_nextsibling : NULL)) { + tmp = next->he_nextsibling; + node = _heap_entry_merge(t, node, next); + + /* insert head */ + node->he_nextsibling = list; + list = node; + node = tmp; + } + + /* odd child case */ + if (node != NULL) { + node->he_nextsibling = list; + list = node; + } + + /* second pass */ + while (list->he_nextsibling != NULL) { + tmp = list->he_nextsibling->he_nextsibling; + list = _heap_entry_merge(t, list, list->he_nextsibling); + list->he_nextsibling = tmp; + } + + list->he_left = NULL; + list->he_nextsibling = NULL; + + return (list); +} + +void +_heap_insert(const struct _heap_type *t, struct _heap *h, void *node) +{ + struct _heap_entry *he = heap_n2e(t, node); + + he->he_left = NULL; + he->he_child = NULL; + he->he_nextsibling = NULL; + + h->h_root = _heap_entry_merge(t, h->h_root, he); +} + +void +_heap_remove(const struct _heap_type *t, struct _heap *h, void *node) +{ + struct _heap_entry *he = heap_n2e(t, node); + + if (he->he_left == NULL) { + _heap_extract(t, h); + return; + } + + _heap_sibling_remove(he); + h->h_root = _heap_entry_merge(t, h->h_root, _heap_2pass_merge(t, he)); +} + +void +_heap_merge(const struct _heap_type *t, struct _heap *h1, struct _heap *h2) +{ + h1->h_root = _heap_entry_merge(t, h1->h_root, h2->h_root); +} + +void * +_heap_first(const struct _heap_type *t, struct _heap *h) +{ + struct _heap_entry *first = h->h_root; + + if (first == NULL) + return (NULL); + + return (heap_e2n(t, first)); +} + +void * +_heap_extract(const struct _heap_type *t, struct _heap *h) +{ + struct _heap_entry *first = h->h_root; + + if (first == NULL) + return (NULL); + + h->h_root = _heap_2pass_merge(t, first); + + return (heap_e2n(t, first)); +} + +void * +_heap_cextract(const struct _heap_type *t, struct _heap *h, const void *key) +{ + struct _heap_entry *first = h->h_root; + void *node; + + if (first == NULL) + return (NULL); + + node = heap_e2n(t, first); + if (t->t_compare(node, key) > 0) + return (NULL); + + h->h_root = _heap_2pass_merge(t, first); + + return (node); +} Index: usr.bin/lltextract/heap.h =================================================================== RCS file: usr.bin/lltextract/heap.h diff -N usr.bin/lltextract/heap.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ usr.bin/lltextract/heap.h 1 Jul 2024 05:41:24 -0000 @@ -0,0 +1,140 @@ +/* */ + +/* + * Copyright (c) 2017 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#ifndef _HEAP_H_ +#define _HEAP_H_ + +#include + +struct _heap_type { + int (*t_compare)(const void *, const void *); + unsigned int t_offset; /* offset of heap_entry in type */ +}; + +struct _heap_entry { + struct _heap_entry *he_left; + struct _heap_entry *he_child; + struct _heap_entry *he_nextsibling; +}; +#define HEAP_ENTRY(_entry) struct _heap_entry + +struct _heap { + struct _heap_entry *h_root; +}; + +#define HEAP_HEAD(_name) \ +struct _name { \ + struct _heap heap; \ +} + +static inline void +_heap_init(struct _heap *h) +{ + h->h_root = NULL; +} + +static inline int +_heap_empty(struct _heap *h) +{ + return (h->h_root == NULL); +} + +void _heap_insert(const struct _heap_type *, struct _heap *, void *); +void _heap_remove(const struct _heap_type *, struct _heap *, void *); +void _heap_merge(const struct _heap_type *, struct _heap *, struct _heap *); +void *_heap_first(const struct _heap_type *, struct _heap *); +void *_heap_extract(const struct _heap_type *, struct _heap *); +void *_heap_cextract(const struct _heap_type *, struct _heap *, + const void *); + +#define HEAP_INITIALIZER(_head) { { NULL } } + +#define HEAP_PROTOTYPE(_name, _type) \ +extern const struct _heap_type *const _name##_HEAP_TYPE; \ + \ +__unused static inline void \ +_name##_HEAP_INIT(struct _name *head) \ +{ \ + _heap_init(&head->heap); \ +} \ + \ +__unused static inline void \ +_name##_HEAP_INSERT(struct _name *head, struct _type *elm) \ +{ \ + _heap_insert(_name##_HEAP_TYPE, &head->heap, elm); \ +} \ + \ +__unused static inline void \ +_name##_HEAP_REMOVE(struct _name *head, struct _type *elm) \ +{ \ + _heap_remove(_name##_HEAP_TYPE, &head->heap, elm); \ +} \ + \ +__unused static inline struct _type * \ +_name##_HEAP_FIRST(struct _name *head) \ +{ \ + return _heap_first(_name##_HEAP_TYPE, &head->heap); \ +} \ + \ +__unused static inline void \ +_name##_HEAP_MERGE(struct _name *head1, struct _name *head2) \ +{ \ + _heap_merge(_name##_HEAP_TYPE, &head1->heap, &head2->heap); \ +} \ + \ +__unused static inline struct _type * \ +_name##_HEAP_EXTRACT(struct _name *head) \ +{ \ + return _heap_extract(_name##_HEAP_TYPE, &head->heap); \ +} \ + \ +__unused static inline struct _type * \ +_name##_HEAP_CEXTRACT(struct _name *head, const struct _type *key) \ +{ \ + return _heap_cextract(_name##_HEAP_TYPE, &head->heap, key); \ +} \ + \ +__unused static inline int \ +_name##_HEAP_EMPTY(struct _name *head) \ +{ \ + return _heap_empty(&head->heap); \ +} + +#define HEAP_GENERATE(_name, _type, _field, _cmp) \ +static int \ +_name##_HEAP_COMPARE(const void *lptr, const void *rptr) \ +{ \ + const struct _type *l = lptr, *r = rptr; \ + return _cmp(l, r); \ +} \ +static const struct _heap_type _name##_HEAP_INFO = { \ + _name##_HEAP_COMPARE, \ + offsetof(struct _type, _field), \ +}; \ +const struct _heap_type *const _name##_HEAP_TYPE = &_name##_HEAP_INFO + +#define HEAP_INIT(_name, _h) _name##_HEAP_INIT((_h)) +#define HEAP_INSERT(_name, _h, _e) _name##_HEAP_INSERT((_h), (_e)) +#define HEAP_REMOVE(_name, _h, _e) _name##_HEAP_REMOVE((_h), (_e)) +#define HEAP_FIRST(_name, _h) _name##_HEAP_FIRST((_h)) +#define HEAP_MERGE(_name, _h1, _h2) _name##_HEAP_MERGE((_h1), (_h2)) +#define HEAP_EXTRACT(_name, _h) _name##_HEAP_EXTRACT((_h)) +#define HEAP_CEXTRACT(_name, _h, _k) _name##_HEAP_CEXTRACT((_h), (_k)) +#define HEAP_EMPTY(_name, _h) _name##_HEAP_EMPTY((_h)) + +#endif /* _HEAP_H_ */ Index: usr.bin/lltextract/lltextract.c =================================================================== RCS file: usr.bin/lltextract/lltextract.c 
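heap.h above is an intrusive pairing heap in the style of <sys/tree.h>:
HEAP_PROTOTYPE() and HEAP_GENERATE() expand type-safe wrappers around the
_heap_*() functions, and the compare function determines extraction order
(the element that compares smallest is extracted first). A minimal usage
sketch with a hypothetical element type; the real instantiation, for
struct lltx_fxt_record, appears in lltextract.c below:

	struct item {
		HEAP_ENTRY(item) entry;		/* intrusive linkage */
		uint64_t key;
	};

	HEAP_HEAD(item_heap);
	HEAP_PROTOTYPE(item_heap, item);

	static inline int
	item_cmp(const struct item *a, const struct item *b)
	{
		return (a->key < b->key ? -1 : a->key > b->key);
	}

	HEAP_GENERATE(item_heap, item, entry, item_cmp);

	struct item_heap h = HEAP_INITIALIZER(h);

	/*
	 * HEAP_INSERT(item_heap, &h, it) queues items; repeated
	 * HEAP_EXTRACT(item_heap, &h) then yields them in ascending
	 * key order, which is how lltextract orders FXT records by
	 * timestamp.
	 */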
diff -N usr.bin/lltextract/lltextract.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ usr.bin/lltextract/lltextract.c 1 Jul 2024 05:41:24 -0000 @@ -0,0 +1,1823 @@ +/* $OpenBSD */ + +/* + * Copyright (c) 2022 The University of Queensland + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * + * Copyright 2021 Richard L. Sites + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include /* for SYS_MAXSYSCALL */ +#include /* for _MAXCOMLEN */ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "heap.h" + +#include "fxt.h" +#include "lltextract.h" + +#ifndef nitems +#define nitems(_a) (sizeof((_a)) / sizeof((_a)[0])) +#endif + +#ifndef ISSET +#define ISSET(_a, _b) ((_a) & (_b)) +#endif + +#define THREAD_PID_OFFSET 100000 + +struct cytime { + uint64_t base_cy; + uint64_t base_ns; + uint64_t base_cy10; + uint64_t base_ns10; + + double slope; +}; + +struct ring { + uint64_t slots[8192]; +}; + +static void lltextract(size_t, const struct ring *); + +struct llt_pid { + /* this knows a lot about process names in the kernel */ + union { + uint64_t words[3]; + char str[_MAXCOMLEN]; + } _ps_comm; +#define ps_comm _ps_comm.str +#define ps_comm64 _ps_comm.words + unsigned int ps_comm_n; + unsigned int ps_strid; + + unsigned int ps_pid; + unsigned int ps_system; + uint64_t ps_fxtid; + + uint64_t ps_ts; + RBT_ENTRY(llt_pid) ps_entry; +}; + +RBT_HEAD(llt_pid_tree, llt_pid); + +static inline int +llt_pid_cmp(const struct llt_pid *a, const struct llt_pid *b) +{ + if (a->ps_pid > b->ps_pid) + return (1); + if (a->ps_pid < b->ps_pid) + return (-1); + return (0); +} + +struct llt_tid { + struct llt_pid *p_p; + unsigned int p_strid; + //unsigned int p_thrid; + unsigned int p_tid; + uint64_t p_fxtid; + + RBT_ENTRY(llt_tid) p_entry; +}; + +RBT_HEAD(llt_tid_tree, llt_tid); + +static inline int +llt_tid_cmp(const struct llt_tid *a, const struct llt_tid *b) +{ + if (a->p_tid > b->p_tid) + return (1); + if (a->p_tid < b->p_tid) + return (-1); + return (0); +} + +RBT_PROTOTYPE(llt_pid_tree, llt_pid, ps_entry, llt_pid_cmp); +RBT_PROTOTYPE(llt_tid_tree, llt_tid, p_entry, llt_tid_cmp); + +struct lltx_fxt_record { + HEAP_ENTRY(lltx_fxt_record) + entry; + uint64_t ts; + unsigned int n; + + /* followed by n * uint64_ts */ +}; + +HEAP_HEAD(lltx_fxt_heap); + +HEAP_PROTOTYPE(lltx_fxt_heap, lltx_fxt_record); + +__dead static void +usage(void) +{ + extern char *__progname; + + fprintf(stderr, "usage: %s [-v] -i infile -o outfile\n", + __progname); + + exit(1); +} + +static const uint64_t fxt_magic[] = { FXT_INIT_MAGIC }; +static const uint64_t fxt_init[2] = { FXT_INIT_RECORD(1000000000ULL) }; + +static FILE *ifile = stdin; +static FILE *ofile = stdout; +static int verbose = 0; + +static struct llt_pid_tree lltx_pids = RBT_INITIALIZER(); +static struct llt_tid_tree lltx_tids = RBT_INITIALIZER(); + +static void lltx_kobj_bsd(void); +static unsigned int lltx_str(const char *); + +static unsigned int lltx_strids; +static unsigned int lltx_strid_process; +static unsigned int lltx_strid_sched; +static unsigned int lltx_strid_wakeup; +static unsigned int lltx_strid_woken; +static unsigned int lltx_strid_unknown; +static unsigned int lltx_strid_acquire; +static unsigned int lltx_strid_symbol; +static unsigned int lltx_strid_offset; +static unsigned int lltx_strid_count; + +static const char str_process[] = "process"; +static const char str_sched[] = "sched"; +static const char str_wakeup[] = "wakeup"; +static const char str_woken[] = "woken"; +static const char str_unknown[] = "unknown"; +static const char str_acquire[] = "acquire"; +static const char str_symbol[] = "symbol"; +static const char str_offset[] = "offset"; +static const char str_count[] = "count"; + +static const char *str_locks[1 << LLTRACE_LK_TYPE_WIDTH] = { + [LLTRACE_LK_RW] = "rwlock", + [LLTRACE_LK_MTX] = "mutex", + [LLTRACE_LK_K] = "kernel", +}; +static 
unsigned int lltx_strids_locks[1 << LLTRACE_LK_TYPE_WIDTH]; + +static const char *str_lock_ops[1 << LLTRACE_LK_PHASE_WIDTH] = { + [LLTRACE_LK_I_EXCL] = "instant-exclusive", + [LLTRACE_LK_I_SHARED] = "instant-shared", + [LLTRACE_LK_A_START] = "acquire-start", + [LLTRACE_LK_A_EXCL] = "acquired-exclusive", + [LLTRACE_LK_A_SHARED] = "acquired-shared", + [LLTRACE_LK_A_ABORT] = "acquire-abort", + [LLTRACE_LK_DOWNGRADE] = "downgrade", + [LLTRACE_LK_R_EXCL] = "release-exclusive", + [LLTRACE_LK_R_SHARED] = "release-shared", + [LLTRACE_LK_I_FAIL] = "instant-fail", +}; +static unsigned int lltx_strids_lock_ops[1 << LLTRACE_LK_PHASE_WIDTH]; + +static struct lltx_fxt_heap lltx_records = HEAP_INITIALIZER(); + +static void +fxt_insert(uint64_t ts, const uint64_t *atoms, unsigned int n) +{ + struct lltx_fxt_record *r; + uint64_t *dst; + unsigned int i; + + r = malloc(sizeof(*r) + (sizeof(*atoms) * n)); + if (r == NULL) + err(1, "fxt_insert"); + + r->ts = ts; + r->n = n; + dst = (uint64_t *)(r + 1); + for (i = 0; i < n; i++) + dst[i] = atoms[i]; + + HEAP_INSERT(lltx_fxt_heap, &lltx_records, r); +} + +static struct lltx_fxt_record * +fxt_extract(void) +{ + return (HEAP_EXTRACT(lltx_fxt_heap, &lltx_records)); +} + +int +main(int argc, char *argv[]) +{ + const char *ifname = NULL; + const char *ofname = NULL; + const char *ofmode = "wx"; + struct ring ring; + size_t block = 0; + size_t rv; + size_t i; + + int ch; + + while ((ch = getopt(argc, argv, "fi:o:v")) != -1) { + switch (ch) { + case 'f': + ofmode = "w"; + break; + case 'i': + ifname = optarg; + break; + case 'o': + ofname = optarg; + break; + case 'v': + verbose++; + break; + default: + usage(); + /* NOTREACHED */ + } + } + + argc -= optind; + argv += optind; + + if (argc != 0) + usage(); + + if (ifname == NULL) + warnx("input file not specified"); + if (ofname == NULL) + warnx("output file not specified"); + if (ifname == NULL || ofname == NULL) + usage(); + + ifile = fopen(ifname, "r"); + if (ifile == NULL) + err(1, "%s", ifname); + + ofile = fopen(ofname, ofmode); + if (ofile == NULL) + err(1, "%s", ofname); + + rv = fwrite(fxt_magic, sizeof(fxt_magic[0]), nitems(fxt_magic), ofile); + if (rv == 0) + err(1, "%s fxt magic write", ofname); + + rv = fwrite(fxt_init, sizeof(fxt_init[0]), nitems(fxt_init), ofile); + if (rv == 0) + err(1, "%s fxt ts write", ofname); + + lltx_kobj_bsd(); + lltx_strid_process = lltx_str(str_process); + lltx_strid_sched = lltx_str(str_sched); + lltx_strid_wakeup = lltx_str(str_wakeup); + lltx_strid_woken = lltx_str(str_woken); + lltx_strid_unknown = lltx_str(str_unknown); + lltx_strid_acquire = lltx_str(str_acquire); + lltx_strid_symbol = lltx_str(str_symbol); + lltx_strid_offset = lltx_str(str_offset); + lltx_strid_count = lltx_str(str_count); + + for (i = 0; i < nitems(str_locks); i++) { + const char *str = str_locks[i]; + if (str == NULL) + continue; + lltx_strids_locks[i] = lltx_str(str); + } + + for (i = 0; i < nitems(str_lock_ops); i++) { + const char *str = str_lock_ops[i]; + if (str == NULL) + continue; + lltx_strids_lock_ops[i] = lltx_str(str); + } + +printf("[\n"); + for (;;) { + size_t nread = fread(&ring, sizeof(ring), 1, ifile); + if (nread == 0) { + if (ferror(ifile)) + errx(1, "error reading %s", ifname); + if (feof(ifile)) + break; + } + + lltextract(block++, &ring); + } + + { + struct llt_tid *p; + + RBT_FOREACH(p, llt_tid_tree, &lltx_tids) { + printf("### pid %u tid %u -> %llu %llu\n", + p->p_p->ps_pid, p->p_tid, + p->p_p->ps_fxtid, p->p_fxtid); + } + } + + { + struct lltx_fxt_record *r; + + while ((r = 
fxt_extract()) != NULL) { + uint64_t *atoms = (uint64_t *)(r + 1); + fwrite(atoms, sizeof(*atoms), r->n, ofile); + free(r); + } + } + + return (0); +} + + +static int +printable(int ch) +{ + if (ch == '\0') + return ('_'); + if (!isprint(ch)) + return ('~'); + + return (ch); +} + +static void +dump_slot(size_t slot, uint64_t v) +{ + uint8_t buf[sizeof(v)]; + size_t i; + + printf("## slot %4zu = 0x%016llx |", slot, v); + + memcpy(buf, &v, sizeof(buf)); + for (i = 0; i < sizeof(buf); i++) + putchar(printable(buf[i])); + + printf("|\n"); +} + +static void +dump_slots(const struct ring *ring, size_t slot, size_t n) +{ + n += slot; + while (slot < n) { + dump_slot(slot, ring->slots[slot]); + slot++; + } +} + +static void +cytime_init(struct cytime *ct, + uint64_t start_cy, uint64_t start_ns, uint64_t stop_cy, uint64_t stop_ns) +{ + uint64_t diff_cy = stop_cy - start_cy; + uint64_t diff_ns = stop_ns - start_ns; + + ct->base_cy = start_cy; + ct->base_ns = start_ns; + + ct->slope = (double)diff_ns / (double)diff_cy; + + if (verbose >= 1) { + printf("SetParams maps %18llucy ==> %18lluns\n", + start_cy, start_ns); + printf("SetParams maps %18llucy ==> %18lluns\n", + stop_cy, stop_ns); + printf(" diff %18llucy ==> %18lluns\n", + diff_cy, diff_ns); + printf("SetParams slope %f ns/cy\n", ct->slope); + } +} + +struct lltstate { + struct cytime ct; + + uint32_t cy32; + int64_t cy; + unsigned int cpu; + unsigned int idletid; + + uint64_t ns; + struct llt_tid *p; + + unsigned int idle; +}; + +#define TS32_SHIFT (32 - (LLTRACE_TS_WIDTH + LLTRACE_TS_SHIFT)) + +struct llevent { + size_t block; + size_t slot; + int64_t cy; + uint32_t cy32; +}; + +#if 0 +static void lltextract_mark(struct lltstate *, struct llevent *, + unsigned int, uint64_t); +static void lltextract_irq(struct lltstate *, struct llevent *, + unsigned int, uint64_t); +static void lltextract_syscall(struct lltstate *, struct llevent *, + unsigned int, uint64_t); +static void lltextract_sysret(struct lltstate *, struct llevent *, + unsigned int, uint64_t); +#endif + +static void lltx_id(struct lltstate *, struct llevent *, uint64_t, + const uint64_t *, unsigned int); +static void lltx_event(struct lltstate *, struct llevent *, uint64_t, + const uint64_t *, unsigned int); +static void lltx_locking(struct lltstate *, struct llevent *, uint64_t, + const uint64_t *, unsigned int); +static void lltx_idle(struct lltstate *, struct llevent *, unsigned int); + +static struct llt_tid * +lltx_tid(unsigned int tid) +{ + struct llt_tid *p; + struct llt_tid key = { .p_tid = tid }; + + p = RBT_FIND(llt_tid_tree, &lltx_tids, &key); + if (p != NULL) + return (p); + + p = malloc(sizeof(*p)); + if (p == NULL) + err(1, "llt tid alloc"); + + p->p_tid = tid; + + p->p_p = NULL; + p->p_strid = 0; + //p->p_thrid = 0; + p->p_fxtid = p->p_tid + THREAD_PID_OFFSET; + + if (RBT_INSERT(llt_tid_tree, &lltx_tids, p) != NULL) + errx(1, "llt tid %d insert failed", tid); + + return (p); +} + +static struct llt_tid * +lltx_tid_pid(unsigned int tid, unsigned int pid, unsigned int sys) +{ + struct llt_tid *p; + struct llt_pid *ps; + + p = lltx_tid(tid); + ps = p->p_p; + if (ps == NULL) { + struct llt_pid key = { .ps_pid = pid }; + + ps = RBT_FIND(llt_pid_tree, &lltx_pids, &key); + if (ps == NULL) { + ps = malloc(sizeof(*ps)); + if (ps == NULL) + err(1, "llt pid alloc"); + + ps->ps_pid = pid; + ps->ps_system = sys; + + ps->ps_strid = 0; + ps->ps_ts = 0; + + /* lie about kernel threads */ + ps->ps_fxtid = ps->ps_system ? 
0 : ps->ps_pid; + + if (RBT_INSERT(llt_pid_tree, &lltx_pids, ps) != NULL) + errx(1, "llt pid %u insert failed", pid); + } + + p->p_p = ps; + p->p_fxtid = ps->ps_system ? ps->ps_pid : + (p->p_tid + THREAD_PID_OFFSET); + + if (!ps->ps_system) { + uint64_t atoms[4]; + + atoms[0] = FXT_T_KOBJ; + atoms[0] |= nitems(atoms) << FXT_H_SIZE_SHIFT; + atoms[0] |= 2ULL << 16; /* ZX_OBJ_TYPE_THREAD */ + atoms[0] |= 1ULL << 40; /* number of args */ + atoms[1] = p->p_fxtid; + atoms[2] = 8 | (2 << 4); /* koid */ + atoms[2] |= (uint64_t)lltx_strid_process << 16; + atoms[3] = ps->ps_fxtid; + + fwrite(atoms, sizeof(atoms[0]), nitems(atoms), ofile); + } + } else { + if (ps->ps_pid != pid) + errx(1, "tid %u has a new pid %u", tid, pid); + } + + return (p); +} + +static void +lltextract(size_t block, const struct ring *ring) +{ + const struct lltrace_header *llh = (struct lltrace_header *)ring; + struct lltstate state = { + .cpu = llh->h_cpu, + .idletid = llh->h_idletid, + .cy = 0, + .idle = LLTRACE_EVENT_PHASE_END, + }; + struct llevent lle; + unsigned int pid, sys; + + size_t slot, nslot; + uint32_t cy32; + int32_t cydiff; + + if (verbose >= 2) + dump_slots(ring, 0, 8); + + cytime_init(&state.ct, ring->slots[2], ring->slots[3], + ring->slots[4], ring->slots[5]); + + printf("{"); + printf("\"name\":\"cpu%u\",", state.cpu); + printf("\"cat\":\"lltrace\","); + printf("\"ph\":\"b\","); + printf("\"pid\":0,"); + printf("\"tid\":%u,", state.cpu); + printf("\"ts\":%lf,", (double)ring->slots[3] / 1000.0); + printf("\"id\":%zu", block); + printf("},\n"); + + printf("{"); + printf("\"name\":\"cpu%u\",", state.cpu); + printf("\"cat\":\"lltrace\","); + printf("\"ph\":\"e\","); + printf("\"pid\":0,"); + printf("\"tid\":%u,", state.cpu); + printf("\"ts\":%lf,", (double)ring->slots[5] / 1000.0); + printf("\"id\":%zu", block); + printf("},\n"); + + state.cy32 = ring->slots[2] << TS32_SHIFT; + state.ns = state.ct.base_ns; + + sys = llh->h_pid & (1U << 31); + pid = llh->h_pid & ~(1U << 31); + + state.p = lltx_tid_pid(llh->h_tid, pid, sys); + + for (slot = 8; slot < nitems(ring->slots); slot++) { + const uint64_t *slots = ring->slots + slot; + uint64_t record = slots[0]; + unsigned int type, len; + + if (verbose >= 2) + dump_slot(slot, record); + + if (record == 0) + return; + + type = (record >> LLTRACE_TYPE_SHIFT) & LLTRACE_TYPE_MASK; + len = (record >> LLTRACE_LEN_SHIFT) & LLTRACE_LEN_MASK; + + nslot = slot + len; + if (nslot >= nitems(ring->slots)) + errx(1, "slot %zu has %u extra", slot, len); + + if (verbose >= 2) { + dump_slots(ring, slot + 1, len); + printf("slot %4zu+%u type 0x%x\n", slot, len, type); + } + + if (ISSET(LLTRACE_TS_TYPES, 1U << type)) { + cy32 = record & (LLTRACE_TS_MASK << LLTRACE_TS_SHIFT); + cy32 <<= TS32_SHIFT; + cydiff = (cy32 - state.cy32); + cydiff >>= TS32_SHIFT; + + int64_t cy = state.cy + cydiff; + if (cydiff > 0) { + state.cy32 = cy32; + state.cy += cydiff; + } + //lle.cy = state.cy; + state.ns = state.ct.base_ns + (cy * state.ct.slope); + //state.ns = state.ct.base_cy + cy; + + if (verbose >= 2) { + printf("state.cy %llu state.cy32 %u diff %d (%.1f)\n", + state.cy, state.cy32, cydiff, cydiff * state.ct.slope); + } + + if (state.idle == LLTRACE_EVENT_PHASE_START) { + lltx_idle(&state, &lle, + LLTRACE_EVENT_PHASE_END); + } + } + + lle.block = block; + lle.slot = slot; + + switch (type) { + case LLTRACE_TYPE_ID: + lltx_id(&state, &lle, record, slots + 1, len); + break; + case LLTRACE_TYPE_EVENT: + lltx_event(&state, &lle, record, slots + 1, len); + break; + case LLTRACE_TYPE_LOCKING: + 
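+			/* lock records carry type, op and address, see LLTRACE_LK_* */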
lltx_locking(&state, &lle, record, slots + 1, len); + break; + default: + warnx("slot %4zu+%u unknown type 0x%x ", + slot, len, type); + break; + } + + slot = nslot; + } +} + +static size_t +strtoatoms(uint64_t *atoms, size_t n, const char *str, size_t len) +{ + size_t natoms = (len + (sizeof(*atoms) - 1)) / sizeof(*atoms); + size_t nn = n + natoms; + size_t i; + + if (nn >= FXT_MAX_WORDS) + errx(1, "too far"); + + for (i = n; i < nn; i++) + atoms[i] = 0; + + memcpy(atoms + n, str, len); + + return (nn); +} + +static int +str64eq(const uint64_t *a, const uint64_t *b, size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) { + if (a[i] != b[i]) + return (0); + } + + return (1); +} + +uint64_t fxt_atoms[128]; + +static void +lltx_id_tid(struct lltstate *state, struct llevent *lle, uint64_t record, + const uint64_t *extra, unsigned int extralen) +{ + unsigned int tid, pid, sys; + struct llt_tid *p; + struct llt_pid *ps; + unsigned int i; + size_t n; + + tid = (record >> LLTRACE_ID_TID_SHIFT) & LLTRACE_ID_TID_MASK; + pid = (record >> LLTRACE_ID_TID_PID_SHIFT) & LLTRACE_ID_TID_PID_MASK; + sys = !!ISSET(record, LLTRACE_ID_TID_SYSTEM); + + printf("#pn %zu[%zu] cpu %u %s pid %u tid %u", + lle->block, lle->slot, state->cpu, + sys ? "kernel" : "user", pid, tid); + if (extralen > 0) { + printf(" %.*s", + (int)(extralen * sizeof(*extra)), (const char *)extra); + } + printf("\n"); + + p = lltx_tid_pid(tid, pid, sys); + ps = p->p_p; + +// state->tid = tid; +// state->p = p; + + if (ps->ps_ts > state->ns) { + /* a later version of the info has already been reported */ + return; + } + ps->ps_ts = state->ns; + + if (extralen > nitems(ps->ps_comm64)) + errx(1, "pid %d name is too long", ps->ps_pid); + + if (ps->ps_comm_n == extralen && + str64eq(ps->ps_comm64, extra, extralen)) + return; + + for (i = 0; i < extralen; i++) + ps->ps_comm64[i] = extra[i]; + while (i < nitems(ps->ps_comm64)) + ps->ps_comm64[i++] = 0; + ps->ps_comm_n = extralen; + + fxt_atoms[0] = FXT_T_KOBJ; + + n = 1; + if (ps->ps_system) { + fxt_atoms[0] |= 2 << 16; /* ZX_OBJ_TYPE_THREAD */ + fxt_atoms[n++] = p->p_fxtid; + } else { + fxt_atoms[0] |= 1 << 16; /* ZX_OBJ_TYPE_PROCESS */ + fxt_atoms[n++] = ps->ps_fxtid; + } + for (i = 0; i < extralen; i++) + fxt_atoms[n++] = extra[i]; + fxt_atoms[0] |= n << 4; + fxt_atoms[0] |= ((1 << 15) | + strnlen(ps->ps_comm, ps->ps_comm_n * 8)) << 24; + + fwrite(fxt_atoms, sizeof(fxt_atoms[0]), n, ofile); +} + +static void +lltx_kobj_bsd(void) +{ + static const char name[] = "/bsd"; + size_t namelen = sizeof(name) - 1; /* - nul */ + size_t n; + + n = 1; + fxt_atoms[n++] = 0; /* pid 0 is the kernel */ + n = strtoatoms(fxt_atoms, n, name, namelen); + + fxt_atoms[0] = FXT_T_KOBJ; + fxt_atoms[0] |= 1 << 16; /* ZX_OBJ_TYPE_PROCESS */ + fxt_atoms[0] |= n << 4; + fxt_atoms[0] |= ((1 << 15) | namelen) << 24; + + fwrite(fxt_atoms, sizeof(fxt_atoms[0]), n, ofile); +} + +static unsigned int +lltx_str(const char *str) +{ + size_t len = strlen(str); + uint64_t strid = ++lltx_strids; + size_t n; + + n = strtoatoms(fxt_atoms, 1, str, len); + + fxt_atoms[0] = FXT_T_STRING | (n << 4); + fxt_atoms[0] |= strid << 16; + fxt_atoms[0] |= (uint64_t)len << 32; + + fwrite(fxt_atoms, sizeof(fxt_atoms[0]), n, ofile); + + return (strid); +} + +static void +lltx_id(struct lltstate *state, struct llevent *lle, uint64_t record, + const uint64_t *extra, unsigned int n) +{ + unsigned int type; + + type = (record >> LLTRACE_ID_TYPE_SHIFT) & LLTRACE_ID_TYPE_MASK; + + switch (type) { + case LLTRACE_ID_TYPE_TID: + lltx_id_tid(state, lle, record, 
extra, n); + break; + default: + warnx("slot %4zu+%u unknown id type 0x%x ", lle->slot, n, + type); + break; + } +} + +static const char *lltrace_event_class_names[] = { + [LLTRACE_EVENT_CLASS_SYSCALL] = "syscall", + [LLTRACE_EVENT_CLASS_IDLE] = "idle", + [LLTRACE_EVENT_CLASS_INTR] = "intr", + [LLTRACE_EVENT_CLASS_SCHED] = "sched", + [LLTRACE_EVENT_CLASS_FUNC] = "function", + [LLTRACE_EVENT_CLASS_PAGEFAULT] = "pagefault", + [LLTRACE_EVENT_CLASS_WAKE] = "wake", + [LLTRACE_EVENT_CLASS_COUNT] = "count", +}; + +static const char *lltrace_event_phase_names[] = { + [LLTRACE_EVENT_PHASE_INSTANT] = "instant", + [LLTRACE_EVENT_PHASE_START] = "start", + [LLTRACE_EVENT_PHASE_STEP] = "step", + [LLTRACE_EVENT_PHASE_END] = "end", +}; + +static const unsigned int lltrace_event_phase_map[] = { + [LLTRACE_EVENT_PHASE_INSTANT] = 0, + [LLTRACE_EVENT_PHASE_START] = 2, + [LLTRACE_EVENT_PHASE_END] = 3, +}; + +static const char *lltrace_intr_type_names[1 << LLTRACE_INTR_T_WIDTH] = { + [LLTRACE_INTR_T_HW] = "hardintr", + [LLTRACE_INTR_T_SW] = "softintr", + [LLTRACE_INTR_T_IPI] = "ipi", + [LLTRACE_INTR_T_CLOCK] = "clockintr", +}; + +static const char *lltrace_count_type_names[] = { + [LLTRACE_COUNT_T_PKTS_IFIQ] = "pkts:ifiq", + [LLTRACE_COUNT_T_PKTS_NETTQ] = "pkts:nettq", + [LLTRACE_COUNT_T_PKTS_IFQ] = "pkts:ifq", + [LLTRACE_COUNT_T_PKTS_QDROP] = "pkts:qdrop", + [LLTRACE_COUNT_T_PKTS_HDROP] = "pkts:hdrop", +}; + +static const char * +syscall_name(unsigned int sc) +{ + extern const char *const syscallnames[]; + + if (sc < SYS_MAXSYSCALL) + return (syscallnames[sc]); + + return (NULL); +} + +#if 0 +static uint64_t +lltx_thrid(struct llt_tid *p) +{ + static unsigned int thrids; + unsigned int thrid = p->p_thrid; + uint64_t atoms[3]; + + if (thrid != 0) + return thrid; + + thrid = ++thrids; + p->p_thrid = thrid; + + /* XXX not the nicest place to do this */ + atoms[0] = FXT_T_THREAD | (nitems(atoms) << FXT_H_SIZE_SHIFT); + atoms[0] |= thrid << 16; + atoms[1] = p->p_p->ps_fxtid; + atoms[2] = p->p_fxtid; + + printf("#th 0x%016llx %llu %llu\n", atoms[0], atoms[1], atoms[2]); + + fwrite(atoms, sizeof(atoms[0]), nitems(atoms), ofile); + + return (thrid); +} +#endif + +static void +lltx_sched(struct lltstate *state, struct llevent *lle, uint64_t record, + const uint64_t *extra, unsigned int extralen) +{ + unsigned int ntid, ostate; + struct llt_tid *op = state->p; + struct llt_tid *np; +// uint64_t oid, nid; + size_t n; + + ntid = (record >> LLTRACE_SCHED_TID_SHIFT) & + LLTRACE_SCHED_TID_MASK; + ostate = (record >> LLTRACE_SCHED_STATE_SHIFT) & + LLTRACE_SCHED_STATE_MASK; + + np = lltx_tid(ntid); + if (np->p_p == NULL) + errx(1, "new thread %u is unknown", ntid); + + if (verbose >= 2) { + printf("#ev %zu[%zu] %llu cpu %u pid %llu tid %llu " + "switch to pid %llu tid %llu\n", + lle->block, lle->slot, state->ns, state->cpu, + op->p_p->ps_fxtid, op->p_fxtid, + np->p_p->ps_fxtid, np->p_fxtid); + } + + if (extralen > 0) { + n = 1; + fxt_atoms[n++] = state->ns; + fxt_atoms[n++] = np->p_p->ps_fxtid; + fxt_atoms[n++] = np->p_fxtid; + + fxt_atoms[0] = FXT_T_EVENT | (n << FXT_H_SIZE_SHIFT); + fxt_atoms[0] |= (uint64_t)0 << 16; /* instant event */ + fxt_atoms[0] |= 0ULL << 20; /* number of args */ + //fxt_atoms[0] |= nid << 24; + fxt_atoms[0] |= (uint64_t)lltx_strid_sched << 32; + fxt_atoms[0] |= (uint64_t)lltx_strid_woken << 48; + + //fwrite(fxt_atoms, sizeof(fxt_atoms[0]), n, ofile); + fxt_insert(state->ns, fxt_atoms, n); + + n = 1; + fxt_atoms[n++] = state->ns; + fxt_atoms[n++] = np->p_p->ps_fxtid; + fxt_atoms[n++] = np->p_fxtid; + 
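+		/*
+		 * extra[0] appears to carry the wakeup id (cf. p_wakeid in
+		 * struct proc), used as the FXT flow id to tie this switch
+		 * back to the wake record that made the thread runnable.
+		 */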
fxt_atoms[n++] = extra[0]; + + fxt_atoms[0] = FXT_T_EVENT | (n << FXT_H_SIZE_SHIFT); + fxt_atoms[0] |= (uint64_t)10 << 16; + fxt_atoms[0] |= 0ULL << 20; /* number of args */ + //fxt_atoms[0] |= nid << 24; + fxt_atoms[0] |= (uint64_t)lltx_strid_sched << 32; + fxt_atoms[0] |= (uint64_t)lltx_strid_wakeup << 48; + + //fwrite(fxt_atoms, sizeof(fxt_atoms[0]), n, ofile); + fxt_insert(state->ns, fxt_atoms, n); + } + +// oid = lltx_thrid(op); +// nid = lltx_thrid(np); + + n = 1; + fxt_atoms[n++] = state->ns; + fxt_atoms[n++] = op->p_p->ps_fxtid; + fxt_atoms[n++] = op->p_fxtid; + fxt_atoms[n++] = np->p_p->ps_fxtid; + fxt_atoms[n++] = np->p_fxtid; + + fxt_atoms[0] = FXT_T_SCHED | (n << FXT_H_SIZE_SHIFT); + fxt_atoms[0] |= (uint64_t)state->cpu << 16; + fxt_atoms[0] |= (uint64_t)ostate << 24; +// fxt_atoms[0] |= oid << 28; +// fxt_atoms[0] |= nid << 36; + fxt_atoms[0] |= 1ULL << 44; + fxt_atoms[0] |= 1ULL << 52; + fxt_atoms[0] |= (uint64_t)0 << 60; + + //fwrite(fxt_atoms, sizeof(fxt_atoms[0]), n, ofile); + fxt_insert(state->ns, fxt_atoms, n); + + state->p = np; +} + +static void +lltx_sched_wake(struct lltstate *state, struct llevent *lle, uint64_t record, + const uint64_t *extra, unsigned int extralen) +{ + unsigned int tid; + struct llt_tid *p; + size_t n; + + if (extralen > 0) { + p = state->p; + + n = 1; + fxt_atoms[n++] = state->ns; + fxt_atoms[n++] = p->p_p->ps_fxtid; + fxt_atoms[n++] = p->p_fxtid; + + fxt_atoms[0] = FXT_T_EVENT | (n << FXT_H_SIZE_SHIFT); + fxt_atoms[0] |= (uint64_t)0 << 16; /* instant event */ + fxt_atoms[0] |= 0ULL << 20; /* number of args */ + fxt_atoms[0] |= (uint64_t)lltx_strid_sched << 32; + fxt_atoms[0] |= (uint64_t)lltx_strid_wakeup << 48; + + //fwrite(fxt_atoms, sizeof(fxt_atoms[0]), n, ofile); + fxt_insert(state->ns, fxt_atoms, n); + + n = 1; + fxt_atoms[n++] = state->ns; + fxt_atoms[n++] = p->p_p->ps_fxtid; + fxt_atoms[n++] = p->p_fxtid; + fxt_atoms[n++] = extra[0]; + + fxt_atoms[0] = FXT_T_EVENT | (n << FXT_H_SIZE_SHIFT); + fxt_atoms[0] |= (uint64_t)8 << 16; + fxt_atoms[0] |= 0ULL << 20; /* number of args */ + fxt_atoms[0] |= (uint64_t)lltx_strid_sched << 32; + fxt_atoms[0] |= (uint64_t)lltx_strid_wakeup << 48; + + //fwrite(fxt_atoms, sizeof(fxt_atoms[0]), n, ofile); + fxt_insert(state->ns, fxt_atoms, n); + } + + tid = (record >> LLTRACE_SCHED_TID_SHIFT) & + LLTRACE_SCHED_TID_MASK; + + p = lltx_tid(tid); + if (p->p_p == NULL) + errx(1, "wakeup thread %u is unknown", tid); + + if (verbose >= 2) { + printf("#ev %zu[%zu] %llu cpu %u pid %llu tid %llu " + "wakeup pid %llu tid %llu\n", + lle->block, lle->slot, state->ns, state->cpu, + state->p->p_p->ps_fxtid, state->p->p_fxtid, + p->p_p->ps_fxtid, p->p_fxtid); + } + + n = 1; + fxt_atoms[n++] = state->ns; + fxt_atoms[n++] = p->p_fxtid; + + fxt_atoms[0] = FXT_T_SCHED | (n << FXT_H_SIZE_SHIFT); + fxt_atoms[0] |= (uint64_t)state->cpu << 20; + fxt_atoms[0] |= (uint64_t)2 << 60; + + //fwrite(fxt_atoms, sizeof(fxt_atoms[0]), n, ofile); + //fxt_insert(state->ns, fxt_atoms, n); +} + +static void +lltx_idle(struct lltstate *state, struct llevent *lle, unsigned int phase) +{ + struct llt_tid *p = state->p; + uint64_t iprio, oprio; +// uint64_t oid, iid; + size_t n; + + if (state->idle == phase) + return; + + if (state->idletid != p->p_tid) { + errx(1, "idle outside the idle thread %u, in %u", + state->idletid, p->p_tid); + } + if (p->p_p == NULL) + errx(1, "idle thread %u is unknown", state->idletid); + + if (verbose >= 2) { + printf("#ev %zu[%zu] %llu cpu %u pid %llu tid %llu idle %s\n", + lle->block, lle->slot, state->ns, 
state->cpu,
+		    p->p_p->ps_fxtid, p->p_fxtid,
+		    lltrace_event_phase_names[phase]);
+	}
+
+	n = 1;
+	fxt_atoms[n++] = state->ns;
+
+	switch (phase) {
+	case LLTRACE_EVENT_PHASE_START:
+		oprio = 1;
+		fxt_atoms[n++] = p->p_p->ps_fxtid;
+		fxt_atoms[n++] = p->p_fxtid;
+		iprio = 0;
+		fxt_atoms[n++] = 0;
+		fxt_atoms[n++] = 0;
+		break;
+	case LLTRACE_EVENT_PHASE_END:
+		oprio = 0;
+		fxt_atoms[n++] = 0;
+		fxt_atoms[n++] = 0;
+		iprio = 1;
+		fxt_atoms[n++] = p->p_p->ps_fxtid;
+		fxt_atoms[n++] = p->p_fxtid;
+		break;
+	default:
+		return;
+	}
+
+	fxt_atoms[0] = FXT_T_SCHED | (n << FXT_H_SIZE_SHIFT);
+	fxt_atoms[0] |= (uint64_t)state->cpu << 16;
+	fxt_atoms[0] |= (uint64_t)3 << 24;
+	fxt_atoms[0] |= oprio << 44;
+	fxt_atoms[0] |= iprio << 52;
+	fxt_atoms[0] |= (uint64_t)0 << 60;
+
+	//fwrite(fxt_atoms, sizeof(fxt_atoms[0]), n, ofile);
+	fxt_insert(state->ns, fxt_atoms, n);
+
+	state->idle = phase;
+}
+
+static void
+lltx_event_count(struct lltstate *state, struct llevent *lle,
+    unsigned int phase, const char *classnm, size_t classnmlen,
+    uint64_t record)
+{
+	char tname[128];
+	uint32_t t, v;
+	const char *eventnm;
+	size_t eventnmlen;
+	size_t n, an;
+
+	t = (record >> LLTRACE_COUNT_T_SHIFT) & LLTRACE_COUNT_T_MASK;
+	if (t >= nitems(lltrace_count_type_names) ||
+	    (eventnm = lltrace_count_type_names[t]) == NULL) {
+		int rv;
+
+		warnx("unknown count type class %u", t);
+
+		rv = snprintf(tname, sizeof(tname), "count-type-%u", t);
+		if (rv == -1)
+			errx(1, "count event type name snprintf");
+		eventnm = tname;
+		eventnmlen = rv;
+		if (eventnmlen >= sizeof(tname))
+			errx(1, "count type name too long");
+	} else
+		eventnmlen = strlen(eventnm);
+
+	v = (record >> LLTRACE_COUNT_V_SHIFT);
+
+	n = 1;
+	fxt_atoms[n++] = state->ns;
+	fxt_atoms[n++] = state->p->p_p->ps_fxtid;
+	fxt_atoms[n++] = state->p->p_fxtid;
+	n = strtoatoms(fxt_atoms, n, classnm, classnmlen);
+	n = strtoatoms(fxt_atoms, n, eventnm, eventnmlen);
+
+	an = n++;
+	fxt_atoms[an] = 2 | (1 << 4);
+	fxt_atoms[an] |= lltx_strid_count << 16;
+	fxt_atoms[an] |= (uint64_t)v << 32;
+
+	fxt_atoms[0] = FXT_T_EVENT | (n << FXT_H_SIZE_SHIFT);
+	fxt_atoms[0] |= lltrace_event_phase_map[phase] << 16;
+	fxt_atoms[0] |= 1 << 20; /* 1 argument */
+	fxt_atoms[0] |= ((1<<15) | classnmlen) << 32;
+	fxt_atoms[0] |= ((1<<15) | eventnmlen) << 48;
+
+	fwrite(fxt_atoms, sizeof(fxt_atoms[0]), n, ofile);
+}
+
+static void
+lltx_event(struct lltstate *state, struct llevent *lle, uint64_t record,
+    const uint64_t *extra, unsigned int extralen)
+{
+	char cname[32], ename[128];
+	unsigned int phase;
+	unsigned int class;
+	const char *classnm;
+	size_t classnmlen;
+	const char *eventnm;
+	size_t eventnmlen;
+	size_t n;
+
+	phase = (record >> LLTRACE_EVENT_PHASE_SHIFT) &
+	    LLTRACE_EVENT_PHASE_MASK;
+	class = (record >> LLTRACE_EVENT_CLASS_SHIFT) &
+	    LLTRACE_EVENT_CLASS_MASK;
+
+	if (class >= nitems(lltrace_event_class_names) ||
+	    (classnm = lltrace_event_class_names[class]) == NULL) {
+		int rv;
+
+		warnx("unknown event class %u", class);
+
+		rv = snprintf(cname, sizeof(cname), "class-%u", class);
+		if (rv == -1)
+			errx(1, "event class name snprintf");
+		classnm = cname;
+		classnmlen = rv;
+		if (classnmlen >= sizeof(cname))
+			errx(1, "event class name too long");
+	} else
+		classnmlen = strlen(classnm);
+
+	switch (class) {
+	case LLTRACE_EVENT_CLASS_SCHED:
+		if (verbose >= 2) {
+			printf("#ev %zu[%zu] %llu cpu %u tid %llu sched\n",
+			    lle->block, lle->slot, state->ns, state->cpu,
+			    state->p->p_fxtid);
+		}
+
+		if (phase == LLTRACE_EVENT_PHASE_INSTANT)
+			lltx_sched(state, lle, record, extra, extralen);
+		return;
+	case LLTRACE_EVENT_CLASS_WAKE:
+		if (verbose >= 2) {
+			printf("#ev %zu[%zu] %llu cpu %u tid %llu wake\n",
+			    lle->block, lle->slot, state->ns, state->cpu,
+			    state->p->p_fxtid);
+		}
+		lltx_sched_wake(state, lle, record, extra, extralen);
+		return;
+	case LLTRACE_EVENT_CLASS_IDLE:
+		if (verbose >= 2) {
+			printf("#ev %zu[%zu] %llu cpu %u tid %llu idle\n",
+			    lle->block, lle->slot, state->ns, state->cpu,
+			    state->p->p_fxtid);
+		}
+		lltx_idle(state, lle, phase);
+		return;
+	case LLTRACE_EVENT_CLASS_SYSCALL:
+	    {
+		unsigned int code = (record >> LLTRACE_SYSCALL_SHIFT) &
+		    LLTRACE_SYSCALL_MASK;
+
+		eventnm = syscall_name(code);
+		if (eventnm == NULL) {
+			/* trace from a newer kernel? fall back to the code */
+			int rv = snprintf(ename, sizeof(ename),
+			    "syscall-%u", code);
+			if (rv == -1)
+				errx(1, "syscall name snprintf");
+			eventnm = ename;
+		}
+
+		switch (code) {
+		case SYS_exit:
+		case SYS___threxit:
+			phase = LLTRACE_EVENT_PHASE_INSTANT;
+			break;
+		}
+	    }
+		eventnmlen = strlen(eventnm);
+		break;
+	case LLTRACE_EVENT_CLASS_INTR:
+	    {
+		unsigned int type = (record >> LLTRACE_INTR_T_SHIFT) &
+		    LLTRACE_INTR_T_MASK;
+		eventnm = lltrace_intr_type_names[type];
+	    }
+		eventnmlen = strlen(eventnm);
+		break;
+	case LLTRACE_EVENT_CLASS_FUNC: {
+		uint32_t addr = record >> 32;
+		const struct ksym *k = ksym_nfind(addr);
+		if (k == NULL) {
+			int rv = snprintf(ename, sizeof(ename),
+			    "?+%x", addr);
+			if (rv == -1)
+				errx(1, "func name snprintf");
+			eventnm = ename;
+			eventnmlen = rv;
+		} else {
+			uint32_t diff = addr - k->addr;
+			if (diff != 0) {
+				int rv = snprintf(ename, sizeof(ename),
+				    "%s+%x", k->name, diff);
+				if (rv == -1)
+					errx(1, "func name snprintf");
+				eventnm = ename;
+				eventnmlen = rv;
+			} else {
+				eventnm = k->name;
+				eventnmlen = strlen(eventnm);
+			}
+		}
+	}
+		break;
+	case LLTRACE_EVENT_CLASS_COUNT:
+		lltx_event_count(state, lle, phase, classnm, classnmlen,
+		    record);
+		return;
+
+	default:
+		eventnm = classnm;
+		eventnmlen = classnmlen;
+		break;
+	}
+
+	if (verbose >= 2) {
+		printf("#ev %zu[%zu] %llu cpu %u tid %llu %s:%s %s\n",
+		    lle->block, lle->slot, state->ns, state->cpu,
+		    state->p->p_fxtid,
+		    classnm, eventnm, lltrace_event_phase_names[phase]);
+	}
+
+	n = 1;
+	fxt_atoms[n++] = state->ns;
+	fxt_atoms[n++] = state->p->p_p->ps_fxtid;
+	fxt_atoms[n++] = state->p->p_fxtid;
+	n = strtoatoms(fxt_atoms, n, classnm, classnmlen);
+	n = strtoatoms(fxt_atoms, n, eventnm, eventnmlen);
+
+	fxt_atoms[0] = FXT_T_EVENT | (n << FXT_H_SIZE_SHIFT);
+	fxt_atoms[0] |= lltrace_event_phase_map[phase] << 16;
+	fxt_atoms[0] |= ((1<<15) | classnmlen) << 32;
+	fxt_atoms[0] |= ((1<<15) | eventnmlen) << 48;
+
+	fwrite(fxt_atoms, sizeof(fxt_atoms[0]), n, ofile);
+}
+
+static void
+lltx_locking(struct lltstate *state, struct llevent *lle, uint64_t record,
+    const uint64_t *extra, unsigned int extralen)
+{
+	struct llt_tid *p = state->p;
+	unsigned int ltype;
+	unsigned int lop;
+	uint64_t cref;
+	uint64_t nref;
+//	uint64_t tref;
+	uint64_t addr;
+	size_t n;
+	struct ksym *k;
+	int durev = -1;
+	unsigned int nargs = 1;
+
+	ltype = (record >> LLTRACE_LK_TYPE_SHIFT) & LLTRACE_LK_TYPE_MASK;
+	lop = (record >> LLTRACE_LK_PHASE_SHIFT) & LLTRACE_LK_PHASE_MASK;
+	addr = record >> LLTRACE_LK_ADDR_SHIFT;
+
+	cref = lltx_strids_locks[ltype];
+	if (cref == 0) {
+		warnx("unknown lock type %u", ltype);
+		return;
+	}
+	nref = lltx_strids_lock_ops[lop];
+	if (nref == 0) {
+		warnx("unknown %s lock op %u", str_locks[ltype], lop);
+		return;
+	}
+
+//	tref = lltx_thrid(state->p);
+
+	switch (lop) {
+	case LLTRACE_LK_A_START:
+		durev = 2;
+		break;
+	case LLTRACE_LK_A_EXCL:
+	case LLTRACE_LK_A_SHARED:
+	case LLTRACE_LK_A_ABORT:
+		durev = 3;
+		break;
+	}
+
+	if (0 && ltype == LLTRACE_LK_RW && durev != -1) {
+		n = 1;
+		fxt_atoms[n++]
= state->ns; + fxt_atoms[n++] = p->p_p->ps_fxtid; + fxt_atoms[n++] = p->p_fxtid; + + fxt_atoms[0] = FXT_T_EVENT | (n << FXT_H_SIZE_SHIFT); + fxt_atoms[0] |= (uint64_t)durev << 16; /* duration begin */ + fxt_atoms[0] |= cref << 32; + fxt_atoms[0] |= (uint64_t)lltx_strid_acquire << 48; + + //fwrite(fxt_atoms, sizeof(fxt_atoms[0]), n, ofile); + fxt_insert(state->ns, fxt_atoms, n); + } + + k = ksym_nfind(addr); + if (k != NULL && k->ref == 0) { + k->ref = lltx_str(k->name); +#if 0 + + n = 1; + fxt_atoms[n++] = addr; + + fxt_atoms[0] = FXT_T_KOBJ | (n << FXT_H_SIZE_SHIFT); + fxt_atoms[0] |= 0ULL << 16; /* ZX_OBJ_TYPE_NONE */ + fxt_atoms[0] |= k->ref << 24; /* name */ + fxt_atoms[0] |= 0ULL << 40; /* number of args */ + + fwrite(fxt_atoms, sizeof(fxt_atoms[0]), n, ofile); +#endif + } + + if (verbose >= 2) { + printf("#lk %zu[%zu] %llu cpu %u pid %llu tid %llu " + "%s %s\n", + lle->block, lle->slot, state->ns, state->cpu, + state->p->p_p->ps_fxtid, state->p->p_fxtid, + str_locks[ltype], str_lock_ops[lop]); + } + + n = 1; + fxt_atoms[n++] = state->ns; + fxt_atoms[n++] = p->p_p->ps_fxtid; + fxt_atoms[n++] = p->p_fxtid; + fxt_atoms[n++] = 8 | (2 << 4) | (cref << 16); + fxt_atoms[n++] = addr; + if (k != NULL) { + size_t na = n++; + uint32_t diff; + + fxt_atoms[na] = 6 | (2 << 4); + fxt_atoms[na] |= (uint64_t)lltx_strid_symbol << 16; + fxt_atoms[na] |= (uint64_t)k->ref << 32; + + nargs++; + + diff = addr - k->addr; + if (diff > 0) { + na = n++; + + fxt_atoms[na] = 2 | (1 << 4); + fxt_atoms[na] |= (uint64_t)lltx_strid_offset << 16; + fxt_atoms[na] |= (uint64_t)diff << 32; + + nargs++; + } + } + + fxt_atoms[0] = FXT_T_EVENT | (n << FXT_H_SIZE_SHIFT); + fxt_atoms[0] |= 0 << 16; /* instant event */ + fxt_atoms[0] |= nargs << 20; +// fxt_atoms[0] |= tref << 24; + fxt_atoms[0] |= cref << 32; + fxt_atoms[0] |= nref << 48; + + fwrite(fxt_atoms, sizeof(fxt_atoms[0]), n, ofile); +} + +#if 0 +static void +lltextract_pc(struct llevent *lle, int event, uint64_t pc) +{ + lle->event = event; + + /* + * XXX The PC sample is generated after the local_timer + * interrupt, but we really want its sample time to be just + * before that interrupt. + */ + + /* + * Put a hash of the PC name into arg, so HTML display can + * choose colors quickly. 
+ */ + lle->arg0 = (pc >> 6) & 0xffff; + + if (event == KUTRACE_PC_K) { + const struct ksym *k; + + k = ksym_nfind(pc); + if (k != NULL) { + if (asprintf(&lle->name, "PC=%s", k->name) == -1) + errx(1, "PC_K name asprintf"); + return; + } + } + + if (asprintf(&lle->name, "PC=%016llx", pc) == -1) + errx(1, "PC asprintf"); +} + +static char * +xstrdup(const char *src) +{ + char *dst; + + dst = strdup(src); + if (dst == NULL) + err(1, "strdup %s", src); + + return (dst); +} + +static void +lltx_event(const char *name, const char *cat, const char *ph, + uint64_t ts, pid_t pid, pid_t tid) +{ + fprintf(ofile, "{"); + fprintf(ofile, "\"name\":\"%s\",\"cat\":\"%s\",\"ph\":\"%s\",", + name, cat, ph); + fprintf(ofile, "\"ts\":%llu.%03llu,\"pid\":%d,\"tid\":%d", + ts / 1000, ts % 1000, pid, tid); + fprintf(ofile, "},\n"); +} + +static char * +trap_name(unsigned int trap) +{ + const char *source; + char *name; + + switch (trap) { + case LLTRACE_TRAP_PAGEFAULT: + source = "page_fault"; + break; + default: + if (asprintf(&name, "trap-%u", trap) == -1) + errx(1, "trap asprintf"); + return (name); + } + + name = xstrdup(source); + + return (name); +} + +static void +lltextract_trap(struct lltstate *state, struct llevent *lle, + unsigned int event, uint64_t v) +{ + unsigned int trap; + + trap = (v >> LLTRACE_ARG32_SHIFT) & LLTRACE_ARG32_MASK; + + lle->pid = state->pid; + lle->tid = state->tid; + lle->event = event + trap; + lle->name = trap_name(trap); + + lltx_event(trap_name(trap), "trap", event == KUTRACE_TRAP ? "B" : "E", + state->ns, lle->pid, lle->tid); +} + +static void +lltextract_sched(struct lltstate *state, struct llevent *lle, + unsigned int event) +{ + lle->pid = state->pid; + lle->tid = state->tid; + lle->event = event; + lle->arg0 = 0; + lle->name = xstrdup("-sched-"); + + lltx_event("sched", "sched", event == 0x9ff ? 
"B" : "E", + state->ns, lle->pid, lle->tid); +} + +static void +lltextract_lock(struct lltstate *state, struct llevent *lle, + unsigned int event, uint64_t v) +{ + unsigned int lock; + + lock = (v >> LLTRACE_ARG32_SHIFT) & LLTRACE_ARG32_MASK; + lock &= 0xffff; + + lle->pid = state->pid; + lle->tid = state->tid; + lle->event = event; + lle->arg0 = lock; + + if (asprintf(&lle->name, "lock.%x", lock) == -1) + errx(1, "lock asprintf"); +} + +static void +lltextract_pkts(struct lltstate *state, struct llevent *lle, uint64_t v) +{ + unsigned int type = v & LLTRACE_PKTS_T_MASK; + const char *name; + + switch (type) { + case LLTRACE_PKTS_T_IFQ: + name = "ifq"; + break; + case LLTRACE_PKTS_T_NETTQ: + name = "process"; + break; + case LLTRACE_PKTS_T_IFIQ: + name = "ifiq"; + break; +#ifdef LLTRACE_PKTS_T_DROP + case LLTRACE_PKTS_T_DROP: + name = "drop"; + break; +#endif + default: + errx(1, "unexpected pkts type %x", + type >> LLTRACE_PKTS_T_SHIFT); + /* NOTREACHED */ + } + + lle->tid = state->tid; + lle->event = KUTRACE_MARKA; /* sure */ + lle->arg0 = v; + + if (asprintf(&lle->name, "%s=%llu", name, + v & LLTRACE_PKTS_V_MASK) == -1) + errx(1, "pkts asprintf"); +} + +static void +lltextract_func(struct lltstate *state, struct llevent *lle, + unsigned int event, const char *evname, uint64_t v) +{ + const struct ksym *k; + + lle->arg0 = (v >> LLTRACE_ARG32_SHIFT) & LLTRACE_ARG32_MASK; + + lle->tid = state->tid; + lle->event = event; + + k = ksym_nfind(lle->arg0); + if (k != NULL) { + uint32_t diff = lle->arg0 - k->addr; + if (diff == 0) { + if (asprintf(&lle->name, "%s=%s", evname, + k->name) == -1) + err(1, "kfunc %s asprintf", evname); + } else { + if (asprintf(&lle->name, "%s=%s+%u", evname, + k->name, diff) == -1) + err(1, "kfunc %s asprintf", evname); + } + } else { + if (asprintf(&lle->name, "%s=0x%x", evname, lle->arg0) == -1) + err(1, "kfunc %s asprintf", evname); + } +} + +static void +lltextract_mark(struct lltstate *state, struct llevent *lle, + unsigned int ev, uint64_t v) +{ + + switch (ev) { + case LLTRACE_EVENT_IDLE: + lle->event = KUTRACE_MWAIT; + lle->arg0 = 255; + + lle->name = xstrdup("mwait"); + break; + + case LLTRACE_EVENT_RUNNABLE: + lle->tid = state->tid; + lle->event = KUTRACE_RUNNABLE; + lle->arg0 = (v >> LLTRACE_ARG32_SHIFT) & LLTRACE_ARG32_MASK; + lle->arg0 &= 0xffff; + + if (asprintf(&lle->name, "runnable.%u", lle->arg0) == -1) + err(1, "runnable asprintf"); + break; + + case LLTRACE_EVENT_IPI: + lle->tid = state->tid; + lle->event = KUTRACE_IPI; + lle->arg0 = (v >> LLTRACE_ARG32_SHIFT) & LLTRACE_ARG32_MASK; + + lle->name = xstrdup("sendipi"); + break; + + case LLTRACE_EVENT_SCHED: + lltextract_sched(state, lle, + KUTRACE_SYSCALL(KUTRACE_SYSCALL_SCHED)); + break; + case LLTRACE_EVENT_SCHEDRET: + lltextract_sched(state, lle, + KUTRACE_SYSRET(KUTRACE_SYSCALL_SCHED)); + break; + + case LLTRACE_EVENT_TRAP: + lltextract_trap(state, lle, KUTRACE_TRAP, v); + break; + case LLTRACE_EVENT_TRAPRET: + lltextract_trap(state, lle, KUTRACE_TRAPRET, v); + break; + + case LLTRACE_EVENT_LOCK(LLTRACE_LOCK_NOACQUIRE): + lltextract_lock(state, lle, KUTRACE_LOCKNOACQUIRE, v); + break; + case LLTRACE_EVENT_LOCK(LLTRACE_LOCK_ACQUIRE): + lltextract_lock(state, lle, KUTRACE_LOCKACQUIRE, v); + break; + case LLTRACE_EVENT_LOCK(LLTRACE_LOCK_WAKEUP): + lltextract_lock(state, lle, KUTRACE_LOCKWAKEUP, v); + break; + + case LLTRACE_EVENT_PKTS: + lltextract_pkts(state, lle, v); + break; + + case LLTRACE_EVENT_MARK: + lle->tid = state->tid; + lle->event = KUTRACE_MARKB; + lle->arg0 = 0; + + lle->name = 
xstrdup("markd=yep"); + break; + + case LLTRACE_EVENT_KFUNC_ENTER: + lltextract_func(state, lle, KUTRACE_MARKD, "enter", v); + break; + + case LLTRACE_EVENT_KFUNC_LEAVE: + lltextract_func(state, lle, KUTRACE_MARKD, "leave", v); + break; + + default: + errx(1, "unexpected mark event 0x%03x", ev); + /* NOTREACHED */ + } +} + +static char * +irq_name(unsigned int type, unsigned int vec) +{ + const char *source; + char *name; + + switch (type) { + case LLTRACE_IRQ_IPI: + source = "ipi"; + break; + case LLTRACE_IRQ_BOTTOM_HALF: + if (vec == 0) + return xstrdup("BH:timer"); + + source = "BH"; + break; + case LLTRACE_IRQ_LOCAL_TIMER: + return xstrdup("local_timer_vector"); + default: + if (asprintf(&name, "irq%u:%u", type, vec) == -1) + errx(1, "irq asprintf"); + return (name); + } + + if (asprintf(&name, "%s:%u", source, vec) == -1) + errx(1, "irq %s asprintf", source); + + return (name); +} + +static void +lltextract_irq(struct lltstate *state, struct llevent *lle, + unsigned int ev, uint64_t v) +{ + unsigned int ret = ev & 0x100; + unsigned int type = ev & 0xff; + unsigned int vec = (v >> LLTRACE_ARG32_SHIFT) & LLTRACE_ARG32_MASK; + + lle->event = (ret ? KUTRACE_IRQRET : KUTRACE_IRQ) | type; + lle->arg0 = vec; + + lle->name = irq_name(type, vec); +} + +static void +lltextract_syscall(struct lltstate *state, struct llevent *lle, + unsigned int ev, uint64_t v) +{ + unsigned int sc = LLTRACE_SYSCALL_MASK(ev); + + lle->pid = state->pid; + lle->tid = state->tid; + lle->event = KUTRACE_SYSCALL(sc); + lle->arg0 = (v >> LLTRACE_ARG0_SHIFT) & LLTRACE_ARG0_MASK; + lle->name = syscall_name(sc); + + lltx_event(syscall_name(sc), "syscall", "B", + state->ns, lle->pid, lle->tid); +} + +static void +lltextract_sysret(struct lltstate *state, struct llevent *lle, + unsigned int ev, uint64_t v) +{ + unsigned int sc = LLTRACE_SYSCALL_MASK(ev); + + lle->pid = state->pid; + lle->tid = state->tid; + lle->event = KUTRACE_SYSRET(sc); + lle->arg0 = (v >> LLTRACE_ARG0_SHIFT) & LLTRACE_ARG0_MASK; + lle->name = syscall_name(sc); + + lltx_event(syscall_name(sc), "syscall", "E", + state->ns, lle->pid, lle->tid); +} +#endif + +RBT_GENERATE(llt_pid_tree, llt_pid, ps_entry, llt_pid_cmp); +RBT_GENERATE(llt_tid_tree, llt_tid, p_entry, llt_tid_cmp); + +static inline int +lltx_fxt_record_cmp(const struct lltx_fxt_record *a, + const struct lltx_fxt_record *b) +{ + if (a->ts > b->ts) + return (1); + if (a->ts < b->ts) + return (-1); + return (0); +} + +HEAP_GENERATE(lltx_fxt_heap, lltx_fxt_record, entry, lltx_fxt_record_cmp); Index: usr.bin/lltextract/lltextract.h =================================================================== RCS file: usr.bin/lltextract/lltextract.h diff -N usr.bin/lltextract/lltextract.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ usr.bin/lltextract/lltextract.h 1 Jul 2024 05:41:24 -0000 @@ -0,0 +1,30 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2022 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
Index: usr.bin/lltextract/lltextract.h
===================================================================
RCS file: usr.bin/lltextract/lltextract.h
diff -N usr.bin/lltextract/lltextract.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ usr.bin/lltextract/lltextract.h	1 Jul 2024 05:41:24 -0000
@@ -0,0 +1,30 @@
+/*	$OpenBSD$ */
+
+/*
+ * Copyright (c) 2022 David Gwynne
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/tree.h>
+
+struct ksym {
+	RBT_ENTRY(ksym)		 entry;
+	char			*name;
+	uint32_t		 addr;
+	uint32_t		 len;
+	unsigned int		 ref;
+};
+
+struct ksym	*ksym_find(uint32_t);
+struct ksym	*ksym_nfind(uint32_t);
Index: usr.bin/lltextract/names.c
===================================================================
RCS file: usr.bin/lltextract/names.c
diff -N usr.bin/lltextract/names.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ usr.bin/lltextract/names.c	1 Jul 2024 05:41:24 -0000
@@ -0,0 +1,133 @@
+/*	$OpenBSD$ */
+
+/*
+ * Copyright (c) 2022 David Gwynne
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/tree.h>
+
+#include <db.h>
+#include <err.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <nlist.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "lltextract.h"
+
+#define DBNAME "/var/db/kvm_bsd.db"
+
+HASHINFO openinfo = {
+	4096,		/* bsize */
+	128,		/* ffactor */
+	1024,		/* nelem */
+	2048 * 1024,	/* cachesize */
+	NULL,		/* hash() */
+	0		/* lorder */
+};
+
+RBT_HEAD(ksyms, ksym);
+
+RBT_PROTOTYPE(ksyms, ksym, entry, ksym_cmp);
+
+static struct ksyms _ksyms = RBT_INITIALIZER(ksyms);
+
+static void
+knames_load(struct ksyms *ksyms)
+{
+	DB *db;
+	DBT key, data;
+	struct nlist n;
+	struct ksym *k;
+
+	db = dbopen(DBNAME, O_RDONLY, 0, DB_HASH, NULL);
+	if (db == NULL)
+		err(1, "%s", DBNAME);
+
+	for (;;) {
+		int rv = db->seq(db, &key, &data, R_NEXT);
+		if (rv == -1)
+			errx(1, "%s seq", DBNAME);
+
+		if (rv != 0)
+			break;
+
+		if (key.size < 2 || *(const char *)key.data != '_')
+			continue;
+		if (data.size != sizeof(n))
+			continue;
+
+		memcpy(&n, data.data, sizeof(n));
+		//if (n.n_type != N_TEXT)
+		//	continue;
+
+		k = malloc(sizeof(*k) + key.size);
+		if (k == NULL)
+			err(1, "%s ksym", __func__);
+
+		k->addr = n.n_value;
+		k->len = 0;
+		k->name = (char *)(k + 1);
+		k->ref = 0;
+
+		memcpy(k->name, (const char *)key.data + 1, key.size - 1);
+		k->name[key.size - 1] = '\0';
+
+		if (RBT_INSERT(ksyms, ksyms, k) != NULL)
+			free(k);
+	}
+
+	db->close(db);
+}
+
+struct ksym *
+ksym_find(uint32_t addr)
+{
+	struct ksyms *ksyms = &_ksyms;
+	struct ksym key = { .addr = addr };
+
+	if (RBT_EMPTY(ksyms, ksyms))
+		knames_load(ksyms);
+
+	return (RBT_FIND(ksyms, ksyms, &key));
+}
+
+struct ksym *
+ksym_nfind(uint32_t addr)
+{
+	struct ksyms *ksyms = &_ksyms;
+	struct ksym key = { .addr = addr };
+
+	if (RBT_EMPTY(ksyms, ksyms))
+		knames_load(ksyms);
+
+	return (RBT_NFIND(ksyms, ksyms, &key));
+}
+
+static inline int
+ksym_cmp(const struct ksym *a, const struct ksym *b)
+{
+	if (a->addr > b->addr)
+		return (-1);
+	if (a->addr < b->addr)
+		return (1);
+	return (0);
+}
+
+RBT_GENERATE(ksyms, ksym, entry, ksym_cmp);
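
[Editor's note, not part of the diff: one subtlety in names.c worth calling
out. ksym_cmp deliberately inverts the usual comparison, so in tree order
higher addresses sort first, and RBT_NFIND (which returns the first element not
less than the key in comparator order) hands back the symbol with the greatest
address at or below the lookup address. That is exactly what resolving a code
address to name+offset needs, and lltextract_func leans on it. The same
contract, sketched with a plain sorted array; all names here are hypothetical:

	#include <stdint.h>
	#include <stdio.h>

	struct sym { uint32_t addr; const char *name; };

	/* sorted ascending by address, as a symbol table is */
	static const struct sym syms[] = {
		{ 0x1000, "intr_handler" },
		{ 0x1400, "softintr_dispatch" },
		{ 0x1900, "lapic_clockintr" },
	};

	/* return the symbol with the greatest addr <= lookup, like ksym_nfind */
	static const struct sym *
	sym_nfind(uint32_t addr)
	{
		const struct sym *best = NULL;
		size_t i;

		for (i = 0; i < sizeof(syms) / sizeof(syms[0]); i++) {
			if (syms[i].addr <= addr)
				best = &syms[i];
		}
		return (best);
	}

	int
	main(void)
	{
		uint32_t pc = 0x1432;
		const struct sym *s = sym_nfind(pc);

		if (s != NULL)
			printf("%s+0x%x\n", s->name, pc - s->addr);
		return (0);
	}

This prints "softintr_dispatch+0x32", matching the "%s=%s+%u" formatting in
lltextract_func.]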
Index: usr.bin/lltextract/syscallnames.c
===================================================================
RCS file: usr.bin/lltextract/syscallnames.c
diff -N usr.bin/lltextract/syscallnames.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ usr.bin/lltextract/syscallnames.c	1 Jul 2024 05:41:24 -0000
@@ -0,0 +1,26 @@
+/*	$OpenBSD$ */
+
+/*
+ * Copyright (c) 2022 David Gwynne
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#define ACCOUNTING
+#define KTRACE
+#define PTRACE
+#define SYSVMSG
+#define SYSVSEM
+#define SYSVSHM
+
+#include
Index: usr.sbin/lltrace/Makefile
===================================================================
RCS file: usr.sbin/lltrace/Makefile
diff -N usr.sbin/lltrace/Makefile
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ usr.sbin/lltrace/Makefile	1 Jul 2024 05:41:24 -0000
@@ -0,0 +1,13 @@
+# $OpenBSD$
+
+PROG=	lltrace
+SRCS=	lltrace.c
+MAN=
+
+LDADD=	-levent
+DPADD=	${LIBEVENT}
+
+WARNINGS= Yes
+DEBUG=	-g
+
+.include <bsd.prog.mk>
Index: usr.sbin/lltrace/lltrace.c
===================================================================
RCS file: usr.sbin/lltrace/lltrace.c
diff -N usr.sbin/lltrace/lltrace.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ usr.sbin/lltrace/lltrace.c	1 Jul 2024 05:41:24 -0000
@@ -0,0 +1,678 @@
+/*	$OpenBSD$ */
+
+/*
+ * Copyright (c) 2022 David Gwynne
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/ioctl.h>
+#include <sys/sysctl.h>
+#include <sys/resource.h>
+
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <pwd.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <event.h>
+
+#include "/sys/sys/lltrace.h"
+
+#ifndef nitems
+#define nitems(_a) (sizeof((_a)) / sizeof((_a)[0]))
+#endif
+
+#define DEV_KUTRACE "/dev/lltrace"
+
+#define NRINGS_DEFAULT	256	/* 256 * 8192 * 8 is 16MB */
+
+struct lltrace;
+
+struct mode {
+	const char *name;
+	void *(*setup)(struct lltrace *, int, char **);
+	int (*run)(struct lltrace *);
+};
+
+static void	*mode_kill_setup(struct lltrace *, int, char **);
+static int	 mode_kill_run(struct lltrace *);
+
+static const struct mode mode_kill = {
+	"kill", mode_kill_setup, mode_kill_run
+};
+
+static void	*mode_wait_setup(struct lltrace *, int, char **);
+static int	 mode_wait_run(struct lltrace *);
+static void	*mode_exec_setup(struct lltrace *, int, char **);
+static int	 mode_exec_run(struct lltrace *);
+
+static const struct mode modes[] = {
+	{ "wait", mode_wait_setup, mode_wait_run },
+	{ "exec", mode_exec_setup, mode_exec_run },
+};
+
+static const struct mode *
+		mode_lookup(const char *);
+static const char *outfile_default(void);
+
+__dead static void
+usage(void)
+{
+	extern char *__progname;
+
+	fprintf(stderr,
+	    "usage: %s [-v] [-m blen] [-n nbuffers] [-o output] [command]\n",
+	    __progname);
+	fprintf(stderr, "       %s wait seconds\n", __progname);
+	fprintf(stderr, "       %s exec program ...\n", __progname);
+
+	exit(-1);
+}
+
+struct lltrace {
+	const char *outfile;
+	int dv;			/* /dev/lltrace fd */
+	int of;			/* outfile fd */
+	void *mode;
+
+	struct event dv_ev;	/* handle reading from the kernel */
+
+	unsigned int blen;
+	size_t nbuffers;
+	struct lltrace_buffer
+		*buffers;
+	size_t buffer_idx;
+
+	uint64_t nsec_first;
+	uint64_t nsec_last;
+	uint64_t count_buffers;
+	uint64_t count_slots;
+	uint64_t count_drops;
+};
+
+static void	lltrace_start(struct lltrace *);
+static void	lltrace_stop(struct lltrace *);
+
+static void	lltrace_read(int, short, void *);
+static void	lltrace_flush(struct lltrace *);
+
+int
+main(int argc, char *argv[])
+{
+	const struct mode *mode = &mode_kill;
+	int ch;
+	const char *errstr;
+	int verbose = 0;
+	int prio;
+
+	struct lltrace lltrace = {
+		.outfile = NULL,
+		.blen = 0,
+		.nbuffers = NRINGS_DEFAULT,
+
+		.nsec_first = ~0,
+		.nsec_last = 0,
+		.count_buffers = 0,
+		.count_slots = 0,
+		.count_drops = 0,
+	};
+	struct lltrace *llt = &lltrace;
+
+	while ((ch = getopt(argc, argv, "m:n:o:v")) != -1) {
+		switch (ch) {
+		case 'm':
+			llt->blen = strtonum(optarg,
+			    LLTRACE_BLEN_MIN, LLTRACE_BLEN_MAX, &errstr);
+			if (errstr != NULL) {
+				errx(1, "kernel buffer len %s: %s",
+				    optarg, errstr);
+			}
+			break;
+		case 'n':
+			llt->nbuffers = strtonum(optarg, 4, 4096, &errstr);
+			if (errstr != NULL) {
+				errx(1, "number of buffers %s: %s",
+				    optarg, errstr);
+			}
+			break;
+		case 'o':
+			llt->outfile = optarg;
+			break;
+		case 'v':
+			verbose = 1;
+			break;
+		default:
+			usage();
+			/* NOTREACHED */
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	/* reset getopt so a mode's setup can parse its own options */
+	optreset = optind = opterr = 1;
+
+	if (argc > 0) {
+		mode = mode_lookup(argv[0]);
+		if (mode == NULL)
+			errx(1, "unknown mode %s", argv[0]);
+	}
+
+	if (llt->outfile == NULL)
+		llt->outfile = outfile_default();
+
+	event_init();
+
+	llt->mode = (*mode->setup)(llt, argc, argv);
+
+	llt->dv = open(DEV_KUTRACE, O_NONBLOCK|O_RDWR|O_CLOEXEC);
+	if (llt->dv == -1)
+		err(1, "%s", DEV_KUTRACE);
+
+	if (llt->blen != 0) {
+		if (ioctl(llt->dv, LLTIOCSBLEN, &llt->blen) == -1)
+			err(1, "set kernel buffer len %u", llt->blen);
+	}
+
+	event_set(&llt->dv_ev, llt->dv, EV_READ|EV_PERSIST,
+	    lltrace_read, llt);
+
+	llt->of = open(llt->outfile, O_WRONLY|O_CREAT|O_CLOEXEC|O_TRUNC, 0640);
+	if (llt->of == -1)
+		err(1, "open %s", llt->outfile);
+
+	llt->buffers = calloc(llt->nbuffers, sizeof(*llt->buffers));
+	if (llt->buffers == NULL)
+		err(1, "unable to allocate %zu buffers", llt->nbuffers);
+
+	llt->buffer_idx = 0;
+
+	if ((*mode->run)(llt) == -1)
+		exit(1);
+
+	prio = getpriority(PRIO_PROCESS, 0);
+	if (setpriority(PRIO_PROCESS, 0, -20) == -1)
+		err(1, "setpriority -20");
+
+	lltrace_start(llt);
+
+	event_dispatch();
+
+	if (setpriority(PRIO_PROCESS, 0, prio) == -1)
+		err(1, "setpriority %d", prio);
+
+	if (llt->buffer_idx != 0)
+		lltrace_flush(llt);
+
+	if (verbose) {
+		uint64_t diff = llt->nsec_last - llt->nsec_first;
+		double interval = (double)diff / 1000000000.0;
+		int mib[] = { CTL_HW, HW_NCPU };
+		int ncpus;
+		size_t ncpuslen = sizeof(ncpus);
+
+		if (sysctl(mib, nitems(mib), &ncpus, &ncpuslen, NULL, 0) == -1)
+			err(1, "sysctl hw.ncpus");
+
+		printf("output file: %s\n", llt->outfile);
+		printf("interval: %.03lfs, ncpus: %d\n", interval, ncpus);
+		printf("buffers: %llu (%.01lf/cpu/s), "
+		    "slots: %llu (%.01lf/cpu/s)\n",
+		    llt->count_buffers, llt->count_buffers / interval / ncpus,
+		    llt->count_slots, llt->count_slots / interval / ncpus);
+		printf("drops: %llu (%.01lf/cpu/s)\n",
+		    llt->count_drops, llt->count_drops / interval / ncpus);
+	}
+
+	return (0);
+}
+
+static void
+lltrace_start(struct lltrace *llt)
+{
+	event_add(&llt->dv_ev, NULL);
+
+	if (ioctl(llt->dv, LLTIOCSTART) == -1)
+		err(1, "lltrace start");
+}
+
+static void
+lltrace_flush(struct lltrace *llt)
+{
+	size_t len;
+	ssize_t rv;
+
+	len = llt->buffer_idx * sizeof(*llt->buffers);
+	rv = write(llt->of, llt->buffers, len);
+	if (rv == -1)
+		err(1, "%s write", llt->outfile);
+
+	if ((size_t)rv < len) {
+		errx(1, "%s write short (%zd/%zu bytes)",
+		    llt->outfile, rv, len);
+	}
+}
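
[Editor's note, not part of the diff: lltrace_flush above makes the on-disk
format plain — the output file is whole struct lltrace_buffer records written
back to back, and the NRINGS_DEFAULT comment implies 8192 eight-byte slots per
buffer. A minimal offline consumer under that assumption; the real slot layout
is defined by sys/lltrace.h, so the struct below is only a stand-in:

	#include <stdio.h>

	/* stand-in for the real struct lltrace_buffer from sys/lltrace.h */
	struct lltrace_buffer {
		unsigned long long llt_slots[8192];	/* 8192 * 8 = 64KB */
	};

	int
	main(int argc, char *argv[])
	{
		struct lltrace_buffer b;
		FILE *f;
		size_t n = 0;

		if (argc != 2)
			return (1);
		f = fopen(argv[1], "r");
		if (f == NULL)
			return (1);

		/* the file is a plain concatenation of buffer records */
		while (fread(&b, sizeof(b), 1, f) == 1)
			n++;

		printf("%zu buffers\n", n);
		fclose(f);
		return (0);
	}

lltextract is the real consumer; this only illustrates the framing.]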
+
+static int
+lltrace_read_one(struct lltrace *llt)
+{
+	struct lltrace_buffer *buffer;
+	ssize_t rv;
+	uint64_t nsec;
+
+	if (llt->buffer_idx >= llt->nbuffers) {
+		size_t i, j;
+
+		lltrace_flush(llt);
+
+		/* reset */
+		llt->buffer_idx = 0;
+
+		/*
+		 * memset(llt->buffers, 0,
+		 *     llt->nbuffers * sizeof(*llt->buffers));
+		 */
+		for (i = 0; i < llt->nbuffers; i++) {
+			buffer = llt->buffers + i;
+
+			for (j = 0; j < nitems(buffer->llt_slots); j++)
+				buffer->llt_slots[j] = 0;
+		}
+	}
+
+	buffer = llt->buffers + llt->buffer_idx;
+	rv = read(llt->dv, buffer, sizeof(*buffer));
+	if (rv == -1) {
+		switch (errno) {
+		case EAGAIN:
+			/* try again later */
+			return (EAGAIN);
+		case ENOENT:
+			/* we're done */
+			event_del(&llt->dv_ev);
+			return (ENOENT);
+		default:
+			err(1, "%s read", DEV_KUTRACE);
+			/* NOTREACHED */
+		}
+	}
+
+	if (rv == 0) {
+		/* we're done */
+		event_del(&llt->dv_ev);
+		return (ENOENT);
+	}
+
+	llt->buffer_idx++;
+
+	nsec = buffer->llt_slots[3];
+	if (nsec < llt->nsec_first)
+		llt->nsec_first = nsec;
+
+	nsec = buffer->llt_slots[5];
+	if (nsec > llt->nsec_last)
+		llt->nsec_last = nsec;
+
+	llt->count_buffers++;
+	llt->count_slots += rv / sizeof(uint64_t);
+	//llt->count_drops += buffer->llt_slots[7];
+
+	return (0);
+}
+
+static void
+lltrace_read(int dv, short events, void *arg)
+{
+	struct lltrace *llt = arg;
+
+	lltrace_read_one(llt);
+}
+
+static void
+lltrace_stop(struct lltrace *llt)
+{
+	int error;
+
+	if (ioctl(llt->dv, LLTIOCSTOP) == -1) {
+		if (errno != EALREADY)
+			err(1, "lltrace stop");
+	}
+
+	do {
+		error = lltrace_read_one(llt);
+	} while (error == 0);
+
+	event_del(&llt->dv_ev);
+}
+
+static const char *
+outfile_default(void)
+{
+	extern char *__progname;
+	char host[MAXHOSTNAMELEN];
+	time_t now;
+	struct tm *tm;
+	char *outfile;
+
+	if (gethostname(host, sizeof(host)) == -1)
+		err(1, "gethostname");
+
+	now = time(NULL);
+
+	tm = localtime(&now);
+
+	if (asprintf(&outfile, "%s_%04d%02d%02d_%02d%02d%02d_%s.lltrace",
+	    __progname,
+	    tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
+	    tm->tm_hour, tm->tm_min, tm->tm_sec,
+	    host) == -1)
+		err(1, "error generating default output filename");
+
+	return (outfile);
+}
+
+#if 0
+static int
+printable(int ch)
+{
+	if (ch == '\0')
+		return ('_');
+	if (!isprint(ch))
+		return ('~');
+
+	return (ch);
+}
+
+static void
+hexdump(const void *d, size_t datalen)
+{
+	const uint8_t *data = d;
+	size_t i, j = 0;
+
+	for (i = 0; i < datalen; i += j) {
+#if 0
+		printf("%04zu: ", i);
+		for (j = 0; j < 16 && i+j < datalen; j++)
+			printf("%02x ", data[i + j]);
+		while (j++ < 16)
+			printf("   ");
+#endif
+		printf("|");
+
+		for (j = 0; j < 16 && i+j < datalen; j++)
+			putchar(printable(data[i + j]));
+		printf("|\n");
+	}
+}
+#endif
+
+static const struct mode *
+mode_lookup(const char *name)
+{
+	size_t i;
+
+	for (i = 0; i < nitems(modes); i++) {
+		const struct mode *mode = &modes[i];
+
+		if (strcmp(mode->name, name) == 0)
+			return (mode);
+	}
+
+	return (NULL);
+}
+
+static void
+mode_kill_event(int nil, short events, void *arg)
+{
+	struct lltrace *llt = arg;
+	struct event *ev = llt->mode;
+
+	fprintf(stdout, "lltrace stopped\n");
+	fflush(stdout);
+
+	event_del(ev);
+
+	lltrace_stop(llt);
+}
+
+static void *
+mode_kill_setup(struct lltrace *llt, int argc, char *argv[])
+{
+	struct event *ev;
+
+	if (argc != 0)
+		usage();
+
+	ev = malloc(sizeof(*ev));
+	if (ev == NULL)
+		err(1, NULL);
+
+	signal_set(ev, SIGINT, mode_kill_event, llt);
+	return (ev);
+}
+
+static int
+mode_kill_run(struct lltrace *llt)
+{
+	struct event *ev = llt->mode;
+
+	signal_add(ev, NULL);
+
+	fprintf(stdout, "lltrace starting, press Ctrl-C to end...\n");
+	fflush(stdout);
+
+	return (0);
+}
+
+/*
+ * lltrace for a specified number of seconds
+ */
+
+struct mode_wait_state {
+	struct lltrace *llt;
+	struct timeval tv;
+	struct event tmo;
+	struct event sig;
+};
+
+static void
+mode_wait_tmo(int wat, short events, void *arg)
+{
+	struct mode_wait_state *state = arg;
+	struct lltrace *llt = state->llt;
+
+	signal_del(&state->sig);
+	lltrace_stop(llt);
+}
+
+static void
+mode_wait_sig(int wat, short events, void *arg)
+{
+	struct mode_wait_state *state = arg;
+	struct lltrace *llt = state->llt;
+
+	evtimer_del(&state->tmo);
+	signal_del(&state->sig);
+	lltrace_stop(llt);
+}
+
+static void *
+mode_wait_setup(struct lltrace *llt, int argc, char *argv[])
+{
+	struct mode_wait_state *state;
+	const char *errstr;
+
+	if (argc != 2)
+		usage();
+
+	state = malloc(sizeof(*state));
+	if (state == NULL)
+		err(1, NULL);
+
+	state->llt = llt;
+
+	state->tv.tv_sec = strtonum(argv[1], 1, 600, &errstr);
+	if (errstr != NULL)
+		errx(1, "wait time %s: %s", argv[1], errstr);
+
+	state->tv.tv_usec = 0;
+
+	evtimer_set(&state->tmo, mode_wait_tmo, state);
+	signal_set(&state->sig, SIGINT, mode_wait_sig, state);
+
+	return (state);
+}
+
+static int
+mode_wait_run(struct lltrace *llt)
+{
+	struct mode_wait_state *state = llt->mode;
+
+	evtimer_add(&state->tmo, &state->tv);
+	signal_add(&state->sig, NULL);
+
+	return (0);
+}
+
+/*
+ * trace the execution of a (child) program
+ */
+
+struct mode_exec_state {
+	struct lltrace *llt;
+
+	char **argv;
+
+	pid_t pid;
+	struct event sigchld;
+	struct event sigint;
+
+	uid_t uid;
+	gid_t gid;
+	gid_t groups[NGROUPS_MAX];
+	int ngroups;
+};
+
+static void
+mode_exec_sig(int wat, short events, void *arg)
+{
+	struct mode_exec_state *state = arg;
+	struct lltrace *llt = state->llt;
+
+	/* do we check the pid? */
+
+	signal_del(&state->sigchld);
+	signal_del(&state->sigint);
+	lltrace_stop(llt);
+}
+
+static void *
+mode_exec_setup(struct lltrace *llt, int argc, char *argv[])
+{
+	struct mode_exec_state *state;
+	const char *user = NULL;
+	int ch;
+
+	while ((ch = getopt(argc, argv, "u:")) != -1) {
+		switch (ch) {
+		case 'u':
+			user = optarg;
+			break;
+		default:
+			usage();
+			/* NOTREACHED */
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	if (argc == 0) {
+		warnx("no command specified");
+		usage();
+	}
+
+	state = malloc(sizeof(*state));
+	if (state == NULL)
+		err(1, NULL);
+
+	state->llt = llt;
+	state->argv = argv;
+	state->uid = 0;
+	state->pid = -1; /* not yet */
+	signal_set(&state->sigchld, SIGCHLD, mode_exec_sig, state);
+	signal_set(&state->sigint, SIGINT, mode_exec_sig, state);
+
+	if (user != NULL) {
+		struct passwd *pw;
+
+		pw = getpwnam(user);
+		if (pw == NULL)
+			errx(1, "unable to lookup user %s", user);
+
+		state->uid = pw->pw_uid;
+		state->gid = pw->pw_gid;
+
+		endpwent();
+
+		state->ngroups = nitems(state->groups);
+		if (getgrouplist(user, pw->pw_gid,
+		    state->groups, &state->ngroups) == -1)
+			errx(1, "unable to get groups for user %s", user);
+	}
+
+	return (state);
+}
+
+static int
+mode_exec_run(struct lltrace *llt)
+{
+	struct mode_exec_state *state = llt->mode;
+
+	signal_add(&state->sigchld, NULL);
+	signal_add(&state->sigint, NULL);
+
+	state->pid = fork();
+	switch (state->pid) {
+	case -1:
+		err(1, "unable to fork");
+		/* NOTREACHED */
+	case 0: /* child */
+		break;
+	default: /* parent */
+		return (0);
+	}
+
+	if (state->uid != 0) {
+		if (setresgid(state->gid, state->gid, state->gid) == -1)
+			err(1, "setresgid %d", state->gid);
+
+		if (setgroups(state->ngroups, state->groups) == -1)
+			err(1, "setgroups");
+
+		if (setresuid(state->uid, state->uid, state->uid) == -1)
+			err(1, "setresuid %d", state->uid);
+	}
+
+	execvp(state->argv[0], state->argv);
+
+	err(1, "exec %s", state->argv[0]);
+	return (-1);
+}