Index: arch/amd64/amd64/conf.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/conf.c,v diff -u -p -r1.78 conf.c --- arch/amd64/amd64/conf.c 6 Mar 2023 17:20:05 -0000 1.78 +++ arch/amd64/amd64/conf.c 18 Jun 2024 02:47:11 -0000 @@ -136,6 +136,7 @@ cdev_decl(cy); #include "bktr.h" #include "ksyms.h" #include "kstat.h" +#include "llt.h" #include "usb.h" #include "uhid.h" #include "fido.h" @@ -213,7 +214,8 @@ struct cdevsw cdevsw[] = cdev_notdef(), /* 28 was LKM */ cdev_notdef(), /* 29 */ cdev_dt_init(NDT,dt), /* 30: dynamic tracer */ - cdev_notdef(), /* 31 */ + cdev_lltrace_init(NLLT,lltrace), + /* 31: lltrace */ cdev_notdef(), /* 32 */ cdev_notdef(), /* 33 */ cdev_notdef(), /* 34 */ Index: arch/amd64/amd64/intr.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/intr.c,v diff -u -p -r1.56 intr.c --- arch/amd64/amd64/intr.c 19 Jan 2024 18:38:16 -0000 1.56 +++ arch/amd64/amd64/intr.c 18 Jun 2024 02:47:11 -0000 @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -538,10 +539,18 @@ intr_handler(struct intrframe *frame, st if (need_lock) __mp_lock(&kernel_lock); #endif + floor = ci->ci_handled_intr_level; ci->ci_handled_intr_level = ih->ih_level; + + LLTRACE_CPU(ci, lltrace_irq, 0, + ci->ci_isources[ih->ih_slot]->is_idtvec); rc = (*ih->ih_fun)(ih->ih_arg ? ih->ih_arg : frame); + LLTRACE_CPU(ci, lltrace_irqret, 0, + ci->ci_isources[ih->ih_slot]->is_idtvec); + ci->ci_handled_intr_level = floor; + #ifdef MULTIPROCESSOR if (need_lock) __mp_unlock(&kernel_lock); Index: arch/amd64/amd64/ipi.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/ipi.c,v diff -u -p -r1.18 ipi.c --- arch/amd64/amd64/ipi.c 10 Nov 2022 08:26:54 -0000 1.18 +++ arch/amd64/amd64/ipi.c 18 Jun 2024 02:47:11 -0000 @@ -35,9 +35,10 @@ #include #include #include +#include +#include #include -#include #include #include #include @@ -45,6 +46,8 @@ void x86_send_ipi(struct cpu_info *ci, int ipimask) { + LLTRACE(lltrace_ipi, ci->ci_cpuid); + x86_atomic_setbits_u32(&ci->ci_ipis, ipimask); /* Don't send IPI to cpu which isn't (yet) running. */ @@ -57,6 +60,10 @@ x86_send_ipi(struct cpu_info *ci, int ip int x86_fast_ipi(struct cpu_info *ci, int ipi) { +#if 0 + LLTRACE(lltrace_ipi, ci->ci_cpuid); +#endif + if (!(ci->ci_flags & CPUF_RUNNING)) return (ENOENT); @@ -72,6 +79,8 @@ x86_broadcast_ipi(int ipimask) int count = 0; CPU_INFO_ITERATOR cii; + LLTRACE_CPU(self, lltrace_ipi, ~0); + CPU_INFO_FOREACH(cii, ci) { if (ci == self) continue; @@ -102,7 +111,10 @@ x86_ipi_handler(void) for (bit = 0; bit < X86_NIPI && pending; bit++) { if (pending & (1 << bit)) { pending &= ~(1 << bit); + + LLTRACE_CPU(ci, lltrace_irq, LLTRACE_IRQ_IPI, bit); (*ipifunc[bit])(ci); + LLTRACE_CPU(ci, lltrace_irqret, LLTRACE_IRQ_IPI, bit); evcount_inc(&ipi_count); } } Index: arch/amd64/amd64/softintr.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/softintr.c,v diff -u -p -r1.10 softintr.c --- arch/amd64/amd64/softintr.c 11 Sep 2020 09:27:09 -0000 1.10 +++ arch/amd64/amd64/softintr.c 18 Jun 2024 02:47:11 -0000 @@ -34,9 +34,15 @@ * Generic soft interrupt implementation for NetBSD/x86. 
*/ +#include "llt.h" + #include #include #include +#include + +#include +#include #include @@ -99,8 +105,9 @@ softintr_dispatch(int which) uvmexp.softs++; mtx_leave(&si->softintr_lock); - + //LLTRACE_CPU(ci, lltrace_irq, LLTRACE_IRQ_BOTTOM_HALF, which); (*sih->sih_fn)(sih->sih_arg); + //LLTRACE_CPU(ci, lltrace_irqret, LLTRACE_IRQ_BOTTOM_HALF, which); } KERNEL_UNLOCK(); Index: arch/arm64/arm64/conf.c =================================================================== RCS file: /cvs/src/sys/arch/arm64/arm64/conf.c,v diff -u -p -r1.22 conf.c --- arch/arm64/arm64/conf.c 6 Mar 2023 17:20:06 -0000 1.22 +++ arch/arm64/arm64/conf.c 18 Jun 2024 02:47:11 -0000 @@ -91,6 +91,7 @@ cdev_decl(lpt); #include "bktr.h" #include "ksyms.h" #include "kstat.h" +#include "llt.h" #include "usb.h" #include "uhid.h" #include "fido.h" @@ -157,7 +158,8 @@ struct cdevsw cdevsw[] = cdev_notdef(), /* 28 was LKM */ cdev_notdef(), /* 29 */ cdev_dt_init(NDT,dt), /* 30: dynamic tracer */ - cdev_notdef(), /* 31 */ + cdev_lltrace_init(NLLT,lltrace), + /* 31: lltrace */ cdev_notdef(), /* 32 */ cdev_notdef(), /* 33 */ cdev_notdef(), /* 34 */ Index: arch/arm64/dev/agintc.c =================================================================== RCS file: /cvs/src/sys/arch/arm64/dev/agintc.c,v diff -u -p -r1.56 agintc.c --- arch/arm64/dev/agintc.c 13 May 2024 01:15:50 -0000 1.56 +++ arch/arm64/dev/agintc.c 18 Jun 2024 02:47:11 -0000 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -1111,7 +1112,11 @@ agintc_run_handler(struct intrhand *ih, else arg = frame; + LLTRACE(lltrace_irq, ih->ih_ipl == IPL_IPI ? LLTRACE_IRQ_IPI : 0, + ih->ih_irq); handled = ih->ih_func(arg); + LLTRACE(lltrace_irqret, ih->ih_ipl == IPL_IPI ? LLTRACE_IRQ_IPI : 0, + ih->ih_irq); if (handled) ih->ih_count.ec_count++; @@ -1456,6 +1461,8 @@ agintc_send_ipi(struct cpu_info *ci, int { struct agintc_softc *sc = agintc_sc; uint64_t sendmask; + + LLTRACE(lltrace_ipi, ci->ci_cpuid); if (ci == curcpu() && id == ARM_IPI_NOP) return; Index: conf/GENERIC =================================================================== RCS file: /cvs/src/sys/conf/GENERIC,v diff -u -p -r1.296 GENERIC --- conf/GENERIC 5 May 2024 07:26:58 -0000 1.296 +++ conf/GENERIC 18 Jun 2024 02:47:13 -0000 @@ -81,6 +81,7 @@ pseudo-device endrun 1 # EndRun line dis pseudo-device vnd 4 # vnode disk devices pseudo-device ksyms 1 # kernel symbols device pseudo-device kstat # kernel statistics device +pseudo-device llt # low-level tracing device # clonable devices pseudo-device bpfilter # packet filter Index: conf/files =================================================================== RCS file: /cvs/src/sys/conf/files,v diff -u -p -r1.731 files --- conf/files 10 Apr 2024 13:59:05 -0000 1.731 +++ conf/files 18 Jun 2024 02:47:13 -0000 @@ -598,6 +598,9 @@ file dev/ksyms.c ksyms needs-flag pseudo-device kstat file dev/kstat.c kstat needs-flag +pseudo-device llt +file dev/lltrace.c llt needs-flag + pseudo-device fuse file miscfs/fuse/fuse_device.c fuse needs-flag file miscfs/fuse/fuse_file.c fuse Index: dev/lltrace.c =================================================================== RCS file: dev/lltrace.c diff -N dev/lltrace.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ dev/lltrace.c 18 Jun 2024 02:47:13 -0000 @@ -0,0 +1,965 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2022 The University of Queensland + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this 
permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * This code was written by David Gwynne as part + * of the Information Technology Infrastructure Group (ITIG) in the + * Faculty of Engineering, Architecture and Information Technology + * (EAIT). + * + * It was heavily inspired by and aims to be largely compatible + * with the KUTrace (kernel/userland tracing) framework by Richard + * L. Sites. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#if defined(__amd64__) || defined(__i386__) + +static inline unsigned int +lltrace_cas(unsigned int *p, unsigned int e, unsigned int n) +{ + __asm volatile("cmpxchgl %2, %1" + : "=a" (e), "=m" (*p) + : "r" (n), "a" (e), "m" (*p)); + + return (e); +} + +static inline uint64_t +lltrace_ts(void) +{ + unsigned int hi, lo; + + __asm volatile("lfence; rdtsc" : "=d" (hi), "=a" (lo)); + + return (lo >> 6); +} + +static inline uint64_t +lltrace_ts_long(void) +{ + return (rdtsc_lfence() >> 6); +} + +#elif defined(__aarch64__) + +#define lltrace_cas(_p, _e, _n) atomic_cas_uint((_p), (_e), (_n)) + +static inline uint64_t +lltrace_ts_long(void) +{ + uint64_t ts; + + __asm volatile("mrs %x0, cntvct_el0" : "=r" (ts)); + + return (ts); +} + +#define lltrace_ts() lltrace_ts_long() + +#else /* not x86 or arm64 */ + +static unsigned int +lltrace_cas(unsigned int *p, unsigned int e, unsigned int n) +{ + unsigned int o; + int s; + + s = intr_disable(); + o = *p; + if (o == e) + *p = n; + intr_restore(s); + + return (o); +} + +static inline uint64_t +lltrace_ts(void) +{ + return (countertime()); +} + +static inline uint64_t +lltrace_ts_long(void) +{ + return (countertime()); +} + +#endif + +#define LLTRACE_MB2NBUF(_mb) \ + (((_mb) * (1U << 20)) / sizeof(struct lltrace_buffer)) +#define LLTRACE_NBUF2MB(_nbuf) \ + (((_nbuf) * sizeof(struct lltrace_buffer)) / (1U << 20)) + +#define LLTRACE_BLEN_DEFAULT 16 + +struct lltrace_cpu { + SIMPLEQ_ENTRY(lltrace_cpu) + llt_entry; + struct lltrace_buffer llt_buffer; + unsigned int llt_slot; + pid_t llt_tid; +}; + +SIMPLEQ_HEAD(lltrace_cpu_list, lltrace_cpu); + +struct lltrace_softc { + unsigned int sc_running; + unsigned int sc_mode; + struct rwlock sc_lock; + unsigned int sc_nbuffers; + + unsigned int sc_free; + unsigned int sc_used; + struct lltrace_cpu **sc_ring; + struct lltrace_cpu *sc_buffers; + + unsigned int sc_read; + unsigned int sc_reading; + struct selinfo sc_sel; + + uint64_t sc_boottime; + uint64_t sc_monotime; +}; + +static int lltrace_start(struct lltrace_softc *, struct proc *); +static int lltrace_stop(struct lltrace_softc *, struct proc *); +static int lltrace_flush(struct lltrace_softc *); + +static struct lltrace_softc *lltrace_sc; + +static void lltrace_arg32(struct lltrace_cpu *, uint64_t, unsigned int); + +int +lltattach(int num) +{ + return (0); +} + +int +lltraceopen(dev_t dev, int flag, int mode, struct proc *p) +{ + struct lltrace_softc *sc; + int error; + + if (minor(dev) != 0) + return (ENXIO); + + error = 
suser(p); + if (error != 0) + return (error); + + if (lltrace_sc != NULL) + return (EBUSY); + + sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO); + if (sc == NULL) + return (ENOMEM); + + sc->sc_running = 0; + sc->sc_nbuffers = LLTRACE_MB2NBUF(LLTRACE_BLEN_DEFAULT); + + rw_init(&sc->sc_lock, "lltlk"); + + sc->sc_read = 0; + sc->sc_reading = 0; + klist_init_rwlock(&sc->sc_sel.si_note, &sc->sc_lock); + + /* commit */ + if (atomic_cas_ptr(&lltrace_sc, NULL, sc) != NULL) { + free(sc, M_DEVBUF, sizeof(*sc)); + return (EBUSY); + } + + return (0); +} + +int +lltraceclose(dev_t dev, int flag, int mode, struct proc *p) +{ + struct lltrace_softc *sc = lltrace_sc; + + rw_enter_write(&sc->sc_lock); + lltrace_stop(sc, p); + lltrace_flush(sc); + rw_exit_write(&sc->sc_lock); + + lltrace_sc = NULL; + membar_sync(); + + free(sc, M_DEVBUF, sizeof(*sc)); + + return (0); +} + +static int +lltrace_fionread(struct lltrace_softc *sc) +{ + int canread; + + rw_enter_read(&sc->sc_lock); + canread = !sc->sc_running && (sc->sc_buffers != NULL) && + (sc->sc_read < sc->sc_nbuffers); + rw_exit_read(&sc->sc_lock); + + return (canread ? sizeof(struct lltrace_buffer) : 0); +} + +static void +lltrace_cpu_init(struct lltrace_cpu *llt, struct lltrace_softc *sc, + struct cpu_info *ci, pid_t tid) +{ + struct lltrace_header *llh; + + llh = (struct lltrace_header *)&llt->llt_buffer; + llh->h_cpu = cpu_number(); + llh->h_boottime = sc->sc_boottime; + llh->h_start_cy = lltrace_ts_long(); + llh->h_start_ns = nsecuptime() - sc->sc_monotime; + llh->h_end_cy = 0; + llh->h_end_ns = 0; + llh->h_idletid = ci->ci_schedstate.spc_idleproc->p_tid; + llh->h_tid = tid; + llh->h_zero = 0; + + llt->llt_tid = tid; + llt->llt_slot = 8; +} + +static void +lltrace_cpu_fini(struct lltrace_cpu *llt, struct lltrace_softc *sc) +{ + struct lltrace_header *llh; + + llh = (struct lltrace_header *)&llt->llt_buffer; + llh->h_end_cy = lltrace_ts_long(); + llh->h_end_ns = nsecuptime() - sc->sc_monotime; +} + +static int +lltrace_set_mode(struct lltrace_softc *sc, unsigned int mode) +{ + int error; + + if (mode >= LLTRACE_MODE_COUNT) + return (EINVAL); + + error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR); + if (error != 0) + return (error); + + if (sc->sc_running) + error = EBUSY; + else + sc->sc_mode = mode; + + rw_exit(&sc->sc_lock); + return (error); +} + +static int +lltrace_set_blen(struct lltrace_softc *sc, unsigned int blen) +{ + int error; + unsigned int nbuffers; + + if (blen < LLTRACE_BLEN_MIN || blen > LLTRACE_BLEN_MAX) + return (EINVAL); + + /* convert megabytes to the number of buffers */ + nbuffers = LLTRACE_MB2NBUF(blen); + if (nbuffers <= ncpus) + EINVAL; + + error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR); + if (error != 0) + return (error); + + if (sc->sc_buffers != NULL) + error = EBUSY; + else { + sc->sc_nbuffers = nbuffers; + printf("%s[%u]: nbuffers %u\n", __func__, __LINE__, sc->sc_nbuffers); +} + + rw_exit(&sc->sc_lock); + return (error); +} + +static int +lltrace_start(struct lltrace_softc *sc, struct proc *p) +{ + struct bintime boottime; + unsigned int i; + size_t sz; + struct lltrace_cpu_list l = SIMPLEQ_HEAD_INITIALIZER(l); + struct lltrace_cpu *llt; + struct cpu_info *ci; + CPU_INFO_ITERATOR cii; + + if (sc->sc_running) + return EINVAL; + + if (sc->sc_nbuffers <= (ncpus * 2 + 1)) + return (EINVAL); + + lltrace_flush(sc); + + sc->sc_monotime = nsecuptime(); + + binboottime(&boottime); + sc->sc_boottime = BINTIME_TO_NSEC(&boottime) + sc->sc_monotime; + + sz = roundup(sc->sc_nbuffers * sizeof(*sc->sc_buffers), PAGE_SIZE); 
+ sc->sc_buffers = km_alloc(sz, &kv_any, &kp_dirty, &kd_waitok); + if (sc->sc_buffers == NULL) + return (ENOMEM); + sc->sc_ring = mallocarray(sc->sc_nbuffers, sizeof(*sc->sc_ring), + M_DEVBUF, M_WAITOK); + for (i = 0; i < sc->sc_nbuffers; i++) { + llt = &sc->sc_buffers[i]; + llt->llt_slot = 0; + sc->sc_ring[i] = llt; + } + + sc->sc_free = 0; /* next slot to pull a free buffer from */ + sc->sc_used = 0; /* next slot to put a used buffer in */ + + CPU_INFO_FOREACH(cii, ci) { + i = sc->sc_free++; /* can't wrap yet */ + + llt = sc->sc_ring[i]; + sc->sc_ring[i] = NULL; + + SIMPLEQ_INSERT_HEAD(&l, llt, llt_entry); + } + + CPU_INFO_FOREACH(cii, ci) { + sched_peg_curproc(ci); + + llt = SIMPLEQ_FIRST(&l); + SIMPLEQ_REMOVE_HEAD(&l, llt_entry); + + lltrace_cpu_init(llt, sc, ci, p->p_tid); + lltrace_pidname(llt, p); + + membar_producer(); + ci->ci_schedstate.spc_lltrace = llt; + } + atomic_clearbits_int(&p->p_flag, P_CPUPEG); + + sc->sc_running = 1; + + return (0); +} + +static int +lltrace_stop(struct lltrace_softc *sc, struct proc *p) +{ + struct lltrace_cpu *llt; + struct cpu_info *ci; + CPU_INFO_ITERATOR cii; + unsigned long s; + + if (!sc->sc_running) + return (EALREADY); + + sc->sc_running = 0; + + /* visit each cpu to take llt away safely */ + CPU_INFO_FOREACH(cii, ci) { + sched_peg_curproc(ci); + + s = intr_disable(); + llt = ci->ci_schedstate.spc_lltrace; + ci->ci_schedstate.spc_lltrace = NULL; + intr_restore(s); + + lltrace_cpu_fini(llt, sc); + } + atomic_clearbits_int(&p->p_flag, P_CPUPEG); + + return (0); +} + +static int +lltrace_flush(struct lltrace_softc *sc) +{ + size_t sz; + + rw_assert_wrlock(&sc->sc_lock); + if (sc->sc_running) + return (EBUSY); + + if (sc->sc_buffers == NULL) + return (0); + + sz = roundup(sc->sc_nbuffers * sizeof(*sc->sc_buffers), PAGE_SIZE); + km_free(sc->sc_buffers, sz, &kv_any, &kp_dirty); + free(sc->sc_ring, M_DEVBUF, sc->sc_nbuffers * sizeof(*sc->sc_ring)); + + sc->sc_buffers = NULL; + sc->sc_ring = NULL; + sc->sc_read = 0; + + return (0); +} + +int +lltraceioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) +{ + struct lltrace_softc *sc = lltrace_sc; + int error = 0; + + KERNEL_UNLOCK(); + + switch (cmd) { + case FIONREAD: + *(int *)data = lltrace_fionread(sc); + break; + case FIONBIO: + /* vfs tracks this for us if we let it */ + break; + + case LLTIOCSTART: + error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR); + if (error != 0) + break; + error = lltrace_start(sc, p); + rw_exit(&sc->sc_lock); + break; + case LLTIOCSTOP: + error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR); + if (error != 0) + break; + error = lltrace_stop(sc, p); + rw_exit(&sc->sc_lock); + break; + case LLTIOCFLUSH: + error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR); + if (error != 0) + break; + error = lltrace_flush(sc); + rw_exit(&sc->sc_lock); + break; + + case LLTIOCSBLEN: + error = lltrace_set_blen(sc, *(unsigned int *)data); + break; + case LLTIOCGBLEN: + *(unsigned int *)data = LLTRACE_NBUF2MB(sc->sc_nbuffers); + break; + + case LLTIOCSMODE: + error = lltrace_set_mode(sc, *(unsigned int *)data); + break; + case LLTIOCGMODE: + *(unsigned int *)data = sc->sc_mode; + break; + + default: + error = ENOTTY; + break; + } + + KERNEL_LOCK(); + + return (error); +} + +int +lltraceread(dev_t dev, struct uio *uio, int ioflag) +{ + struct lltrace_softc *sc = lltrace_sc; + struct lltrace_cpu *llt; + unsigned int slot; + int error; + + KERNEL_UNLOCK(); + + error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR); + if (error != 0) + goto lock; + + if (sc->sc_running) { + if (ISSET(ioflag, 
IO_NDELAY)) { + error = EWOULDBLOCK; + goto unlock; + } + + do { + sc->sc_reading++; + error = rwsleep_nsec(&sc->sc_reading, &sc->sc_lock, + PRIBIO|PCATCH, "lltread", INFSLP); + sc->sc_reading--; + if (error != 0) + goto unlock; + } while (sc->sc_running); + } + + if (sc->sc_buffers == NULL) { + error = 0; + goto unlock; + } + + slot = sc->sc_read; + for (;;) { + if (slot >= sc->sc_nbuffers) { + error = 0; + goto unlock; + } + + llt = &sc->sc_buffers[slot]; + KASSERT(llt->llt_slot <= nitems(llt->llt_buffer.llt_slots)); + if (llt->llt_slot > 0) + break; + + slot++; + } + + error = uiomove(&llt->llt_buffer, + llt->llt_slot * sizeof(llt->llt_buffer.llt_slots[0]), uio); + if (error != 0) + goto unlock; + + sc->sc_read = slot + 1; + +unlock: + rw_exit(&sc->sc_lock); +lock: + KERNEL_LOCK(); + return (error); +} + +static void +lltrace_filt_detach(struct knote *kn) +{ + struct lltrace_softc *sc = kn->kn_hook; + + klist_remove(&sc->sc_sel.si_note, kn); +} + +static int +lltrace_filt_event(struct knote *kn, long hint) +{ + struct lltrace_softc *sc = kn->kn_hook; + int canread; + + canread = !sc->sc_running && (sc->sc_buffers != NULL) && + (sc->sc_read < sc->sc_nbuffers); + + kn->kn_data = canread ? sizeof(struct lltrace_buffer) : 0; + + return (canread); +} + +static int +lltrace_filt_modify(struct kevent *kev, struct knote *kn) +{ + struct lltrace_softc *sc = kn->kn_hook; + int active; + + rw_enter_write(&sc->sc_lock); + active = knote_modify_fn(kev, kn, lltrace_filt_event); + rw_exit_write(&sc->sc_lock); + + return (active); +} + +static int +lltrace_filt_process(struct knote *kn, struct kevent *kev) +{ + struct lltrace_softc *sc = kn->kn_hook; + int active; + + rw_enter_write(&sc->sc_lock); + active = knote_process_fn(kn, kev, lltrace_filt_event); + rw_exit_write(&sc->sc_lock); + + return (active); +} + +static const struct filterops lltrace_filtops = { + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, + .f_attach = NULL, + .f_detach = lltrace_filt_detach, + .f_event = lltrace_filt_event, + .f_modify = lltrace_filt_modify, + .f_process = lltrace_filt_process, +}; + +int +lltracekqfilter(dev_t dev, struct knote *kn) +{ + struct lltrace_softc *sc = lltrace_sc; + struct klist *klist; + + switch (kn->kn_filter) { + case EVFILT_READ: + klist = &sc->sc_sel.si_note; + kn->kn_fop = &lltrace_filtops; + break; + default: + return (EINVAL); + } + + kn->kn_hook = sc; + klist_insert(klist, kn); + + return (0); +} + +static struct lltrace_cpu * +lltrace_next(struct lltrace_cpu *llt) +{ + struct lltrace_softc *sc = lltrace_sc; + struct cpu_info *ci = curcpu(); + struct lltrace_cpu *nllt; + unsigned int slot, oslot, nslot; + + /* check if we were preempted */ + nllt = ci->ci_schedstate.spc_lltrace; + if (nllt != llt) { + /* something preempted us and swapped buffers already */ + return (nllt); + } + + slot = sc->sc_free; + for (;;) { + nslot = slot + 1; + if (nslot > sc->sc_nbuffers) { + if (sc->sc_mode == LLTRACE_MODE_HEAD) + return (NULL); + } + + oslot = atomic_cas_uint(&sc->sc_free, slot, nslot); + if (slot == oslot) + break; + + slot = oslot; + } + + slot %= sc->sc_nbuffers; + nllt = sc->sc_ring[slot]; + sc->sc_ring[slot] = NULL; + + slot = sc->sc_used; + for (;;) { + nslot = slot + 1; + + oslot = atomic_cas_uint(&sc->sc_used, slot, nslot); + if (slot == oslot) + break; + + slot = oslot; + } + + lltrace_cpu_init(nllt, sc, ci, llt->llt_tid); + lltrace_cpu_fini(llt, sc); + + slot %= sc->sc_nbuffers; + sc->sc_ring[slot] = llt; + + ci->ci_schedstate.spc_lltrace = nllt; + + return (nllt); +} + +static unsigned int 
+lltrace_insert(struct lltrace_cpu *llt, uint64_t record, + const uint64_t *extra, unsigned int n) +{ + unsigned int slot, oslot, nslot; + uint64_t *slots; + + n++; + record |= lltrace_ts() << LLTRACE_TIMESTAMP_SHIFT; + + slot = llt->llt_slot; + for (;;) { + nslot = slot + n; + if (nslot > nitems(llt->llt_buffer.llt_slots)) { + unsigned long s; + + s = intr_disable(); + llt = lltrace_next(llt); + intr_restore(s); + + if (llt == NULL) + return (1); + + slot = llt->llt_slot; + continue; + } + + oslot = lltrace_cas(&llt->llt_slot, slot, nslot); + if (slot == oslot) + break; + + slot = oslot; + } + + slots = llt->llt_buffer.llt_slots + slot; + *slots = record; + while (n > 1) { + *(++slots) = *(extra++); + n--; + } + + return (0); +} + +void +lltrace_statclock(struct lltrace_cpu *llt, int usermode, unsigned long pc) +{ + uint64_t event = usermode ? LLTRACE_EVENT_PC_U : LLTRACE_EVENT_PC_K; + uint64_t extra[1] = { pc }; + + lltrace_insert(llt, (event | nitems(extra)) << LLTRACE_EVENT_SHIFT, + extra, nitems(extra)); +} + +void +lltrace_syscall(struct lltrace_cpu *llt, register_t code, + size_t argsize, const register_t *args) +{ + uint64_t record = LLTRACE_EVENT_SYSCALL(code) << LLTRACE_EVENT_SHIFT; + + if (argsize > 0) { + uint64_t arg0 = args[0] & LLTRACE_ARG0_MASK; + record |= arg0 << LLTRACE_ARG0_SHIFT; + } + + lltrace_insert(llt, record, NULL, 0); +} + +void +lltrace_sysret(struct lltrace_cpu *llt, register_t code, + int error, const register_t retvals[2]) +{ + uint64_t record = LLTRACE_EVENT_SYSRET(code) << LLTRACE_EVENT_SHIFT; + uint64_t arg0 = error & LLTRACE_ARG0_MASK; + record |= arg0 << LLTRACE_ARG0_SHIFT; + unsigned int stop; + + stop = lltrace_insert(llt, record, NULL, 0); + + if (stop) { + struct lltrace_softc *sc = lltrace_sc; + + rw_enter_write(&sc->sc_lock); + if (sc->sc_running) + lltrace_stop(sc, curproc); + + knote_locked(&sc->sc_sel.si_note, 0); + if (sc->sc_reading) + wakeup(&sc->sc_reading); + rw_exit_write(&sc->sc_lock); + } +} + +void +lltrace_pidname(struct lltrace_cpu *llt, struct proc *p) +{ + uint64_t record; + uint64_t extra[3]; + unsigned int l, n; + pid_t tid = p->p_tid; + + if (ISSET(p->p_p->ps_flags, PS_SYSTEM)) + tid |= LLTRACE_EVENT_PID_ARG_KTHREAD; + + CTASSERT(sizeof(extra) == sizeof(p->p_p->ps_comm)); + + extra[0] = extra[1] = extra[2] = 0; /* memset */ + l = strlcpy((char *)extra, p->p_p->ps_comm, sizeof(extra)); + + /* turn the string length into the number of slots we need */ + n = howmany(l, sizeof(uint64_t)); + + record = (LLTRACE_EVENT_PID | n) << LLTRACE_EVENT_SHIFT; + record |= (tid & LLTRACE_ARG32_MASK) << LLTRACE_ARG32_SHIFT; + + llt->llt_tid = p->p_tid; + + lltrace_insert(llt, record, extra, n); +} + +void +lltrace_sched_enter(struct lltrace_cpu *llt) +{ + uint64_t record = LLTRACE_EVENT_SCHED << LLTRACE_EVENT_SHIFT; + + lltrace_insert(llt, record, NULL, 0); +} + +void +lltrace_sched_leave(struct lltrace_cpu *llt) +{ + uint64_t record = LLTRACE_EVENT_SCHEDRET << LLTRACE_EVENT_SHIFT; + + lltrace_insert(llt, record, NULL, 0); +} + +void +lltrace_idle(struct lltrace_cpu *llt) +{ + uint64_t record = LLTRACE_EVENT_IDLE << LLTRACE_EVENT_SHIFT; + + lltrace_insert(llt, record, NULL, 0); +} + +static void +lltrace_arg32(struct lltrace_cpu *llt, uint64_t event, unsigned int arg32) +{ + uint64_t record; + + record = event << LLTRACE_EVENT_SHIFT; + record |= (arg32 & LLTRACE_ARG32_MASK) << LLTRACE_ARG32_SHIFT; + + lltrace_insert(llt, record, NULL, 0); +} + +void +lltrace_runnable(struct lltrace_cpu *llt, struct proc *p) +{ + lltrace_arg32(llt, 
LLTRACE_EVENT_RUNNABLE, p->p_tid); +} + +void +lltrace_trap(struct lltrace_cpu *llt, unsigned int trap) +{ + lltrace_arg32(llt, LLTRACE_EVENT_TRAP, trap); +} + +void +lltrace_trapret(struct lltrace_cpu *llt, unsigned int trap) +{ + lltrace_arg32(llt, LLTRACE_EVENT_TRAPRET, trap); +} + +void +lltrace_ipi(struct lltrace_cpu *llt, unsigned int cpu) +{ + lltrace_arg32(llt, LLTRACE_EVENT_IPI, cpu); +} + +void +lltrace_irq(struct lltrace_cpu *llt, unsigned int type, unsigned int vec) +{ + lltrace_arg32(llt, LLTRACE_EVENT_IRQ(type), vec); +} + +void +lltrace_irqret(struct lltrace_cpu *llt, unsigned int type, unsigned int vec) +{ + lltrace_arg32(llt, LLTRACE_EVENT_IRQRET(type), vec); +} + +void +lltrace_lock(struct lltrace_cpu *llt, void *lock, unsigned int op) +{ + lltrace_arg32(llt, LLTRACE_EVENT_LOCK(op), (uint32_t)(intptr_t)lock); +} + +void +lltrace_klock(struct lltrace_cpu *llt, void *lock, unsigned int op) +{ +#if 0 + lltrace_arg32(llt, LLTRACE_EVENT_LOCK(op), (uint32_t)(intptr_t)lock); +#endif +} + +void +lltrace_pkts(struct lltrace_cpu *llt, unsigned int t, unsigned int v) +{ + t &= LLTRACE_PKTS_T_MASK; + + v <<= LLTRACE_PKTS_V_SHIFT; + v &= LLTRACE_PKTS_V_MASK; + + lltrace_arg32(llt, LLTRACE_EVENT_PKTS, t | v); +} + +void +lltrace_mark(struct lltrace_cpu *llt) +{ + uint64_t record = LLTRACE_EVENT_MARK << LLTRACE_EVENT_SHIFT; + + lltrace_insert(llt, record, NULL, 0); +} + +void +lltrace_fn_enter(struct lltrace_cpu *llt, void *fn) +{ + lltrace_arg32(llt, LLTRACE_EVENT_KFUNC_ENTER, (uintptr_t)fn); +} + +void +lltrace_fn_leave(struct lltrace_cpu *llt, void *fn) +{ + lltrace_arg32(llt, LLTRACE_EVENT_KFUNC_LEAVE, (uintptr_t)fn); +} + +void +__cyg_profile_func_enter(void *fn, void *pc) +{ + struct lltrace_cpu *llt; + uint64_t record; + + llt = lltrace_enter(); + if (llt == NULL) + return; + + record = LLTRACE_EVENT_KFUNC_ENTER << LLTRACE_EVENT_SHIFT; + record |= ((uintptr_t)fn & LLTRACE_ARG32_MASK) << LLTRACE_ARG32_SHIFT; + + lltrace_insert(llt, record, NULL, 0); +} + +void +__cyg_profile_func_exit(void *fn, void *pc) +{ + struct lltrace_cpu *llt; + uint64_t record; + + llt = lltrace_enter(); + if (llt == NULL) + return; + + record = LLTRACE_EVENT_KFUNC_LEAVE << LLTRACE_EVENT_SHIFT; + record |= ((uintptr_t)fn & LLTRACE_ARG32_MASK) << LLTRACE_ARG32_SHIFT; + + lltrace_insert(llt, record, NULL, 0); +} Index: kern/kern_exec.c =================================================================== RCS file: /cvs/src/sys/kern/kern_exec.c,v diff -u -p -r1.255 kern_exec.c --- kern/kern_exec.c 2 Apr 2024 08:39:16 -0000 1.255 +++ kern/kern_exec.c 18 Jun 2024 02:47:16 -0000 @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -542,6 +543,8 @@ sys_execve(struct proc *p, void *v, regi memset(pr->ps_comm, 0, sizeof(pr->ps_comm)); strlcpy(pr->ps_comm, nid.ni_cnd.cn_nameptr, sizeof(pr->ps_comm)); pr->ps_acflag &= ~AFORK; + + LLTRACE(lltrace_pidname, p); /* record proc's vnode, for use by sysctl */ otvp = pr->ps_textvp; Index: kern/kern_lock.c =================================================================== RCS file: /cvs/src/sys/kern/kern_lock.c,v diff -u -p -r1.73 kern_lock.c --- kern/kern_lock.c 26 Mar 2024 18:18:30 -0000 1.73 +++ kern/kern_lock.c 18 Jun 2024 02:47:16 -0000 @@ -24,6 +24,8 @@ #include #include #include +#include +#include #include @@ -139,6 +141,8 @@ __mp_lock(struct __mp_lock *mpl) LOP_EXCLUSIVE | LOP_NEWORDER, NULL); #endif + LLTRACE(lltrace_klock, mpl, LLTRACE_LOCK_NOACQUIRE); + s = intr_disable(); if (cpu->mplc_depth++ == 0) cpu->mplc_ticket = 
atomic_inc_int_nv(&mpl->mpl_users); @@ -148,6 +152,8 @@ __mp_lock(struct __mp_lock *mpl) membar_enter_after_atomic(); WITNESS_LOCK(&mpl->mpl_lock_obj, LOP_EXCLUSIVE); + + LLTRACE(lltrace_klock, mpl, LLTRACE_LOCK_ACQUIRE); } void @@ -167,6 +173,7 @@ __mp_unlock(struct __mp_lock *mpl) s = intr_disable(); if (--cpu->mplc_depth == 0) { + LLTRACE(lltrace_klock, mpl, LLTRACE_LOCK_WAKEUP); membar_exit(); mpl->mpl_ticket++; } @@ -183,6 +190,8 @@ __mp_release_all(struct __mp_lock *mpl) int i; #endif + LLTRACE(lltrace_klock, mpl, LLTRACE_LOCK_WAKEUP); + s = intr_disable(); rv = cpu->mplc_depth; #ifdef WITNESS @@ -445,3 +454,139 @@ _mtx_init_flags(struct mutex *m, int ipl _mtx_init(m, ipl); } #endif /* WITNESS */ + +void +pc_lock_init(struct pc_lock *pcl) +{ + pcl->pcl_gen = 0; +} + +unsigned int +pc_mprod_enter(struct pc_lock *pcl) +{ + unsigned int gen, ngen, ogen; + + gen = pcl->pcl_gen; + for (;;) { + while (gen & 1) { + CPU_BUSY_CYCLE(); + gen = pcl->pcl_gen; + } + + ngen = 1 + gen; + ogen = atomic_cas_uint(&pcl->pcl_gen, gen, ngen); + if (gen == ogen) + break; + + CPU_BUSY_CYCLE(); + gen = ogen; + } + + membar_enter_after_atomic(); + return (ngen); +} + +void +pc_mprod_leave(struct pc_lock *pcl, unsigned int gen) +{ + membar_exit(); + pcl->pcl_gen = ++gen; +} + +unsigned int +pc_sprod_enter(struct pc_lock *pcl) +{ + unsigned int gen; + + gen = pcl->pcl_gen; + pcl->pcl_gen = ++gen; + membar_producer(); + + return (gen); +} + +void +pc_sprod_leave(struct pc_lock *pcl, unsigned int gen) +{ + membar_producer(); + pcl->pcl_gen = ++gen; +} + +void +pc_cons_enter(struct pc_lock *pcl, unsigned int *genp) +{ + unsigned int gen; + + gen = pcl->pcl_gen; + while (gen & 1) { + CPU_BUSY_CYCLE(); + gen = pcl->pcl_gen; + } + + membar_consumer(); + *genp = gen; +} + +int +pc_cons_leave(struct pc_lock *pcl, unsigned int *genp) +{ + unsigned int gen; + + membar_consumer(); + + gen = pcl->pcl_gen; + if (gen & 1) { + do { + CPU_BUSY_CYCLE(); + gen = pcl->pcl_gen; + } while (gen & 1); + } else if (gen == *genp) + return (0); + + *genp = gen; + return (EBUSY); +} + +#if 0 +void +NET_LOCK(void) +{ + LLTRACE(lltrace_lock, &netlock, LLTRACE_LOCK_NOACQUIRE); + rw_enter_write(&netlock); + LLTRACE(lltrace_lock, &netlock, LLTRACE_LOCK_ACQUIRE); +} + +void +NET_UNLOCK(void) +{ + rw_exit_write(&netlock); + LLTRACE(lltrace_lock, &netlock, LLTRACE_LOCK_WAKEUP); +} + +void +NET_RLOCK_IN_SOFTNET(void) +{ + LLTRACE(lltrace_lock, &netlock, LLTRACE_RLOCK_NOACQUIRE); + rw_enter_read(&netlock); + LLTRACE(lltrace_lock, &netlock, LLTRACE_RLOCK_ACQUIRE); +} + +void +NET_RUNLOCK_IN_SOFTNET(void) +{ + rw_exit_read(&netlock); + LLTRACE(lltrace_lock, &netlock, LLTRACE_RLOCK_WAKEUP); +} + +void +NET_RLOCK_IN_IOCTL(void) +{ + rw_enter_read(&netlock); +} + +void +NET_RUNLOCK_IN_IOCTL(void) +{ + rw_exit_read(&netlock); +} +#endif Index: kern/kern_sched.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sched.c,v diff -u -p -r1.95 kern_sched.c --- kern/kern_sched.c 28 Feb 2024 13:43:44 -0000 1.95 +++ kern/kern_sched.c 18 Jun 2024 02:47:16 -0000 @@ -192,6 +194,8 @@ sched_idle(void *v) wakeup(spc); } #endif + + LLTRACE(lltrace_idle); cpu_idle_cycle(); } cpu_idle_leave(); @@ -598,6 +602,7 @@ sched_proc_to_cpu_cost(struct cpu_info * if (cpuset_isset(&sched_queued_cpus, ci)) cost += spc->spc_nrun * sched_cost_runnable; +#if 0 /* * Try to avoid the primary cpu as it handles hardware interrupts. 
* @@ -606,6 +611,7 @@ sched_proc_to_cpu_cost(struct cpu_info * */ if (CPU_IS_PRIMARY(ci)) cost += sched_cost_runnable; +#endif /* * If the proc is on this cpu already, lower the cost by how much Index: kern/kern_sensors.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sensors.c,v diff -u -p -r1.40 kern_sensors.c --- kern/kern_sensors.c 5 Dec 2022 23:18:37 -0000 1.40 +++ kern/kern_sensors.c 18 Jun 2024 02:47:16 -0000 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "hotplug.h" @@ -260,8 +261,11 @@ sensor_task_work(void *xst) atomic_inc_int(&sensors_running); rw_enter_write(&st->lock); period = st->period; - if (period > 0 && !sensors_quiesced) + if (period > 0 && !sensors_quiesced) { + LLTRACE(lltrace_fn_enter, st->func); st->func(st->arg); + LLTRACE(lltrace_fn_leave, st->func); + } rw_exit_write(&st->lock); if (atomic_dec_int_nv(&sensors_running) == 0) { if (sensors_quiesced) Index: kern/kern_synch.c =================================================================== RCS file: /cvs/src/sys/kern/kern_synch.c,v diff -u -p -r1.202 kern_synch.c --- kern/kern_synch.c 18 Apr 2024 08:59:38 -0000 1.202 +++ kern/kern_synch.c 18 Jun 2024 02:47:16 -0000 @@ -37,6 +37,8 @@ * @(#)kern_synch.c 8.6 (Berkeley) 1/21/94 */ +#include "llt.h" + #include #include #include @@ -287,7 +289,7 @@ rwsleep(const volatile void *ident, stru { int error, status; - KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0); + KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK | PLLTRACE)) == 0); KASSERT(ident != &nowake || ISSET(priority, PCATCH) || timo != 0); KASSERT(ident != rwl); rw_assert_anylock(rwl); @@ -296,11 +298,26 @@ rwsleep(const volatile void *ident, stru sleep_setup(ident, priority, wmesg); rw_exit(rwl); + +#if NLLT > 0 + if (priority & PLLTRACE) + LLTRACE(lltrace_lock, rwl, LLTRACE_LOCK_WAKEUP); +#endif + /* signal may stop the process, release rwlock before that */ error = sleep_finish(timo, 1); - if ((priority & PNORELOCK) == 0) + if ((priority & PNORELOCK) == 0) { +#if NLLT > 0 + if (priority & PLLTRACE) + LLTRACE(lltrace_lock, rwl, LLTRACE_LOCK_NOACQUIRE); +#endif rw_enter(rwl, status); +#if NLLT > 0 + if (priority & PLLTRACE) + LLTRACE(lltrace_lock, rwl, LLTRACE_LOCK_ACQUIRE); +#endif + } return error; } @@ -523,6 +540,7 @@ unsleep(struct proc *p) p->p_wchan = NULL; TRACEPOINT(sched, unsleep, p->p_tid + THREAD_PID_OFFSET, p->p_p->ps_pid); + LLTRACE(lltrace_runnable, p); } } Index: kern/kern_timeout.c =================================================================== RCS file: /cvs/src/sys/kern/kern_timeout.c,v diff -u -p -r1.97 kern_timeout.c --- kern/kern_timeout.c 23 Feb 2024 16:51:39 -0000 1.97 +++ kern/kern_timeout.c 18 Jun 2024 02:47:16 -0000 @@ -35,6 +35,7 @@ #include /* _Q_INVALIDATE */ #include #include +#include #ifdef DDB #include @@ -739,6 +740,8 @@ softclock(void *arg) int need_proc_mp; #endif + LLTRACE(lltrace_irq, LLTRACE_IRQ_BOTTOM_HALF, 0); + first_new = NULL; new = 0; @@ -773,6 +776,8 @@ softclock(void *arg) if (need_proc_mp) wakeup(&timeout_proc_mp); #endif + + LLTRACE(lltrace_irqret, LLTRACE_IRQ_BOTTOM_HALF, 0); } void Index: kern/sched_bsd.c =================================================================== RCS file: /cvs/src/sys/kern/sched_bsd.c,v diff -u -p -r1.91 sched_bsd.c --- kern/sched_bsd.c 30 Mar 2024 13:33:20 -0000 1.91 +++ kern/sched_bsd.c 18 Jun 2024 02:47:16 -0000 @@ -356,6 +356,8 @@ mi_switch(void) int sched_count; #endif + LLTRACE(lltrace_sched_enter); + assertwaitok(); KASSERT(p->p_stat 
!= SONPROC); @@ -413,10 +415,13 @@ mi_switch(void) if (p != nextproc) { uvmexp.swtch++; + TRACEPOINT(sched, off__cpu, nextproc->p_tid + THREAD_PID_OFFSET, nextproc->p_p->ps_pid); cpu_switchto(p, nextproc); TRACEPOINT(sched, on__cpu, NULL); + + LLTRACE(lltrace_pidname, p); } else { TRACEPOINT(sched, remain__cpu, NULL); p->p_stat = SONPROC; @@ -437,6 +442,8 @@ mi_switch(void) #endif SCHED_ASSERT_UNLOCKED(); + + LLTRACE(lltrace_sched_leave); smr_idle(); Index: net/ifq.c =================================================================== RCS file: /cvs/src/sys/net/ifq.c,v diff -u -p -r1.53 ifq.c --- net/ifq.c 10 Nov 2023 15:51:24 -0000 1.53 +++ net/ifq.c 18 Jun 2024 02:47:16 -0000 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -127,7 +128,10 @@ ifq_is_serialized(struct ifqueue *ifq) void ifq_start(struct ifqueue *ifq) { - if (ifq_len(ifq) >= min(ifq->ifq_if->if_txmit, ifq->ifq_maxlen)) { + struct ifnet *ifp = ifq->ifq_if; + + if (ISSET(ifp->if_xflags, IFXF_MPSAFE) && + ifq_len(ifq) >= min(ifp->if_txmit, ifq->ifq_maxlen)) { task_del(ifq->ifq_softnet, &ifq->ifq_bundle); ifq_run_start(ifq); } else @@ -144,6 +148,7 @@ ifq_start_task(void *p) ifq_empty(ifq) || ifq_is_oactive(ifq)) return; + LLTRACE(lltrace_pkts, LLTRACE_PKTS_T_IFQ, ifq_len(ifq)); ifp->if_qstart(ifq); } @@ -168,6 +173,7 @@ ifq_restart_task(void *p) struct ifnet *ifp = ifq->ifq_if; ifq_clr_oactive(ifq); + LLTRACE(lltrace_pkts, LLTRACE_PKTS_T_IFQ, ifq_len(ifq)); ifp->if_qstart(ifq); } @@ -260,7 +266,8 @@ void ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx) { ifq->ifq_if = ifp; - ifq->ifq_softnet = net_tq(idx); + ifq->ifq_softnet = ISSET(ifp->if_xflags, IFXF_MPSAFE) ? + net_tq(idx) : systq; ifq->ifq_softc = NULL; mtx_init(&ifq->ifq_mtx, IPL_NET); @@ -601,8 +608,11 @@ ifq_mfreeml(struct ifqueue *ifq, struct #if NKSTAT > 0 struct ifiq_kstat_data { + struct kstat_kv kd_histogram[IFIQ_HISTOGRAM_BUCKETS]; + struct kstat_kv kd_packets; struct kstat_kv kd_bytes; + struct kstat_kv kd_hdrops; struct kstat_kv kd_fdrops; struct kstat_kv kd_qdrops; struct kstat_kv kd_errors; @@ -613,10 +623,35 @@ struct ifiq_kstat_data { }; static const struct ifiq_kstat_data ifiq_kstat_tpl = { + { + KSTAT_KV_UNIT_INITIALIZER("histo1", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("histo2", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("histo4", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("histo8", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("histo16", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("histo32", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("histo64", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("histo128", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("histo256", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("histo512", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + }, + KSTAT_KV_UNIT_INITIALIZER("packets", KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), KSTAT_KV_UNIT_INITIALIZER("bytes", KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES), + KSTAT_KV_UNIT_INITIALIZER("hdrops", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), KSTAT_KV_UNIT_INITIALIZER("fdrops", KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), KSTAT_KV_UNIT_INITIALIZER("qdrops", @@ -637,10 +672,15 @@ ifiq_kstat_copy(struct kstat *ks, void * { struct ifiqueue *ifiq = ks->ks_softc; struct ifiq_kstat_data *kd = dst; + 
size_t i; *kd = ifiq_kstat_tpl; + for (i = 0; i < IFIQ_HISTOGRAM_BUCKETS; i++) + kstat_kv_u64(&kd->kd_histogram[i]) = ifiq->ifiq_histogram[i]; + kstat_kv_u64(&kd->kd_packets) = ifiq->ifiq_packets; kstat_kv_u64(&kd->kd_bytes) = ifiq->ifiq_bytes; + kstat_kv_u64(&kd->kd_hdrops) = ifiq->ifiq_hdrops; kstat_kv_u64(&kd->kd_fdrops) = ifiq->ifiq_fdrops; kstat_kv_u64(&kd->kd_qdrops) = ifiq->ifiq_qdrops; kstat_kv_u64(&kd->kd_errors) = ifiq->ifiq_errors; @@ -659,7 +699,8 @@ void ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx) { ifiq->ifiq_if = ifp; - ifiq->ifiq_softnet = net_tq(idx); + ifiq->ifiq_softnet = ISSET(ifp->if_xflags, IFXF_MPSAFE) ? + net_tq(idx) : systq; ifiq->ifiq_softc = NULL; mtx_init(&ifiq->ifiq_mtx, IPL_NET); @@ -703,8 +744,8 @@ ifiq_destroy(struct ifiqueue *ifiq) ml_purge(&ifiq->ifiq_ml); } -unsigned int ifiq_maxlen_drop = 2048 * 5; -unsigned int ifiq_maxlen_return = 2048 * 3; +unsigned int ifiq_pressure_drop = 8; +unsigned int ifiq_pressure_return = 6; int ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml) @@ -713,8 +754,10 @@ ifiq_input(struct ifiqueue *ifiq, struct struct mbuf *m; uint64_t packets; uint64_t bytes = 0; - uint64_t fdrops = 0; - unsigned int len; + unsigned int pressure = 0; + unsigned int fdrops = 0; + unsigned int hdrops = 0; + unsigned int bucket; #if NBPFILTER > 0 caddr_t if_bpf; #endif @@ -726,8 +769,17 @@ ifiq_input(struct ifiqueue *ifiq, struct m->m_pkthdr.ph_ifidx = ifp->if_index; m->m_pkthdr.ph_rtableid = ifp->if_rdomain; bytes += m->m_pkthdr.len; + hdrops += m->m_pkthdr.ph_drops; } packets = ml_len(ml); + bucket = fls(packets); + if (bucket > IFIQ_HISTOGRAM_BUCKETS) + bucket = IFIQ_HISTOGRAM_BUCKETS; + bucket--; + + LLTRACE(lltrace_pkts, LLTRACE_PKTS_T_IFIQ, packets); + if (hdrops) + LLTRACE(lltrace_pkts, LLTRACE_PKTS_T_HDROP, hdrops); #if NBPFILTER > 0 if_bpf = ifp->if_bpf; @@ -738,16 +790,18 @@ ifiq_input(struct ifiqueue *ifiq, struct while ((m = ml_dequeue(&ml0)) != NULL) { if ((*ifp->if_bpf_mtap)(if_bpf, m, BPF_DIRECTION_IN)) { - m_freem(m); fdrops++; + m_freem(m); } else ml_enqueue(ml, m); } if (ml_empty(ml)) { mtx_enter(&ifiq->ifiq_mtx); + ifiq->ifiq_histogram[bucket]++; ifiq->ifiq_packets += packets; ifiq->ifiq_bytes += bytes; + ifiq->ifiq_hdrops += hdrops; ifiq->ifiq_fdrops += fdrops; mtx_leave(&ifiq->ifiq_mtx); @@ -757,13 +811,15 @@ ifiq_input(struct ifiqueue *ifiq, struct #endif mtx_enter(&ifiq->ifiq_mtx); + ifiq->ifiq_histogram[bucket]++; ifiq->ifiq_packets += packets; ifiq->ifiq_bytes += bytes; + ifiq->ifiq_hdrops += hdrops; ifiq->ifiq_fdrops += fdrops; - len = ml_len(&ifiq->ifiq_ml); if (__predict_true(!ISSET(ifp->if_xflags, IFXF_MONITOR))) { - if (len > ifiq_maxlen_drop) + pressure = ++ifiq->ifiq_pressure; + if (pressure > ifiq_pressure_drop) ifiq->ifiq_qdrops += ml_len(ml); else { ifiq->ifiq_enqueues++; @@ -774,10 +830,12 @@ ifiq_input(struct ifiqueue *ifiq, struct if (ml_empty(ml)) task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task); - else + else { + LLTRACE(lltrace_pkts, LLTRACE_PKTS_T_QDROP, ml_len(ml)); ml_purge(ml); + } - return (len > ifiq_maxlen_return); + return (pressure > ifiq_pressure_return); } void @@ -787,6 +845,7 @@ ifiq_add_data(struct ifiqueue *ifiq, str data->ifi_ipackets += ifiq->ifiq_packets; data->ifi_ibytes += ifiq->ifiq_bytes; data->ifi_iqdrops += ifiq->ifiq_qdrops; + data->ifi_collisions += ifiq->ifiq_hdrops; mtx_leave(&ifiq->ifiq_mtx); } @@ -839,11 +898,14 @@ ifiq_process(void *arg) return; mtx_enter(&ifiq->ifiq_mtx); + ifiq->ifiq_pressure = 0; ifiq->ifiq_dequeues++; ml = ifiq->ifiq_ml; 
ml_init(&ifiq->ifiq_ml); mtx_leave(&ifiq->ifiq_mtx); + LLTRACE(lltrace_pkts, LLTRACE_PKTS_T_NETTQ, ml_len(&ml)); + if_input_process(ifiq->ifiq_if, &ml); } @@ -852,8 +914,6 @@ net_ifiq_sysctl(int *name, u_int namelen void *newp, size_t newlen) { int error = EOPNOTSUPP; -/* pressure is disabled for 6.6-release */ -#if 0 int val; if (namelen != 1) @@ -882,7 +942,6 @@ net_ifiq_sysctl(int *name, u_int namelen error = EOPNOTSUPP; break; } -#endif return (error); } Index: net/ifq.h =================================================================== RCS file: /cvs/src/sys/net/ifq.h,v diff -u -p -r1.41 ifq.h --- net/ifq.h 10 Nov 2023 15:51:24 -0000 1.41 +++ net/ifq.h 18 Jun 2024 02:47:16 -0000 @@ -73,6 +73,8 @@ struct ifqueue { unsigned int ifq_idx; }; +#define IFIQ_HISTOGRAM_BUCKETS 10 + struct ifiqueue { struct ifnet *ifiq_if; struct taskq *ifiq_softnet; @@ -89,8 +91,10 @@ struct ifiqueue { unsigned int ifiq_pressure; /* counters */ + uint64_t ifiq_histogram[IFIQ_HISTOGRAM_BUCKETS]; uint64_t ifiq_packets; uint64_t ifiq_bytes; + uint64_t ifiq_hdrops; uint64_t ifiq_fdrops; uint64_t ifiq_qdrops; uint64_t ifiq_errors; Index: sys/conf.h =================================================================== RCS file: /cvs/src/sys/sys/conf.h,v diff -u -p -r1.161 conf.h --- sys/conf.h 14 Jan 2023 12:11:11 -0000 1.161 +++ sys/conf.h 18 Jun 2024 02:47:16 -0000 @@ -326,6 +326,21 @@ extern struct cdevsw cdevsw[]; (dev_type_stop((*))) enodev, 0, \ (dev_type_mmap((*))) enodev } +/* open, close, read, ioctl, poll, kqfilter */ +#define cdev_lltrace_init(c,n) { \ + .d_open = dev_init(c,n,open), \ + .d_close = dev_init(c,n,close), \ + .d_read = dev_init(c,n,read), \ + .d_write = (dev_type_write((*))) enodev, \ + .d_ioctl = dev_init(c,n,ioctl), \ + .d_stop = (dev_type_stop((*))) enodev, \ + .d_tty = NULL, \ + .d_mmap = (dev_type_mmap((*))) enodev, \ + .d_type = 0, \ + .d_flags = 0, \ + .d_kqfilter = dev_init(c,n,kqfilter), \ +} + /* open, close, read, write, ioctl, stop, tty, mmap, kqfilter */ #define cdev_wsdisplay_init(c,n) { \ dev_init(c,n,open), dev_init(c,n,close), dev_init(c,n,read), \ @@ -617,6 +632,7 @@ cdev_decl(wsmux); cdev_decl(ksyms); cdev_decl(kstat); +cdev_decl(lltrace); cdev_decl(bio); cdev_decl(vscsi); Index: sys/lltrace.h =================================================================== RCS file: sys/lltrace.h diff -N sys/lltrace.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/lltrace.h 18 Jun 2024 02:47:16 -0000 @@ -0,0 +1,325 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2022 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef _SYS_LLTRACE_H_ +#define _SYS_LLTRACE_H_ + +/* + * lltrace is heavily based KUTrace (kernel/userland tracing) by + * Richard L. Sites. 
+ */ + +#define LLTRACE_NSLOTS 8192 + +struct lltrace_buffer { + uint64_t llt_slots[LLTRACE_NSLOTS]; +}; + +#define LLTIOCSTART _IO('t',128) +#define LLTIOCSTOP _IO('t',129) +#define LLTIOCFLUSH _IO('t',130) + +/* + * trace until all the buffers are used, or trace and reuse buffers. + */ +#define LLTRACE_MODE_HEAD 0 +#define LLTRACE_MODE_TAIL 1 +#define LLTRACE_MODE_COUNT 2 + +#define LLTIOCSMODE _IOW('t', 131, unsigned int) +#define LLTIOCGMODE _IOR('t', 131, unsigned int) + +/* + * how much memory in MB to allocate for lltrace_buffer structs + * during tracing. + */ + +#define LLTRACE_BLEN_MIN 1 +#define LLTRACE_BLEN_MAX 128 + +#define LLTIOCSBLEN _IOW('t', 132, unsigned int) +#define LLTIOCGBLEN _IOR('t', 132, unsigned int) + +/* + * lltrace collects kernel events in per-CPU buffers. + */ + +/* + * The first 8 words of the per-CPU buffer are dedicated to metadata + * about the CPU and the period of time over which events were + * collected. + */ + +struct lltrace_header { + /* slots[0] */ + uint64_t h_cpu; + + /* slots[1] */ + uint64_t h_boottime; + + /* slots[2] */ + uint64_t h_start_cy; + /* slots[3] */ + uint64_t h_start_ns; + /* slots[4] */ + uint64_t h_end_cy; + /* slots[5] */ + uint64_t h_end_ns; + + /* slots[6] */ + uint32_t h_idletid; + uint32_t h_tid; + /* slots[7] */ + uint64_t h_zero; +}; + +/* + * The high 32-bits of the trace entry contain a timestamp and event id. + */ + +#define LLTRACE_TIMESTAMP_SHIFT 44 +#define LLTRACE_TIMESTAMP_BITS 20 +#define LLTRACE_TIMESTAMP_MASK ((1ULL << LLTRACE_TIMESTAMP_BITS) - 1) + +#define LLTRACE_EVENT_SHIFT 32 +#define LLTRACE_EVENT_BITS 12 +#define LLTRACE_EVENT_MASK ((1ULL << LLTRACE_EVENT_BITS) - 1) + +/* + * The low 32-bits vary depending on the event id. + */ + +/* full 32 bits are used */ +#define LLTRACE_ARG32_SHIFT 0 +#define LLTRACE_ARG32_BITS 32 +#define LLTRACE_ARG32_MASK ((1ULL << LLTRACE_ARG32_BITS) - 1) + +/* layout for syscalls/traps/irqs */ +#define LLTRACE_ARG0_SHIFT 0 +#define LLTRACE_ARG0_BITS 16 +#define LLTRACE_ARG0_MASK ((1ULL << LLTRACE_ARG0_BITS) - 1) + +#define LLTRACE_RETVAL_SHIFT 16 +#define LLTRACE_RETVAL_BITS 8 +#define LLTRACE_RETVAL_MASK ((1ULL << LLTRACE_RETVAL_BITS) - 1) + +#define LLTRACE_DUR_SHIFT 24 +#define LLTRACE_DUR_BITS 8 +#define LLTRACE_DUR_MASK ((1ULL << LLTRACE_DUR_BITS) - 1) + +/* + * lltrace event types + */ + +/* + * the high 3 bits of the event id defines how the rest of the bits are used. + */ + +#define LLTRACE_EVENT_T_MASK (0x7ULL << 9) +#define LLTRACE_EVENT_T_VARLEN (0x0ULL << 9) +#define LLTRACE_EVENT_T_MARK (0x1ULL << 9) +#define LLTRACE_EVENT_T_IRQ (0x2ULL << 9) +#define LLTRACE_EVENT_T_SYSCALL (0x4ULL << 9) +#define LLTRACE_EVENT_T_SYSRET (0x5ULL << 9) + +/* + * variable len events use extra slots on the ring. 
+ */ + +#define LLTRACE_EVENT_VARLEN_MASK (0x00fULL) /* low 4bits are the len */ + +#define LLTRACE_EVENT_PID (LLTRACE_EVENT_T_VARLEN | 0x10) +#define LLTRACE_EVENT_PID_ARG_KTHREAD (1U << 31) +#define LLTRACE_EVENT_LOCKNAME (LLTRACE_EVENT_T_VARLEN | 0x70) + +/* hardcode the space used by PC entries */ +#define LLTRACE_EVENT_PC_K (LLTRACE_EVENT_T_VARLEN | 0x80) +#define LLTRACE_EVENT_PC_U (LLTRACE_EVENT_T_VARLEN | 0x90) + +/* + * mark a particular event occuring + */ + +#define LLTRACE_EVENT_IDLE (LLTRACE_EVENT_T_MARK | 0x0) + +#define LLTRACE_EVENT_RUNNABLE (LLTRACE_EVENT_T_MARK | 0x1) + /* arg32 is tid */ + +#define LLTRACE_EVENT_TRAP (LLTRACE_EVENT_T_MARK | 0x2) +#define LLTRACE_EVENT_TRAPRET (LLTRACE_EVENT_T_MARK | 0x3) + /* arg32 is trap id */ +#define LLTRACE_TRAP_PAGEFAULT 14 /* as per kutrace */ + +#define LLTRACE_EVENT_SCHED (LLTRACE_EVENT_T_MARK | 0x4) +#define LLTRACE_EVENT_SCHEDRET (LLTRACE_EVENT_T_MARK | 0x5) + +#define LLTRACE_EVENT_IPI (LLTRACE_EVENT_T_MARK | 0x6) + /* arg32 is cpu */ + +#define LLTRACE_EVENT_PKTS (LLTRACE_EVENT_T_MARK | 0x7) +#define LLTRACE_PKTS_T_SHIFT 28 +#define LLTRACE_PKTS_T_MASK (0xf << LLTRACE_PKTS_T_SHIFT) +#define LLTRACE_PKTS_T_IFIQ (0x0 << LLTRACE_PKTS_T_SHIFT) +#define LLTRACE_PKTS_T_NETTQ (0x1 << LLTRACE_PKTS_T_SHIFT) +#define LLTRACE_PKTS_T_IFQ (0x2 << LLTRACE_PKTS_T_SHIFT) +#define LLTRACE_PKTS_T_QDROP (0x3 << LLTRACE_PKTS_T_SHIFT) +#define LLTRACE_PKTS_T_HDROP (0x4 << LLTRACE_PKTS_T_SHIFT) +#define LLTRACE_PKTS_V_SHIFT 0 +#define LLTRACE_PKTS_V_MASK (0xffff << LLTRACE_PKTS_V_SHIFT) + +#define LLTRACE_EVENT_LOCK(_t) (LLTRACE_EVENT_T_MARK | 0x10 | (_t)) +#define LLTRACE_LOCK_NOACQUIRE (0x00) +#define LLTRACE_LOCK_ACQUIRE (0x01) +#define LLTRACE_LOCK_WAKEUP (0x02) +#define LLTRACE_RLOCK_NOACQUIRE (0x03) +#define LLTRACE_RLOCK_ACQUIRE (0x04) +#define LLTRACE_RLOCK_WAKEUP (0x05) +#define LLTRACE_KLOCK_NOACQUIRE (0x06) +#define LLTRACE_KLOCK_ACQUIRE (0x07) +#define LLTRACE_KLOCK_WAKEUP (0x08) + +#define LLTRACE_EVENT_KFUNC_ENTER (LLTRACE_EVENT_T_MARK | 0xf0) +#define LLTRACE_EVENT_KFUNC_LEAVE (LLTRACE_EVENT_T_MARK | 0xf1) +#define LLTRACE_EVENT_MARK (LLTRACE_EVENT_T_MARK | 0xff) + +/* + * irqs + */ + +#define LLTRACE_EVENT_IRQ(_c) (LLTRACE_EVENT_T_IRQ | 0x000 | (_c)) +#define LLTRACE_EVENT_IRQRET(_c) (LLTRACE_EVENT_T_IRQ | 0x100 | (_c)) + +#define LLTRACE_IRQ_LOCAL_TIMER (0xecULL) /* like linux */ +#define LLTRACE_IRQ_IPI (0xfdULL) /* like linux */ + +#define LLTRACE_IRQ_BOTTOM_HALF (0xffULL) /* like kutrace */ + +/* + * syscalls and returns from syscalls + */ + +#define LLTRACE_SYSCALL_MASK(_c) ((uint64_t)(_c) & 0x1ff) + +#define LLTRACE_EVENT_SYSCALL(_c) \ + (LLTRACE_EVENT_T_SYSCALL | LLTRACE_SYSCALL_MASK(_c)) +#define LLTRACE_EVENT_SYSRET(_c) \ + (LLTRACE_EVENT_T_SYSRET | LLTRACE_SYSCALL_MASK(_c)) + +/* + * KUTrace event types for compatibility + */ + +#define KUTRACE_FILENAME (0x001ULL) +#define KUTRACE_PIDNAME (0x002ULL) +#define KUTRACE_METHODNAME (0x003ULL) +#define KUTRACE_TRAPNAME (0x004ULL) +#define KUTRACE_LOCKNAME (0x007ULL) + +#define KUTRACE_USERPID (0x200ULL) +#define KUTRACE_RUNNABLE (0x206ULL) +#define KUTRACE_IPI (0x207ULL) +#define KUTRACE_MWAIT (0x208ULL) +#define KUTRACE_PSTATE (0x209ULL) + +#define KUTRACE_MARKA (0x20aULL) +#define KUTRACE_MARKB (0x20bULL) +#define KUTRACE_MARKC (0x20cULL) +#define KUTRACE_MARKD (0x20dULL) + +#define KUTRACE_LOCKNOACQUIRE (0x210ULL) +#define KUTRACE_LOCKACQUIRE (0x211ULL) +#define KUTRACE_LOCKWAKEUP (0x212ULL) + +#define KUTRACE_PC_U (0x280ULL) +#define KUTRACE_PC_K (0x281ULL) + +/* these are 
in blocks of 256 */ +#define KUTRACE_TRAP (0x400ULL) +#define KUTRACE_IRQ (0x500ULL) +#define KUTRACE_TRAPRET (0x600ULL) +#define KUTRACE_IRQRET (0x700ULL) + +#define KUTRACE_LOCAL_TIMER_VECTOR (0xec) + +/* these are in blocks of 512 */ +#define KUTRACE_SYSCALL_MASK(_c) ((uint64_t)(_c) & 0x1ff) +#define KUTRACE_SYSCALL_SCHED 511 + +#define KUTRACE_SYSCALL(_c) (0x800ULL | KUTRACE_SYSCALL_MASK(_c)) +#define KUTRACE_SYSRET(_c) (0xa00ULL | KUTRACE_SYSCALL_MASK(_c)) + +/* Specific trap number for page fault */ +#define KUTRACE_PAGEFAULT 14 + +#ifdef _KERNEL + +struct lltrace_cpu; + +static inline struct lltrace_cpu * +lltrace_enter_spc(struct schedstate_percpu *spc) +{ + return (READ_ONCE(spc->spc_lltrace)); +} + +static inline struct lltrace_cpu * +lltrace_enter_cpu(struct cpu_info *ci) +{ + return lltrace_enter_spc(&ci->ci_schedstate); +} + +static inline struct lltrace_cpu * +lltrace_enter(void) +{ + return lltrace_enter_cpu(curcpu()); +} + +void lltrace_idle(struct lltrace_cpu *); +void lltrace_statclock(struct lltrace_cpu *, int, unsigned long); + +void lltrace_syscall(struct lltrace_cpu *, register_t, + size_t, const register_t *); +void lltrace_sysret(struct lltrace_cpu *, register_t, + int, const register_t [2]); +void lltrace_pidname(struct lltrace_cpu *, struct proc *); +void lltrace_sched_enter(struct lltrace_cpu *); +void lltrace_sched_leave(struct lltrace_cpu *); +void lltrace_runnable(struct lltrace_cpu *, struct proc *); + +void lltrace_trap(struct lltrace_cpu *, unsigned int); +void lltrace_trapret(struct lltrace_cpu *, unsigned int); + +void lltrace_lock(struct lltrace_cpu *, void *, unsigned int); +void lltrace_klock(struct lltrace_cpu *, void *, unsigned int); + +void lltrace_pkts(struct lltrace_cpu *, unsigned int, unsigned int); +void lltrace_mark(struct lltrace_cpu *); + +void lltrace_fn_enter(struct lltrace_cpu *, void *); +void lltrace_fn_leave(struct lltrace_cpu *, void *); + +/* MD bits */ + +void lltrace_ipi(struct lltrace_cpu *, unsigned int); +#define lltrace_ipi_bcast(_llt) lltrace_ipi((_llt), ~0U); + +void lltrace_irq(struct lltrace_cpu *, unsigned int, unsigned int); +void lltrace_irqret(struct lltrace_cpu *, unsigned int, unsigned int); + +#endif /* _KERNEL */ + +#endif /* _SYS_LLTRACE_H_ */ Index: sys/mbuf.h =================================================================== RCS file: /cvs/src/sys/sys/mbuf.h,v diff -u -p -r1.263 mbuf.h --- sys/mbuf.h 14 Apr 2024 20:46:27 -0000 1.263 +++ sys/mbuf.h 18 Jun 2024 02:47:16 -0000 @@ -138,6 +138,7 @@ struct pkthdr { u_int16_t ph_mss; /* TCP max segment size */ u_int8_t ph_loopcnt; /* mbuf is looping in kernel */ u_int8_t ph_family; /* af, used when queueing */ + u_int8_t ph_drops; /* hardware packet drops */ struct pkthdr_pf pf; }; Index: sys/param.h =================================================================== RCS file: /cvs/src/sys/sys/param.h,v diff -u -p -r1.142 param.h --- sys/param.h 17 Feb 2024 16:13:24 -0000 1.142 +++ sys/param.h 18 Jun 2024 02:47:16 -0000 @@ -111,6 +111,8 @@ #define PCATCH 0x100 /* OR'd with pri for tsleep to check signals */ #define PNORELOCK 0x200 /* OR'd with pri for msleep to not reacquire the mutex */ +#define PLLTRACE 0x400 + #endif /* _KERNEL */ #define NODEV (dev_t)(-1) /* non-existent device */ Index: sys/pclock.h =================================================================== RCS file: sys/pclock.h diff -N sys/pclock.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/pclock.h 18 Jun 2024 02:47:16 -0000 @@ -0,0 +1,49 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2023 David 
Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#ifndef _SYS_PCLOCK_H +#define _SYS_PCLOCK_H + +#include + +struct pc_lock { + volatile unsigned int pcl_gen; +}; + +#ifdef _KERNEL + +#define PC_LOCK_INITIALIZER() { .pcl_gen = 0 } + +void pc_lock_init(struct pc_lock *); + +/* single (non-interlocking) producer */ +unsigned int pc_sprod_enter(struct pc_lock *); +void pc_sprod_leave(struct pc_lock *, unsigned int); + +/* multiple (interlocking) producers */ +unsigned int pc_mprod_enter(struct pc_lock *); +void pc_mprod_leave(struct pc_lock *, unsigned int); + +/* consumer */ +void pc_cons_enter(struct pc_lock *, unsigned int *); +__warn_unused_result int + pc_cons_leave(struct pc_lock *, unsigned int *); + +#endif /* _KERNEL */ + +#endif /* _SYS_RWLOCK_H */ Index: sys/sched.h =================================================================== RCS file: /cvs/src/sys/sys/sched.h,v diff -u -p -r1.70 sched.h --- sys/sched.h 24 Jan 2024 19:23:38 -0000 1.70 +++ sys/sched.h 18 Jun 2024 02:47:16 -0000 @@ -97,21 +97,25 @@ struct cpustats { #include #include +#include #define SCHED_NQS 32 /* 32 run queues. */ struct smr_entry; +struct lltrace_cpu; /* * Per-CPU scheduler state. */ struct schedstate_percpu { + struct lltrace_cpu *spc_lltrace; struct proc *spc_idleproc; /* idle proc for this cpu */ TAILQ_HEAD(prochead, proc) spc_qs[SCHED_NQS]; LIST_HEAD(,proc) spc_deadproc; struct timespec spc_runtime; /* time curproc started running */ volatile int spc_schedflags; /* flags; see below */ u_int spc_schedticks; /* ticks for schedclock() */ + struct pc_lock spc_cp_time_lock; u_int64_t spc_cp_time[CPUSTATES]; /* CPU state statistics */ u_char spc_curpriority; /* usrpri of curproc */ Index: sys/syscall_mi.h =================================================================== RCS file: /cvs/src/sys/sys/syscall_mi.h,v diff -u -p -r1.33 syscall_mi.h --- sys/syscall_mi.h 1 Apr 2024 12:00:15 -0000 1.33 +++ sys/syscall_mi.h 18 Jun 2024 02:47:16 -0000 @@ -158,6 +158,7 @@ mi_syscall(struct proc *p, register_t co KERNEL_UNLOCK(); } #endif + LLTRACE_CPU(p->p_cpu, lltrace_syscall, code, callp->sy_argsize, argp); /* SP must be within MAP_STACK space */ if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p), @@ -191,6 +192,7 @@ static inline void mi_syscall_return(struct proc *p, register_t code, int error, const register_t retval[2]) { + LLTRACE_CPU(p->p_cpu, lltrace_sysret, code, error, retval); #ifdef SYSCALL_DEBUG KERNEL_LOCK(); scdebug_ret(p, code, error, retval); @@ -218,12 +220,13 @@ mi_syscall_return(struct proc *p, regist static inline void mi_child_return(struct proc *p) { -#if defined(SYSCALL_DEBUG) || defined(KTRACE) || NDT > 0 +#if defined(SYSCALL_DEBUG) || defined(KTRACE) || NDT > 0 || NLLT > 0 int code = (p->p_flag & P_THREAD) ? SYS___tfork : (p->p_p->ps_flags & PS_PPWAIT) ? 
SYS_vfork : SYS_fork; const register_t child_retval[2] = { 0, 1 }; #endif + LLTRACE_CPU(p->p_cpu, lltrace_sysret, code, 0, child_retval); TRACEPOINT(sched, on__cpu, NULL); #ifdef SYSCALL_DEBUG Index: sys/tracepoint.h =================================================================== RCS file: /cvs/src/sys/sys/tracepoint.h,v diff -u -p -r1.2 tracepoint.h --- sys/tracepoint.h 28 Jun 2022 09:32:28 -0000 1.2 +++ sys/tracepoint.h 18 Jun 2024 02:47:16 -0000 @@ -34,5 +34,35 @@ #define TRACEINDEX(func, index, args...) #endif /* NDT > 0 */ + +#include "llt.h" +#if NLLT > 0 +#include <sys/lltrace.h> + +#define LLTRACE_SPC(_spc, _fn, ...) do { \ + struct lltrace_cpu *_llt = lltrace_enter_spc((_spc)); \ + if (_llt != NULL) \ + (_fn)(_llt __VA_OPT__(,) __VA_ARGS__); \ +} while (0) + +#define LLTRACE_CPU(_ci, _fn, ...) do { \ + struct lltrace_cpu *_llt = lltrace_enter_cpu((_ci)); \ + if (_llt != NULL) \ + (_fn)(_llt __VA_OPT__(,) __VA_ARGS__); \ +} while (0) + +#define LLTRACE(_fn, ...) do { \ + struct lltrace_cpu *_llt = lltrace_enter(); \ + if (_llt != NULL) \ + (_fn)(_llt __VA_OPT__(,) __VA_ARGS__); \ +} while (0) + +#else /* NLLT > 0 */ + +#define LLTRACE_SPC(_spc, _fn, ...) +#define LLTRACE_CPU(_ci, _fn, ...) +#define LLTRACE(_fn, ...) + +#endif /* NLLT > 0 */ #endif /* _KERNEL */ #endif /* _SYS_TRACEPOINT_H_ */ Index: uvm/uvm_fault.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_fault.c,v diff -u -p -r1.135 uvm_fault.c --- uvm/uvm_fault.c 5 Sep 2023 05:08:26 -0000 1.135 +++ uvm/uvm_fault.c 18 Jun 2024 02:47:16 -0000 @@ -576,6 +576,8 @@ uvm_fault(vm_map_t orig_map, vaddr_t vad struct vm_page *pages[UVM_MAXRANGE]; int error; + LLTRACE(lltrace_trap, LLTRACE_TRAP_PAGEFAULT); + counters_inc(uvmexp_counters, faults); TRACEPOINT(uvm, fault, vaddr, fault_type, access_type, NULL); @@ -639,6 +641,8 @@ uvm_fault(vm_map_t orig_map, vaddr_t vad } } } + + LLTRACE(lltrace_trapret, LLTRACE_TRAP_PAGEFAULT); return error; }
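
Below is a rough sketch of the userland side, to show how the ioctl interface above is meant to be driven. The device path (/dev/lltrace) and the buffer size/mode values are assumptions on my part; the ioctl sequence and read() behaviour follow lltraceioctl() and lltraceread() in the diff: read() blocks until tracing has stopped, each read() returns the used slots of one per-CPU buffer, and a zero-length read marks the end of the trace.

/*
 * Sketch only: capture a trace with the lltrace device.
 * The device path and the chosen buffer size/mode are assumptions;
 * the ioctls come from sys/lltrace.h in this diff.
 */
#include <sys/ioctl.h>
#include <sys/lltrace.h>

#include <err.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	uint64_t slots[LLTRACE_NSLOTS];
	unsigned int blen = 32;			/* MB of trace buffers */
	unsigned int mode = LLTRACE_MODE_TAIL;	/* reuse buffers, keep newest */
	ssize_t rv;
	int fd;

	fd = open("/dev/lltrace", O_RDWR);	/* device name is an assumption */
	if (fd == -1)
		err(1, "open");

	if (ioctl(fd, LLTIOCSBLEN, &blen) == -1)
		err(1, "LLTIOCSBLEN");
	if (ioctl(fd, LLTIOCSMODE, &mode) == -1)
		err(1, "LLTIOCSMODE");

	if (ioctl(fd, LLTIOCSTART) == -1)
		err(1, "LLTIOCSTART");
	sleep(10);				/* let the system run for a while */
	if (ioctl(fd, LLTIOCSTOP) == -1)
		err(1, "LLTIOCSTOP");

	/* each read returns the used slots of one per-CPU buffer; 0 is EOF */
	while ((rv = read(fd, slots, sizeof(slots))) > 0) {
		/* dump the raw slots to stdout for offline decoding */
		if (fwrite(slots, 1, rv, stdout) != (size_t)rv)
			err(1, "fwrite");
	}
	if (rv == -1)
		err(1, "read");

	close(fd);
	return (0);
}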
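
For completeness, this is how I read the record layout defined in sys/lltrace.h: each slot is a uint64_t with a 20 bit truncated timestamp in the top bits, a 12 bit event id below that, and an event specific argument in the low 32 bits; the first 8 slots of every buffer carry the struct lltrace_header. The decoder below is only an illustration built from those shift/mask constants, the struct and function names are made up and not part of the diff:

/*
 * Illustrative decoder for one lltrace slot, using the shift/mask
 * constants from sys/lltrace.h above. Names here are invented.
 */
#include <sys/types.h>
#include <sys/lltrace.h>

#include <stdint.h>

struct llt_record {
	uint64_t	ts;	/* truncated timestamp (20 bits) */
	uint64_t	event;	/* 12-bit event id */
	uint64_t	arg32;	/* event specific low 32 bits */
	unsigned int	extra;	/* following slots used by VARLEN events */
};

static void
llt_decode(uint64_t slot, struct llt_record *r)
{
	r->ts = (slot >> LLTRACE_TIMESTAMP_SHIFT) & LLTRACE_TIMESTAMP_MASK;
	r->event = (slot >> LLTRACE_EVENT_SHIFT) & LLTRACE_EVENT_MASK;
	r->arg32 = (slot >> LLTRACE_ARG32_SHIFT) & LLTRACE_ARG32_MASK;

	/* variable length events keep their payload in the next slots */
	if ((r->event & LLTRACE_EVENT_T_MASK) == LLTRACE_EVENT_T_VARLEN)
		r->extra = r->event & LLTRACE_EVENT_VARLEN_MASK;
	else
		r->extra = 0;
}

Because the in-record timestamp is truncated to 20 bits, a real decoder also has to unwrap it KUTrace-style, carrying a running base and adding 1 << 20 whenever the timestamp goes backwards, anchored against h_start_cy/h_end_cy from the buffer header.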
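
The pclock.h/kern_lock.c chunk adds a generation-count (seqlock style) lock, and sched.h grows a spc_cp_time_lock for the per-CPU cp_time counters, but the hunks here don't show the readers. The following is only a guess at the intended consumer pattern, based on the pc_cons_enter()/pc_cons_leave() semantics above (pc_cons_leave() returns EBUSY when the generation moved and the copy has to be retried); the helper is not part of the diff:

/*
 * Sketch of a pc_lock consumer: take a consistent snapshot of the
 * per-CPU cp_time counters guarded by spc_cp_time_lock.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sched.h>
#include <sys/pclock.h>

void
cp_time_snapshot(struct schedstate_percpu *spc, uint64_t snap[CPUSTATES])
{
	unsigned int gen;

	pc_cons_enter(&spc->spc_cp_time_lock, &gen);
	do {
		/* the copy may be torn; pc_cons_leave() says if it must be redone */
		memcpy(snap, spc->spc_cp_time, sizeof(spc->spc_cp_time));
	} while (pc_cons_leave(&spc->spc_cp_time_lock, &gen) != 0);
}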