Index: arch/amd64/amd64/conf.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/conf.c,v retrieving revision 1.76 diff -u -p -r1.76 conf.c --- arch/amd64/amd64/conf.c 2 Sep 2022 20:06:55 -0000 1.76 +++ arch/amd64/amd64/conf.c 8 Dec 2022 06:10:12 -0000 @@ -136,6 +136,7 @@ cdev_decl(cy); #include "bktr.h" #include "ksyms.h" #include "kstat.h" +#include "llt.h" #include "usb.h" #include "uhid.h" #include "fido.h" @@ -212,7 +213,8 @@ struct cdevsw cdevsw[] = cdev_notdef(), /* 28 was LKM */ cdev_notdef(), /* 29 */ cdev_dt_init(NDT,dt), /* 30: dynamic tracer */ - cdev_notdef(), /* 31 */ + cdev_lltrace_init(NLLT,lltrace), + /* 31: lltrace */ cdev_notdef(), /* 32 */ cdev_notdef(), /* 33 */ cdev_notdef(), /* 34 */ Index: arch/amd64/amd64/intr.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/intr.c,v retrieving revision 1.55 diff -u -p -r1.55 intr.c --- arch/amd64/amd64/intr.c 28 Dec 2020 14:23:30 -0000 1.55 +++ arch/amd64/amd64/intr.c 8 Dec 2022 06:10:12 -0000 @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -531,10 +532,18 @@ intr_handler(struct intrframe *frame, st if (need_lock) __mp_lock(&kernel_lock); #endif + floor = ci->ci_handled_intr_level; ci->ci_handled_intr_level = ih->ih_level; + + LLTRACE_CPU(ci, lltrace_irq, 0, + ci->ci_isources[ih->ih_slot]->is_idtvec); rc = (*ih->ih_fun)(ih->ih_arg ? ih->ih_arg : frame); + LLTRACE_CPU(ci, lltrace_irqret, 0, + ci->ci_isources[ih->ih_slot]->is_idtvec); + ci->ci_handled_intr_level = floor; + #ifdef MULTIPROCESSOR if (need_lock) __mp_unlock(&kernel_lock); @@ -552,7 +561,10 @@ struct intrhand fake_softclock_intrhand; struct intrhand fake_softnet_intrhand; struct intrhand fake_softtty_intrhand; struct intrhand fake_timer_intrhand; +#ifdef MULTIPROCESSOR struct intrhand fake_ipi_intrhand; +struct intrhand fake_xcall_intrhand; +#endif #if NXEN > 0 struct intrhand fake_xen_intrhand; #endif @@ -619,6 +631,17 @@ cpu_intr_init(struct cpu_info *ci) isp->is_handlers = &fake_ipi_intrhand; isp->is_pic = &local_pic; ci->ci_isources[LIR_IPI] = isp; + + isp = malloc(sizeof (struct intrsource), M_DEVBUF, M_NOWAIT|M_ZERO); + if (isp == NULL) + panic("can't allocate fixed interrupt source"); + isp->is_recurse = Xxcallintr; + isp->is_resume = Xxcallintr; + fake_xcall_intrhand.ih_level = IPL_SOFTCLOCK; + fake_xcall_intrhand.ih_flags = IPL_MPSAFE; + isp->is_handlers = &fake_xcall_intrhand; + isp->is_pic = &local_pic; + ci->ci_isources[SIR_XCALL] = isp; #endif #if NXEN > 0 isp = malloc(sizeof (struct intrsource), M_DEVBUF, M_NOWAIT|M_ZERO); Index: arch/amd64/amd64/ipi.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/ipi.c,v retrieving revision 1.18 diff -u -p -r1.18 ipi.c --- arch/amd64/amd64/ipi.c 10 Nov 2022 08:26:54 -0000 1.18 +++ arch/amd64/amd64/ipi.c 8 Dec 2022 06:10:12 -0000 @@ -35,9 +35,10 @@ #include #include #include +#include +#include #include -#include #include #include #include @@ -45,6 +46,8 @@ void x86_send_ipi(struct cpu_info *ci, int ipimask) { + LLTRACE(lltrace_ipi, ci->ci_cpuid); + x86_atomic_setbits_u32(&ci->ci_ipis, ipimask); /* Don't send IPI to cpu which isn't (yet) running. 
*/ @@ -57,6 +60,10 @@ x86_send_ipi(struct cpu_info *ci, int ip int x86_fast_ipi(struct cpu_info *ci, int ipi) { +#if 0 + LLTRACE(lltrace_ipi, ci->ci_cpuid); +#endif + if (!(ci->ci_flags & CPUF_RUNNING)) return (ENOENT); @@ -72,6 +79,8 @@ x86_broadcast_ipi(int ipimask) int count = 0; CPU_INFO_ITERATOR cii; + LLTRACE_CPU(self, lltrace_ipi, ~0); + CPU_INFO_FOREACH(cii, ci) { if (ci == self) continue; @@ -102,7 +111,10 @@ x86_ipi_handler(void) for (bit = 0; bit < X86_NIPI && pending; bit++) { if (pending & (1 << bit)) { pending &= ~(1 << bit); + + LLTRACE_CPU(ci, lltrace_irq, LLTRACE_IRQ_IPI, bit); (*ipifunc[bit])(ci); + LLTRACE_CPU(ci, lltrace_irqret, LLTRACE_IRQ_IPI, bit); evcount_inc(&ipi_count); } } Index: arch/amd64/amd64/ipifuncs.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/ipifuncs.c,v retrieving revision 1.37 diff -u -p -r1.37 ipifuncs.c --- arch/amd64/amd64/ipifuncs.c 7 Aug 2022 23:56:06 -0000 1.37 +++ arch/amd64/amd64/ipifuncs.c 8 Dec 2022 06:10:12 -0000 @@ -61,6 +61,7 @@ void x86_64_ipi_nop(struct cpu_info *); void x86_64_ipi_halt(struct cpu_info *); void x86_64_ipi_wbinvd(struct cpu_info *); +void x86_64_ipi_xcall(struct cpu_info *); #if NVMM > 0 void x86_64_ipi_vmclear_vmm(struct cpu_info *); @@ -108,6 +109,7 @@ void (*ipifunc[X86_NIPI])(struct cpu_inf NULL, #endif x86_64_ipi_wbinvd, + x86_64_ipi_xcall, }; void @@ -166,3 +168,13 @@ x86_64_ipi_wbinvd(struct cpu_info *ci) { wbinvd(); } + +void +x86_64_ipi_xcall(struct cpu_info *ci) +{ + /* + * this is an inlining of softintr() because we already have + * curcpu() and the SIR_XCALL bit to set. + */ + x86_atomic_setbits_u64(&ci->ci_ipending, 1UL << SIR_XCALL); +} Index: arch/amd64/amd64/softintr.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/softintr.c,v retrieving revision 1.10 diff -u -p -r1.10 softintr.c --- arch/amd64/amd64/softintr.c 11 Sep 2020 09:27:09 -0000 1.10 +++ arch/amd64/amd64/softintr.c 8 Dec 2022 06:10:12 -0000 @@ -34,9 +34,15 @@ * Generic soft interrupt implementation for NetBSD/x86.
*/ +#include "llt.h" + #include #include #include +#include + +#include +#include #include @@ -99,8 +105,9 @@ softintr_dispatch(int which) uvmexp.softs++; mtx_leave(&si->softintr_lock); - + //LLTRACE_CPU(ci, lltrace_irq, LLTRACE_IRQ_BOTTOM_HALF, which); (*sih->sih_fn)(sih->sih_arg); + //LLTRACE_CPU(ci, lltrace_irqret, LLTRACE_IRQ_BOTTOM_HALF, which); } KERNEL_UNLOCK(); @@ -169,3 +176,58 @@ softintr_disestablish(void *arg) free(sih, M_DEVBUF, sizeof(*sih)); } + +void +#ifdef MULTIPROCESSOR +cpu_xcall_self(struct task *t) +#else +cpu_xcall(struct cpu_info *ci, struct task *t) +#endif +{ + int s = splsoftclock(); + (*t->t_func)(t->t_arg); + splx(s); +} + +#ifdef MULTIPROCESSOR +void +cpu_xcall(struct cpu_info *ci, struct task *t) +{ + size_t i; + + if (ci == curcpu()) { + /* execute the task immediately on the local cpu */ + cpu_xcall_self(t); + return; + } + + for (;;) { + for (i = 0; i < nitems(ci->ci_xcalls); i++) { + if (atomic_cas_ptr(&ci->ci_xcalls[i], NULL, t) != NULL) + continue; + + /* membar_producer(); */ + x86_send_ipi(ci, X86_IPI_XCALL); + return; + } + + CPU_BUSY_CYCLE(); + } +} + +void +cpu_xcall_dispatch(void) +{ + struct cpu_info *ci = curcpu(); + struct task *t; + size_t i; + + for (i = 0; i < nitems(ci->ci_xcalls); i++) { + t = ci->ci_xcalls[i]; + if (t != NULL) { + ci->ci_xcalls[i] = NULL; + (*t->t_func)(t->t_arg); + } + } +} +#endif /* MULTIPROCESSOR */ Index: arch/amd64/amd64/vector.S =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/vector.S,v retrieving revision 1.87 diff -u -p -r1.87 vector.S --- arch/amd64/amd64/vector.S 1 Dec 2022 00:26:15 -0000 1.87 +++ arch/amd64/amd64/vector.S 8 Dec 2022 06:10:12 -0000 @@ -1293,3 +1293,12 @@ KIDTVEC(softclock) decl CPUVAR(IDEPTH) jmp retpoline_r13 END(Xsoftclock) + +KIDTVEC(xcallintr) + movl $IPL_SOFTCLOCK, CPUVAR(ILEVEL) + sti + incl CPUVAR(IDEPTH) + call _C_LABEL(cpu_xcall_dispatch) + decl CPUVAR(IDEPTH) + jmp retpoline_r13 +END(Xxcallintr) Index: arch/amd64/conf/Makefile.amd64 =================================================================== RCS file: /cvs/src/sys/arch/amd64/conf/Makefile.amd64,v retrieving revision 1.127 diff -u -p -r1.127 Makefile.amd64 --- arch/amd64/conf/Makefile.amd64 7 Apr 2022 07:30:54 -0000 1.127 +++ arch/amd64/conf/Makefile.amd64 8 Dec 2022 06:10:12 -0000 @@ -66,9 +66,12 @@ SORTR= cat COPTIMIZE= -Oz .if ${COMPILER_VERSION:Mclang} CMACHFLAGS+= -mno-retpoline +CMACHFLAGS+= -fno-ret-protector .endif .endif .if ${COMPILER_VERSION:Mclang} +CMACHFLAGS+= -mno-retpoline +CMACHFLAGS+= -fno-ret-protector NO_INTEGR_AS= -no-integrated-as CWARNFLAGS+= -Wno-address-of-packed-member -Wno-constant-conversion \ -Wno-unused-but-set-variable -Wno-gnu-folding-constant Index: arch/amd64/include/cpu.h =================================================================== RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v retrieving revision 1.154 diff -u -p -r1.154 cpu.h --- arch/amd64/include/cpu.h 29 Nov 2022 21:41:39 -0000 1.154 +++ arch/amd64/include/cpu.h 8 Dec 2022 06:10:12 -0000 @@ -115,7 +115,6 @@ struct cpu_info { struct schedstate_percpu ci_schedstate; /* scheduler state */ struct cpu_info *ci_next; /* [I] */ - struct proc *ci_curproc; /* [o] */ u_int ci_cpuid; /* [I] */ u_int ci_apicid; /* [I] */ u_int ci_acpi_proc_id; /* [I] */ @@ -129,6 +128,7 @@ struct cpu_info { char ci_mds_tmp[32]; /* [o] 32byte aligned */ void *ci_mds_buf; /* [I] */ + struct proc *ci_curproc; /* [o] */ struct pmap *ci_proc_pmap; /* last userspace pmap */ struct pcb *ci_curpcb; /* [o]
*/ struct pcb *ci_idle_pcb; /* [o] */ @@ -198,6 +198,7 @@ struct cpu_info { #ifdef MULTIPROCESSOR struct srp_hazard ci_srp_hazards[SRP_HAZARD_NUM]; + struct task *ci_xcalls[4]; #endif struct ksensordev ci_sensordev; Index: arch/amd64/include/intr.h =================================================================== RCS file: /cvs/src/sys/arch/amd64/include/intr.h,v retrieving revision 1.33 diff -u -p -r1.33 intr.h --- arch/amd64/include/intr.h 14 Dec 2021 18:16:14 -0000 1.33 +++ arch/amd64/include/intr.h 8 Dec 2022 06:10:12 -0000 @@ -207,6 +207,9 @@ void cpu_intr_init(struct cpu_info *); void intr_printconfig(void); void intr_barrier(void *); +struct task; +void cpu_xcall(struct cpu_info *ci, struct task *); + #ifdef MULTIPROCESSOR void x86_send_ipi(struct cpu_info *, int); int x86_fast_ipi(struct cpu_info *, int); @@ -215,6 +218,8 @@ void x86_ipi_handler(void); void x86_setperf_ipi(struct cpu_info *); extern void (*ipifunc[X86_NIPI])(struct cpu_info *); + +extern void Xxcallintr(void); #endif #endif /* !_LOCORE */ Index: arch/amd64/include/intrdefs.h =================================================================== RCS file: /cvs/src/sys/arch/amd64/include/intrdefs.h,v retrieving revision 1.22 diff -u -p -r1.22 intrdefs.h --- arch/amd64/include/intrdefs.h 31 Aug 2021 17:40:59 -0000 1.22 +++ arch/amd64/include/intrdefs.h 8 Dec 2022 06:10:12 -0000 @@ -54,9 +54,10 @@ #define SIR_CLOCK 61 #define SIR_NET 60 #define SIR_TTY 59 +#define SIR_XCALL 58 -#define LIR_XEN 58 -#define LIR_HYPERV 57 +#define LIR_XEN 57 +#define LIR_HYPERV 56 /* * Maximum # of interrupt sources per CPU. 64 to fit in one word. @@ -84,8 +85,9 @@ #define X86_IPI_START_VMM 0x00000100 #define X86_IPI_STOP_VMM 0x00000200 #define X86_IPI_WBINVD 0x00000400 +#define X86_IPI_XCALL 0x00000800 -#define X86_NIPI 12 +#define X86_NIPI 13 #define IREENT_MAGIC 0x18041969 Index: arch/arm64/arm64/conf.c =================================================================== RCS file: /cvs/src/sys/arch/arm64/arm64/conf.c,v retrieving revision 1.20 diff -u -p -r1.20 conf.c --- arch/arm64/arm64/conf.c 2 Sep 2022 20:06:55 -0000 1.20 +++ arch/arm64/arm64/conf.c 8 Dec 2022 06:10:12 -0000 @@ -91,6 +91,7 @@ cdev_decl(lpt); #include "bktr.h" #include "ksyms.h" #include "kstat.h" +#include "llt.h" #include "usb.h" #include "uhid.h" #include "fido.h" @@ -156,7 +157,8 @@ struct cdevsw cdevsw[] = cdev_notdef(), /* 28 was LKM */ cdev_notdef(), /* 29 */ cdev_dt_init(NDT,dt), /* 30: dynamic tracer */ - cdev_notdef(), /* 31 */ + cdev_lltrace_init(NLLT,lltrace), + /* 31: lltrace */ cdev_notdef(), /* 32 */ cdev_notdef(), /* 33 */ cdev_notdef(), /* 34 */ Index: arch/arm64/dev/agintc.c =================================================================== RCS file: /cvs/src/sys/arch/arm64/dev/agintc.c,v retrieving revision 1.44 diff -u -p -r1.44 agintc.c --- arch/arm64/dev/agintc.c 13 Oct 2022 18:34:56 -0000 1.44 +++ arch/arm64/dev/agintc.c 8 Dec 2022 06:10:12 -0000 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -1013,7 +1014,11 @@ agintc_run_handler(struct intrhand *ih, else arg = frame; + LLTRACE(lltrace_irq, ih->ih_ipl == IPL_IPI ? LLTRACE_IRQ_IPI : 0, + ih->ih_irq); handled = ih->ih_func(arg); + LLTRACE(lltrace_irqret, ih->ih_ipl == IPL_IPI ? 
LLTRACE_IRQ_IPI : 0, + ih->ih_irq); if (handled) ih->ih_count.ec_count++; @@ -1341,6 +1346,8 @@ agintc_send_ipi(struct cpu_info *ci, int { struct agintc_softc *sc = agintc_sc; uint64_t sendmask; + + LLTRACE(lltrace_ipi, ci->ci_cpuid); if (ci == curcpu() && id == ARM_IPI_NOP) return; Index: conf/GENERIC =================================================================== RCS file: /cvs/src/sys/conf/GENERIC,v retrieving revision 1.286 diff -u -p -r1.286 GENERIC --- conf/GENERIC 30 Sep 2022 02:56:23 -0000 1.286 +++ conf/GENERIC 8 Dec 2022 06:10:14 -0000 @@ -82,6 +82,7 @@ pseudo-device endrun 1 # EndRun line dis pseudo-device vnd 4 # vnode disk devices pseudo-device ksyms 1 # kernel symbols device pseudo-device kstat # kernel statistics device +pseudo-device llt # low-level tracing device # clonable devices pseudo-device bpfilter # packet filter Index: conf/files =================================================================== RCS file: /cvs/src/sys/conf/files,v retrieving revision 1.719 diff -u -p -r1.719 files --- conf/files 6 Nov 2022 15:36:13 -0000 1.719 +++ conf/files 8 Dec 2022 06:10:14 -0000 @@ -579,6 +579,9 @@ file dev/ksyms.c ksyms needs-flag pseudo-device kstat file dev/kstat.c kstat needs-flag +pseudo-device llt +file dev/lltrace.c llt needs-flag + pseudo-device fuse file miscfs/fuse/fuse_device.c fuse needs-flag file miscfs/fuse/fuse_file.c fuse Index: dev/lltrace.c =================================================================== RCS file: dev/lltrace.c diff -N dev/lltrace.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ dev/lltrace.c 8 Dec 2022 06:10:14 -0000 @@ -0,0 +1,965 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2022 The University of Queensland + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +/* + * This code was written by David Gwynne as part + * of the Information Technology Infrastructure Group (ITIG) in the + * Faculty of Engineering, Architecture and Information Technology + * (EAIT). + * + * It was heavily inspired by and aims to be largely compatible + * with the KUTrace (kernel/userland tracing) framework by Richard + * L. Sites. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#if defined(__amd64__) || defined(__i386__) + +static inline unsigned int +lltrace_cas(unsigned int *p, unsigned int e, unsigned int n) +{ + __asm volatile("cmpxchgl %2, %1" + : "=a" (e), "=m" (*p) + : "r" (n), "a" (e), "m" (*p)); + + return (e); +} + +static inline uint64_t +lltrace_ts(void) +{ + unsigned int hi, lo; + + __asm volatile("lfence; rdtsc" : "=d" (hi), "=a" (lo)); + + return (lo >> 6); +} + +static inline uint64_t +lltrace_ts_long(void) +{ + return (rdtsc_lfence() >> 6); +} + +#elif defined(__aarch64__) + +#define lltrace_cas(_p, _e, _n) atomic_cas_uint((_p), (_e), (_n)) + +static inline uint64_t +lltrace_ts_long(void) +{ + uint64_t ts; + + __asm volatile("mrs %x0, cntvct_el0" : "=r" (ts)); + + return (ts); +} + +#define lltrace_ts() lltrace_ts_long() + +#else /* not x86 or arm64 */ + +static unsigned int +lltrace_cas(unsigned int *p, unsigned int e, unsigned int n) +{ + unsigned int o; + int s; + + s = intr_disable(); + o = *p; + if (o == e) + *p = n; + intr_restore(s); + + return (o); +} + +static inline uint64_t +lltrace_ts(void) +{ + return (countertime()); +} + +static inline uint64_t +lltrace_ts_long(void) +{ + return (countertime()); +} + +#endif + +#define LLTRACE_MB2NBUF(_mb) \ + (((_mb) * (1U << 20)) / sizeof(struct lltrace_buffer)) +#define LLTRACE_NBUF2MB(_nbuf) \ + (((_nbuf) * sizeof(struct lltrace_buffer)) / (1U << 20)) + +#define LLTRACE_BLEN_DEFAULT 16 + +struct lltrace_cpu { + SIMPLEQ_ENTRY(lltrace_cpu) + llt_entry; + struct lltrace_buffer llt_buffer; + unsigned int llt_slot; + pid_t llt_tid; +}; + +SIMPLEQ_HEAD(lltrace_cpu_list, lltrace_cpu); + +struct lltrace_softc { + unsigned int sc_running; + unsigned int sc_mode; + struct rwlock sc_lock; + unsigned int sc_nbuffers; + + unsigned int sc_free; + unsigned int sc_used; + struct lltrace_cpu **sc_ring; + struct lltrace_cpu *sc_buffers; + + unsigned int sc_read; + unsigned int sc_reading; + struct selinfo sc_sel; + + uint64_t sc_boottime; + uint64_t sc_monotime; +}; + +static int lltrace_start(struct lltrace_softc *, struct proc *); +static int lltrace_stop(struct lltrace_softc *, struct proc *); +static int lltrace_flush(struct lltrace_softc *); + +static struct lltrace_softc *lltrace_sc; + +static void lltrace_arg32(struct lltrace_cpu *, uint64_t, unsigned int); + +int +lltattach(int num) +{ + return (0); +} + +int +lltraceopen(dev_t dev, int flag, int mode, struct proc *p) +{ + struct lltrace_softc *sc; + int error; + + if (minor(dev) != 0) + return (ENXIO); + + error = suser(p); + if (error != 0) + return (error); + + if (lltrace_sc != NULL) + return (EBUSY); + + sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO); + if (sc == NULL) + return (ENOMEM); + + sc->sc_running = 0; + sc->sc_nbuffers = LLTRACE_MB2NBUF(LLTRACE_BLEN_DEFAULT); + + rw_init(&sc->sc_lock, "lltlk"); + + sc->sc_read = 0; + sc->sc_reading = 0; + klist_init_rwlock(&sc->sc_sel.si_note, &sc->sc_lock); + + /* commit */ + if (atomic_cas_ptr(&lltrace_sc, NULL, sc) != NULL) { + free(sc, M_DEVBUF, sizeof(*sc)); + return (EBUSY); + } + + return (0); +} + +int +lltraceclose(dev_t dev, int flag, int mode, struct proc *p) +{ + struct lltrace_softc *sc = lltrace_sc; + + rw_enter_write(&sc->sc_lock); + lltrace_stop(sc, p); + lltrace_flush(sc); + rw_exit_write(&sc->sc_lock); + + lltrace_sc = NULL; + membar_sync(); + + free(sc, M_DEVBUF, sizeof(*sc)); + + return (0); +} + +static int +lltrace_fionread(struct 
lltrace_softc *sc) +{ + int canread; + + rw_enter_read(&sc->sc_lock); + canread = !sc->sc_running && (sc->sc_buffers != NULL) && + (sc->sc_read < sc->sc_nbuffers); + rw_exit_read(&sc->sc_lock); + + return (canread ? sizeof(struct lltrace_buffer) : 0); +} + +static void +lltrace_cpu_init(struct lltrace_cpu *llt, struct lltrace_softc *sc, + struct cpu_info *ci, pid_t tid) +{ + struct lltrace_header *llh; + + llh = (struct lltrace_header *)&llt->llt_buffer; + llh->h_cpu = cpu_number(); + llh->h_boottime = sc->sc_boottime; + llh->h_start_cy = lltrace_ts_long(); + llh->h_start_ns = nsecuptime() - sc->sc_monotime; + llh->h_end_cy = 0; + llh->h_end_ns = 0; + llh->h_idletid = ci->ci_schedstate.spc_idleproc->p_tid; + llh->h_tid = tid; + llh->h_zero = 0; + + llt->llt_tid = tid; + llt->llt_slot = 8; +} + +static void +lltrace_cpu_fini(struct lltrace_cpu *llt, struct lltrace_softc *sc) +{ + struct lltrace_header *llh; + + llh = (struct lltrace_header *)&llt->llt_buffer; + llh->h_end_cy = lltrace_ts_long(); + llh->h_end_ns = nsecuptime() - sc->sc_monotime; +} + +static int +lltrace_set_mode(struct lltrace_softc *sc, unsigned int mode) +{ + int error; + + if (mode >= LLTRACE_MODE_COUNT) + return (EINVAL); + + error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR); + if (error != 0) + return (error); + + if (sc->sc_running) + error = EBUSY; + else + sc->sc_mode = mode; + + rw_exit(&sc->sc_lock); + return (error); +} + +static int +lltrace_set_blen(struct lltrace_softc *sc, unsigned int blen) +{ + int error; + unsigned int nbuffers; + + if (blen < LLTRACE_BLEN_MIN || blen > LLTRACE_BLEN_MAX) + return (EINVAL); + + /* convert megabytes to the number of buffers */ + nbuffers = LLTRACE_MB2NBUF(blen); + if (nbuffers <= ncpus) + return (EINVAL); + + error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR); + if (error != 0) + return (error); + + if (sc->sc_buffers != NULL) + error = EBUSY; + else { + sc->sc_nbuffers = nbuffers; + printf("%s[%u]: nbuffers %u\n", __func__, __LINE__, sc->sc_nbuffers); + } + + rw_exit(&sc->sc_lock); + return (error); +} + +static int +lltrace_start(struct lltrace_softc *sc, struct proc *p) +{ + struct bintime boottime; + unsigned int i; + size_t sz; + struct lltrace_cpu_list l = SIMPLEQ_HEAD_INITIALIZER(l); + struct lltrace_cpu *llt; + struct cpu_info *ci; + CPU_INFO_ITERATOR cii; + + if (sc->sc_running) + return (EINVAL); + + if (sc->sc_nbuffers <= (ncpus * 2 + 1)) + return (EINVAL); + + lltrace_flush(sc); + + sc->sc_monotime = nsecuptime(); + + binboottime(&boottime); + sc->sc_boottime = BINTIME_TO_NSEC(&boottime) + sc->sc_monotime; + + sz = roundup(sc->sc_nbuffers * sizeof(*sc->sc_buffers), PAGE_SIZE); + sc->sc_buffers = km_alloc(sz, &kv_any, &kp_dirty, &kd_waitok); + if (sc->sc_buffers == NULL) + return (ENOMEM); + sc->sc_ring = mallocarray(sc->sc_nbuffers, sizeof(*sc->sc_ring), + M_DEVBUF, M_WAITOK); + for (i = 0; i < sc->sc_nbuffers; i++) { + llt = &sc->sc_buffers[i]; + llt->llt_slot = 0; + sc->sc_ring[i] = llt; + } + + sc->sc_free = 0; /* next slot to pull a free buffer from */ + sc->sc_used = 0; /* next slot to put a used buffer in */ + + CPU_INFO_FOREACH(cii, ci) { + i = sc->sc_free++; /* can't wrap yet */ + + llt = sc->sc_ring[i]; + sc->sc_ring[i] = NULL; + + SIMPLEQ_INSERT_HEAD(&l, llt, llt_entry); + } + + CPU_INFO_FOREACH(cii, ci) { + sched_peg_curproc(ci); + + llt = SIMPLEQ_FIRST(&l); + SIMPLEQ_REMOVE_HEAD(&l, llt_entry); + + lltrace_cpu_init(llt, sc, ci, p->p_tid); + lltrace_pidname(llt, p); + + membar_producer(); + ci->ci_schedstate.spc_lltrace = llt; + } + atomic_clearbits_int(&p->p_flag,
P_CPUPEG); + + sc->sc_running = 1; + + return (0); +} + +static int +lltrace_stop(struct lltrace_softc *sc, struct proc *p) +{ + struct lltrace_cpu *llt; + struct cpu_info *ci; + CPU_INFO_ITERATOR cii; + unsigned long s; + + if (!sc->sc_running) + return (EALREADY); + + sc->sc_running = 0; + + /* visit each cpu to take llt away safely */ + CPU_INFO_FOREACH(cii, ci) { + sched_peg_curproc(ci); + + s = intr_disable(); + llt = ci->ci_schedstate.spc_lltrace; + ci->ci_schedstate.spc_lltrace = NULL; + intr_restore(s); + + lltrace_cpu_fini(llt, sc); + } + atomic_clearbits_int(&p->p_flag, P_CPUPEG); + + return (0); +} + +static int +lltrace_flush(struct lltrace_softc *sc) +{ + size_t sz; + + rw_assert_wrlock(&sc->sc_lock); + if (sc->sc_running) + return (EBUSY); + + if (sc->sc_buffers == NULL) + return (0); + + sz = roundup(sc->sc_nbuffers * sizeof(*sc->sc_buffers), PAGE_SIZE); + km_free(sc->sc_buffers, sz, &kv_any, &kp_dirty); + free(sc->sc_ring, M_DEVBUF, sc->sc_nbuffers * sizeof(*sc->sc_ring)); + + sc->sc_buffers = NULL; + sc->sc_ring = NULL; + sc->sc_read = 0; + + return (0); +} + +int +lltraceioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) +{ + struct lltrace_softc *sc = lltrace_sc; + int error = 0; + + KERNEL_UNLOCK(); + + switch (cmd) { + case FIONREAD: + *(int *)data = lltrace_fionread(sc); + break; + case FIONBIO: + /* vfs tracks this for us if we let it */ + break; + + case LLTIOCSTART: + error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR); + if (error != 0) + break; + error = lltrace_start(sc, p); + rw_exit(&sc->sc_lock); + break; + case LLTIOCSTOP: + error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR); + if (error != 0) + break; + error = lltrace_stop(sc, p); + rw_exit(&sc->sc_lock); + break; + case LLTIOCFLUSH: + error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR); + if (error != 0) + break; + error = lltrace_flush(sc); + rw_exit(&sc->sc_lock); + break; + + case LLTIOCSBLEN: + error = lltrace_set_blen(sc, *(unsigned int *)data); + break; + case LLTIOCGBLEN: + *(unsigned int *)data = LLTRACE_NBUF2MB(sc->sc_nbuffers); + break; + + case LLTIOCSMODE: + error = lltrace_set_mode(sc, *(unsigned int *)data); + break; + case LLTIOCGMODE: + *(unsigned int *)data = sc->sc_mode; + break; + + default: + error = ENOTTY; + break; + } + + KERNEL_LOCK(); + + return (error); +} + +int +lltraceread(dev_t dev, struct uio *uio, int ioflag) +{ + struct lltrace_softc *sc = lltrace_sc; + struct lltrace_cpu *llt; + unsigned int slot; + int error; + + KERNEL_UNLOCK(); + + error = rw_enter(&sc->sc_lock, RW_WRITE|RW_INTR); + if (error != 0) + goto lock; + + if (sc->sc_running) { + if (ISSET(ioflag, IO_NDELAY)) { + error = EWOULDBLOCK; + goto unlock; + } + + do { + sc->sc_reading++; + error = rwsleep_nsec(&sc->sc_reading, &sc->sc_lock, + PRIBIO|PCATCH, "lltread", INFSLP); + sc->sc_reading--; + if (error != 0) + goto unlock; + } while (sc->sc_running); + } + + if (sc->sc_buffers == NULL) { + error = 0; + goto unlock; + } + + slot = sc->sc_read; + for (;;) { + if (slot >= sc->sc_nbuffers) { + error = 0; + goto unlock; + } + + llt = &sc->sc_buffers[slot]; + KASSERT(llt->llt_slot <= nitems(llt->llt_buffer.llt_slots)); + if (llt->llt_slot > 0) + break; + + slot++; + } + + error = uiomove(&llt->llt_buffer, + llt->llt_slot * sizeof(llt->llt_buffer.llt_slots[0]), uio); + if (error != 0) + goto unlock; + + sc->sc_read = slot + 1; + +unlock: + rw_exit(&sc->sc_lock); +lock: + KERNEL_LOCK(); + return (error); +} + +static void +lltrace_filt_detach(struct knote *kn) +{ + struct lltrace_softc *sc = kn->kn_hook; + + 
klist_remove(&sc->sc_sel.si_note, kn); +} + +static int +lltrace_filt_event(struct knote *kn, long hint) +{ + struct lltrace_softc *sc = kn->kn_hook; + int canread; + + canread = !sc->sc_running && (sc->sc_buffers != NULL) && + (sc->sc_read < sc->sc_nbuffers); + + kn->kn_data = canread ? sizeof(struct lltrace_buffer) : 0; + + return (canread); +} + +static int +lltrace_filt_modify(struct kevent *kev, struct knote *kn) +{ + struct lltrace_softc *sc = kn->kn_hook; + int active; + + rw_enter_write(&sc->sc_lock); + active = knote_modify_fn(kev, kn, lltrace_filt_event); + rw_exit_write(&sc->sc_lock); + + return (active); +} + +static int +lltrace_filt_process(struct knote *kn, struct kevent *kev) +{ + struct lltrace_softc *sc = kn->kn_hook; + int active; + + rw_enter_write(&sc->sc_lock); + active = knote_process_fn(kn, kev, lltrace_filt_event); + rw_exit_write(&sc->sc_lock); + + return (active); +} + +static const struct filterops lltrace_filtops = { + .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, + .f_attach = NULL, + .f_detach = lltrace_filt_detach, + .f_event = lltrace_filt_event, + .f_modify = lltrace_filt_modify, + .f_process = lltrace_filt_process, +}; + +int +lltracekqfilter(dev_t dev, struct knote *kn) +{ + struct lltrace_softc *sc = lltrace_sc; + struct klist *klist; + + switch (kn->kn_filter) { + case EVFILT_READ: + klist = &sc->sc_sel.si_note; + kn->kn_fop = &lltrace_filtops; + break; + default: + return (EINVAL); + } + + kn->kn_hook = sc; + klist_insert(klist, kn); + + return (0); +} + +static struct lltrace_cpu * +lltrace_next(struct lltrace_cpu *llt) +{ + struct lltrace_softc *sc = lltrace_sc; + struct cpu_info *ci = curcpu(); + struct lltrace_cpu *nllt; + unsigned int slot, oslot, nslot; + + /* check if we were preempted */ + nllt = ci->ci_schedstate.spc_lltrace; + if (nllt != llt) { + /* something preempted us and swapped buffers already */ + return (nllt); + } + + slot = sc->sc_free; + for (;;) { + nslot = slot + 1; + if (nslot > sc->sc_nbuffers) { + if (sc->sc_mode == LLTRACE_MODE_HEAD) + return (NULL); + } + + oslot = atomic_cas_uint(&sc->sc_free, slot, nslot); + if (slot == oslot) + break; + + slot = oslot; + } + + slot %= sc->sc_nbuffers; + nllt = sc->sc_ring[slot]; + sc->sc_ring[slot] = NULL; + + slot = sc->sc_used; + for (;;) { + nslot = slot + 1; + + oslot = atomic_cas_uint(&sc->sc_used, slot, nslot); + if (slot == oslot) + break; + + slot = oslot; + } + + lltrace_cpu_init(nllt, sc, ci, llt->llt_tid); + lltrace_cpu_fini(llt, sc); + + slot %= sc->sc_nbuffers; + sc->sc_ring[slot] = llt; + + ci->ci_schedstate.spc_lltrace = nllt; + + return (nllt); +} + +static unsigned int +lltrace_insert(struct lltrace_cpu *llt, uint64_t record, + const uint64_t *extra, unsigned int n) +{ + unsigned int slot, oslot, nslot; + uint64_t *slots; + + n++; + record |= lltrace_ts() << LLTRACE_TIMESTAMP_SHIFT; + + slot = llt->llt_slot; + for (;;) { + nslot = slot + n; + if (nslot > nitems(llt->llt_buffer.llt_slots)) { + unsigned long s; + + s = intr_disable(); + llt = lltrace_next(llt); + intr_restore(s); + + if (llt == NULL) + return (1); + + slot = llt->llt_slot; + continue; + } + + oslot = lltrace_cas(&llt->llt_slot, slot, nslot); + if (slot == oslot) + break; + + slot = oslot; + } + + slots = llt->llt_buffer.llt_slots + slot; + *slots = record; + while (n > 1) { + *(++slots) = *(extra++); + n--; + } + + return (0); +} + +void +lltrace_statclock(struct lltrace_cpu *llt, int usermode, unsigned long pc) +{ + uint64_t event = usermode ? 
LLTRACE_EVENT_PC_U : LLTRACE_EVENT_PC_K; + uint64_t extra[1] = { pc }; + + lltrace_insert(llt, (event | nitems(extra)) << LLTRACE_EVENT_SHIFT, + extra, nitems(extra)); +} + +void +lltrace_syscall(struct lltrace_cpu *llt, register_t code, + size_t argsize, const register_t *args) +{ + uint64_t record = LLTRACE_EVENT_SYSCALL(code) << LLTRACE_EVENT_SHIFT; + + if (argsize > 0) { + uint64_t arg0 = args[0] & LLTRACE_ARG0_MASK; + record |= arg0 << LLTRACE_ARG0_SHIFT; + } + + lltrace_insert(llt, record, NULL, 0); +} + +void +lltrace_sysret(struct lltrace_cpu *llt, register_t code, + int error, const register_t retvals[2]) +{ + uint64_t record = LLTRACE_EVENT_SYSRET(code) << LLTRACE_EVENT_SHIFT; + uint64_t arg0 = error & LLTRACE_ARG0_MASK; + record |= arg0 << LLTRACE_ARG0_SHIFT; + unsigned int stop; + + stop = lltrace_insert(llt, record, NULL, 0); + + if (stop) { + struct lltrace_softc *sc = lltrace_sc; + + rw_enter_write(&sc->sc_lock); + if (sc->sc_running) + lltrace_stop(sc, curproc); + + KNOTE(&sc->sc_sel.si_note, 0); + if (sc->sc_reading) + wakeup(&sc->sc_reading); + rw_exit_write(&sc->sc_lock); + } +} + +void +lltrace_pidname(struct lltrace_cpu *llt, struct proc *p) +{ + uint64_t record; + uint64_t extra[3]; + unsigned int l, n; + pid_t tid = p->p_tid; + + if (ISSET(p->p_p->ps_flags, PS_SYSTEM)) + tid |= LLTRACE_EVENT_PID_ARG_KTHREAD; + + CTASSERT(sizeof(extra) == sizeof(p->p_p->ps_comm)); + + extra[0] = extra[1] = extra[2] = 0; /* memset */ + l = strlcpy((char *)extra, p->p_p->ps_comm, sizeof(extra)); + + /* turn the string length into the number of slots we need */ + n = howmany(l, sizeof(uint64_t)); + + record = (LLTRACE_EVENT_PID | n) << LLTRACE_EVENT_SHIFT; + record |= (tid & LLTRACE_ARG32_MASK) << LLTRACE_ARG32_SHIFT; + + llt->llt_tid = p->p_tid; + + lltrace_insert(llt, record, extra, n); +} + +void +lltrace_sched_enter(struct lltrace_cpu *llt) +{ + uint64_t record = LLTRACE_EVENT_SCHED << LLTRACE_EVENT_SHIFT; + + lltrace_insert(llt, record, NULL, 0); +} + +void +lltrace_sched_leave(struct lltrace_cpu *llt) +{ + uint64_t record = LLTRACE_EVENT_SCHEDRET << LLTRACE_EVENT_SHIFT; + + lltrace_insert(llt, record, NULL, 0); +} + +void +lltrace_idle(struct lltrace_cpu *llt) +{ + uint64_t record = LLTRACE_EVENT_IDLE << LLTRACE_EVENT_SHIFT; + + lltrace_insert(llt, record, NULL, 0); +} + +static void +lltrace_arg32(struct lltrace_cpu *llt, uint64_t event, unsigned int arg32) +{ + uint64_t record; + + record = event << LLTRACE_EVENT_SHIFT; + record |= (arg32 & LLTRACE_ARG32_MASK) << LLTRACE_ARG32_SHIFT; + + lltrace_insert(llt, record, NULL, 0); +} + +void +lltrace_runnable(struct lltrace_cpu *llt, struct proc *p) +{ + lltrace_arg32(llt, LLTRACE_EVENT_RUNNABLE, p->p_tid); +} + +void +lltrace_trap(struct lltrace_cpu *llt, unsigned int trap) +{ + lltrace_arg32(llt, LLTRACE_EVENT_TRAP, trap); +} + +void +lltrace_trapret(struct lltrace_cpu *llt, unsigned int trap) +{ + lltrace_arg32(llt, LLTRACE_EVENT_TRAPRET, trap); +} + +void +lltrace_ipi(struct lltrace_cpu *llt, unsigned int cpu) +{ + lltrace_arg32(llt, LLTRACE_EVENT_IPI, cpu); +} + +void +lltrace_irq(struct lltrace_cpu *llt, unsigned int type, unsigned int vec) +{ + lltrace_arg32(llt, LLTRACE_EVENT_IRQ(type), vec); +} + +void +lltrace_irqret(struct lltrace_cpu *llt, unsigned int type, unsigned int vec) +{ + lltrace_arg32(llt, LLTRACE_EVENT_IRQRET(type), vec); +} + +void +lltrace_lock(struct lltrace_cpu *llt, void *lock, unsigned int op) +{ + lltrace_arg32(llt, LLTRACE_EVENT_LOCK(op), (uint32_t)(intptr_t)lock); +} + +void +lltrace_klock(struct 
lltrace_cpu *llt, void *lock, unsigned int op) +{ +#if 0 + lltrace_arg32(llt, LLTRACE_EVENT_LOCK(op), (uint32_t)(intptr_t)lock); +#endif +} + +void +lltrace_pkts(struct lltrace_cpu *llt, unsigned int t, unsigned int v) +{ + t &= LLTRACE_PKTS_T_MASK; + + v <<= LLTRACE_PKTS_V_SHIFT; + v &= LLTRACE_PKTS_V_MASK; + + lltrace_arg32(llt, LLTRACE_EVENT_PKTS, t | v); +} + +void +lltrace_mark(struct lltrace_cpu *llt) +{ + uint64_t record = LLTRACE_EVENT_MARK << LLTRACE_EVENT_SHIFT; + + lltrace_insert(llt, record, NULL, 0); +} + +void +lltrace_fn_enter(struct lltrace_cpu *llt, void *fn) +{ + lltrace_arg32(llt, LLTRACE_EVENT_KFUNC_ENTER, (uintptr_t)fn); +} + +void +lltrace_fn_leave(struct lltrace_cpu *llt, void *fn) +{ + lltrace_arg32(llt, LLTRACE_EVENT_KFUNC_LEAVE, (uintptr_t)fn); +} + +void +__cyg_profile_func_enter(void *fn, void *pc) +{ + struct lltrace_cpu *llt; + uint64_t record; + + llt = lltrace_enter(); + if (llt == NULL) + return; + + record = LLTRACE_EVENT_KFUNC_ENTER << LLTRACE_EVENT_SHIFT; + record |= ((uintptr_t)fn & LLTRACE_ARG32_MASK) << LLTRACE_ARG32_SHIFT; + + lltrace_insert(llt, record, NULL, 0); +} + +void +__cyg_profile_func_exit(void *fn, void *pc) +{ + struct lltrace_cpu *llt; + uint64_t record; + + llt = lltrace_enter(); + if (llt == NULL) + return; + + record = LLTRACE_EVENT_KFUNC_LEAVE << LLTRACE_EVENT_SHIFT; + record |= ((uintptr_t)fn & LLTRACE_ARG32_MASK) << LLTRACE_ARG32_SHIFT; + + lltrace_insert(llt, record, NULL, 0); +} Index: dev/pci/if_mcx.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_mcx.c,v retrieving revision 1.106 diff -u -p -r1.106 if_mcx.c --- dev/pci/if_mcx.c 22 Nov 2022 06:48:32 -0000 1.106 +++ dev/pci/if_mcx.c 8 Dec 2022 06:10:15 -0000 @@ -6890,6 +6890,7 @@ mcx_process_rx(struct mcx_softc *sc, str ms->ms_m = NULL; m->m_pkthdr.len = m->m_len = len; + m->m_pkthdr.ph_drops = cqe->cq_rx_drops; if (cqe->cq_rx_hash_type) { m->m_pkthdr.ph_flowid = betoh32(cqe->cq_rx_hash); Index: dev/pci/if_vmx.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_vmx.c,v retrieving revision 1.70 diff -u -p -r1.70 if_vmx.c --- dev/pci/if_vmx.c 11 Sep 2022 08:38:39 -0000 1.70 +++ dev/pci/if_vmx.c 8 Dec 2022 06:10:15 -0000 @@ -845,8 +845,8 @@ vmxnet3_intr(void *arg) } if (ifp->if_flags & IFF_RUNNING) { - vmxnet3_rxintr(sc, &sc->sc_q[0].rx); vmxnet3_txintr(sc, &sc->sc_q[0].tx); + vmxnet3_rxintr(sc, &sc->sc_q[0].rx); vmxnet3_enable_intr(sc, 0); } @@ -873,8 +873,8 @@ vmxnet3_intr_queue(void *arg) { struct vmxnet3_queue *q = arg; - vmxnet3_rxintr(q->sc, &q->rx); vmxnet3_txintr(q->sc, &q->tx); + vmxnet3_rxintr(q->sc, &q->rx); vmxnet3_enable_intr(q->sc, q->intr); return 1; Index: kern/kern_clock.c =================================================================== RCS file: /cvs/src/sys/kern/kern_clock.c,v retrieving revision 1.105 diff -u -p -r1.105 kern_clock.c --- kern/kern_clock.c 14 Aug 2022 01:58:27 -0000 1.105 +++ kern/kern_clock.c 8 Dec 2022 06:10:16 -0000 @@ -48,6 +48,7 @@ #include #include #include +#include #if defined(GPROF) || defined(DDBPROF) #include @@ -138,6 +139,11 @@ hardclock(struct clockframe *frame) struct proc *p; struct cpu_info *ci = curcpu(); + LLTRACE_CPU(ci, lltrace_statclock, + CLKF_USERMODE(frame), CLKF_PC(frame)); + + LLTRACE_CPU(ci, lltrace_irq, LLTRACE_IRQ_LOCAL_TIMER, 0); + p = curproc; if (p && ((p->p_flag & (P_SYSTEM | P_WEXIT)) == 0)) { struct process *pr = p->p_p; @@ -177,17 +183,18 @@ hardclock(struct clockframe *frame) * If we are not the primary 
CPU, we're not allowed to do * any more work. */ - if (CPU_IS_PRIMARY(ci) == 0) - return; + if (CPU_IS_PRIMARY(ci)) { + tc_ticktock(); + ticks++; + jiffies++; - tc_ticktock(); - ticks++; - jiffies++; + /* + * Update the timeout wheel. + */ + timeout_hardclock_update(); + } - /* - * Update the timeout wheel. - */ - timeout_hardclock_update(); + LLTRACE_CPU(ci, lltrace_irqret, LLTRACE_IRQ_LOCAL_TIMER, 0); } /* Index: kern/kern_exec.c =================================================================== RCS file: /cvs/src/sys/kern/kern_exec.c,v retrieving revision 1.240 diff -u -p -r1.240 kern_exec.c --- kern/kern_exec.c 23 Nov 2022 11:00:27 -0000 1.240 +++ kern/kern_exec.c 8 Dec 2022 06:10:16 -0000 @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -510,6 +511,8 @@ sys_execve(struct proc *p, void *v, regi memset(pr->ps_comm, 0, sizeof(pr->ps_comm)); strlcpy(pr->ps_comm, nid.ni_cnd.cn_nameptr, sizeof(pr->ps_comm)); pr->ps_acflag &= ~AFORK; + + LLTRACE(lltrace_pidname, p); /* record proc's vnode, for use by sysctl */ otvp = pr->ps_textvp; Index: kern/kern_lock.c =================================================================== RCS file: /cvs/src/sys/kern/kern_lock.c,v retrieving revision 1.72 diff -u -p -r1.72 kern_lock.c --- kern/kern_lock.c 26 Apr 2022 15:31:14 -0000 1.72 +++ kern/kern_lock.c 8 Dec 2022 06:10:16 -0000 @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -139,6 +140,8 @@ __mp_lock(struct __mp_lock *mpl) LOP_EXCLUSIVE | LOP_NEWORDER, NULL); #endif + LLTRACE(lltrace_klock, mpl, LLTRACE_LOCK_NOACQUIRE); + s = intr_disable(); if (cpu->mplc_depth++ == 0) cpu->mplc_ticket = atomic_inc_int_nv(&mpl->mpl_users); @@ -148,6 +151,8 @@ __mp_lock(struct __mp_lock *mpl) membar_enter_after_atomic(); WITNESS_LOCK(&mpl->mpl_lock_obj, LOP_EXCLUSIVE); + + LLTRACE(lltrace_klock, mpl, LLTRACE_LOCK_ACQUIRE); } void @@ -167,6 +172,7 @@ __mp_unlock(struct __mp_lock *mpl) s = intr_disable(); if (--cpu->mplc_depth == 0) { + LLTRACE(lltrace_klock, mpl, LLTRACE_LOCK_WAKEUP); membar_exit(); mpl->mpl_ticket++; } @@ -183,6 +189,8 @@ __mp_release_all(struct __mp_lock *mpl) int i; #endif + LLTRACE(lltrace_klock, mpl, LLTRACE_LOCK_WAKEUP); + s = intr_disable(); rv = cpu->mplc_depth; #ifdef WITNESS @@ -443,3 +451,47 @@ _mtx_init_flags(struct mutex *m, int ipl _mtx_init(m, ipl); } #endif /* WITNESS */ + +#if 0 +void +NET_LOCK(void) +{ + LLTRACE(lltrace_lock, &netlock, LLTRACE_LOCK_NOACQUIRE); + rw_enter_write(&netlock); + LLTRACE(lltrace_lock, &netlock, LLTRACE_LOCK_ACQUIRE); +} + +void +NET_UNLOCK(void) +{ + rw_exit_write(&netlock); + LLTRACE(lltrace_lock, &netlock, LLTRACE_LOCK_WAKEUP); +} + +void +NET_RLOCK_IN_SOFTNET(void) +{ + LLTRACE(lltrace_lock, &netlock, LLTRACE_RLOCK_NOACQUIRE); + rw_enter_read(&netlock); + LLTRACE(lltrace_lock, &netlock, LLTRACE_RLOCK_ACQUIRE); +} + +void +NET_RUNLOCK_IN_SOFTNET(void) +{ + rw_exit_read(&netlock); + LLTRACE(lltrace_lock, &netlock, LLTRACE_RLOCK_WAKEUP); +} + +void +NET_RLOCK_IN_IOCTL(void) +{ + rw_enter_read(&netlock); +} + +void +NET_RUNLOCK_IN_IOCTL(void) +{ + rw_exit_read(&netlock); +} +#endif Index: kern/kern_sched.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sched.c,v retrieving revision 1.76 diff -u -p -r1.76 kern_sched.c --- kern/kern_sched.c 5 Dec 2022 23:18:37 -0000 1.76 +++ kern/kern_sched.c 8 Dec 2022 06:10:16 -0000 @@ -184,6 +184,8 @@ sched_idle(void *v) wakeup(spc); } #endif + + LLTRACE(lltrace_idle); cpu_idle_cycle(); } cpu_idle_leave(); Index: 
kern/kern_sensors.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sensors.c,v retrieving revision 1.40 diff -u -p -r1.40 kern_sensors.c --- kern/kern_sensors.c 5 Dec 2022 23:18:37 -0000 1.40 +++ kern/kern_sensors.c 8 Dec 2022 06:10:16 -0000 @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "hotplug.h" @@ -260,8 +261,11 @@ sensor_task_work(void *xst) atomic_inc_int(&sensors_running); rw_enter_write(&st->lock); period = st->period; - if (period > 0 && !sensors_quiesced) + if (period > 0 && !sensors_quiesced) { + LLTRACE(lltrace_fn_enter, st->func); st->func(st->arg); + LLTRACE(lltrace_fn_leave, st->func); + } rw_exit_write(&st->lock); if (atomic_dec_int_nv(&sensors_running) == 0) { if (sensors_quiesced) Index: kern/kern_synch.c =================================================================== RCS file: /cvs/src/sys/kern/kern_synch.c,v retrieving revision 1.190 diff -u -p -r1.190 kern_synch.c --- kern/kern_synch.c 14 Aug 2022 01:58:27 -0000 1.190 +++ kern/kern_synch.c 8 Dec 2022 06:10:16 -0000 @@ -37,6 +37,8 @@ * @(#)kern_synch.c 8.6 (Berkeley) 1/21/94 */ +#include "llt.h" + #include #include #include @@ -299,7 +301,7 @@ rwsleep(const volatile void *ident, stru struct sleep_state sls; int error, status; - KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK)) == 0); + KASSERT((priority & ~(PRIMASK | PCATCH | PNORELOCK | PLLTRACE)) == 0); KASSERT(ident != &nowake || ISSET(priority, PCATCH) || timo != 0); rw_assert_anylock(rwl); status = rw_status(rwl); @@ -307,11 +309,26 @@ rwsleep(const volatile void *ident, stru sleep_setup(&sls, ident, priority, wmesg, timo); rw_exit(rwl); + +#if NLLT > 0 + if (priority & PLLTRACE) + LLTRACE(lltrace_lock, rwl, LLTRACE_LOCK_WAKEUP); +#endif + /* signal may stop the process, release rwlock before that */ error = sleep_finish(&sls, 1); - if ((priority & PNORELOCK) == 0) + if ((priority & PNORELOCK) == 0) { +#if NLLT > 0 + if (priority & PLLTRACE) + LLTRACE(lltrace_lock, rwl, LLTRACE_LOCK_NOACQUIRE); +#endif rw_enter(rwl, status); +#if NLLT > 0 + if (priority & PLLTRACE) + LLTRACE(lltrace_lock, rwl, LLTRACE_LOCK_ACQUIRE); +#endif + } return error; } @@ -520,6 +537,7 @@ unsleep(struct proc *p) p->p_wchan = NULL; TRACEPOINT(sched, wakeup, p->p_tid + THREAD_PID_OFFSET, p->p_p->ps_pid); + LLTRACE(lltrace_runnable, p); } } Index: kern/kern_tc.c =================================================================== RCS file: /cvs/src/sys/kern/kern_tc.c,v retrieving revision 1.80 diff -u -p -r1.80 kern_tc.c --- kern/kern_tc.c 5 Dec 2022 23:18:37 -0000 1.80 +++ kern/kern_tc.c 8 Dec 2022 06:10:16 -0000 @@ -140,6 +140,14 @@ tc_delta(struct timehands *th) tc->tc_counter_mask); } +unsigned int +countertime(void) +{ + struct timecounter *tc = timehands->th_counter; + + return (tc->tc_get_timecount(tc)); +} + /* * Functions for reading the time. We have to loop until we are sure that * the timehands that we operated on was not updated under our feet. 
See Index: kern/kern_timeout.c =================================================================== RCS file: /cvs/src/sys/kern/kern_timeout.c,v retrieving revision 1.89 diff -u -p -r1.89 kern_timeout.c --- kern/kern_timeout.c 5 Dec 2022 23:18:37 -0000 1.89 +++ kern/kern_timeout.c 8 Dec 2022 06:10:16 -0000 @@ -35,6 +35,7 @@ #include /* _Q_INVALIDATE */ #include #include +#include #ifdef DDB #include @@ -701,6 +702,8 @@ softclock(void *arg) struct timeout *first_new, *to; int needsproc, new; + LLTRACE(lltrace_irq, LLTRACE_IRQ_BOTTOM_HALF, 0); + first_new = NULL; new = 0; @@ -724,6 +727,8 @@ softclock(void *arg) if (needsproc) wakeup(&timeout_proc); + + LLTRACE(lltrace_irqret, LLTRACE_IRQ_BOTTOM_HALF, 0); } void Index: kern/sched_bsd.c =================================================================== RCS file: /cvs/src/sys/kern/sched_bsd.c,v retrieving revision 1.73 diff -u -p -r1.73 sched_bsd.c --- kern/sched_bsd.c 5 Dec 2022 23:18:37 -0000 1.73 +++ kern/sched_bsd.c 8 Dec 2022 06:10:16 -0000 @@ -53,7 +53,6 @@ #include #endif - int lbolt; /* once a second sleep address */ int rrticks_init; /* # of hardclock ticks per roundrobin() */ @@ -322,6 +321,8 @@ mi_switch(void) int sched_count; #endif + LLTRACE(lltrace_sched_enter); + assertwaitok(); KASSERT(p->p_stat != SONPROC); @@ -369,10 +370,13 @@ mi_switch(void) if (p != nextproc) { uvmexp.swtch++; + TRACEPOINT(sched, off__cpu, nextproc->p_tid + THREAD_PID_OFFSET, nextproc->p_p->ps_pid); cpu_switchto(p, nextproc); TRACEPOINT(sched, on__cpu, NULL); + + LLTRACE(lltrace_pidname, p); } else { TRACEPOINT(sched, remain__cpu, NULL); p->p_stat = SONPROC; @@ -393,6 +397,8 @@ mi_switch(void) #endif SCHED_ASSERT_UNLOCKED(); + + LLTRACE(lltrace_sched_leave); smr_idle(); Index: kern/subr_pool.c =================================================================== RCS file: /cvs/src/sys/kern/subr_pool.c,v retrieving revision 1.236 diff -u -p -r1.236 subr_pool.c --- kern/subr_pool.c 14 Aug 2022 01:58:28 -0000 1.236 +++ kern/subr_pool.c 8 Dec 2022 06:10:16 -0000 @@ -2038,9 +2038,8 @@ pool_cache_gc(struct pool *pp) contention = pp->pr_cache_contention; delta = contention - pp->pr_cache_contention_prev; - if (delta > 8 /* magic */) { - if ((ncpusfound * 8 * 2) <= pp->pr_cache_nitems) - pp->pr_cache_items += 8; + if ((ncpusfound * 8 * 2) <= pp->pr_cache_nitems) { + pp->pr_cache_items += (delta > 8 /* magic */) ? 
8 : 1; } else if (delta == 0) { if (pp->pr_cache_items > 8) pp->pr_cache_items--; Index: net/art.c =================================================================== RCS file: /cvs/src/sys/net/art.c,v retrieving revision 1.29 diff -u -p -r1.29 art.c --- net/art.c 12 Nov 2020 15:25:28 -0000 1.29 +++ net/art.c 8 Dec 2022 06:10:16 -0000 @@ -899,11 +899,27 @@ art_get(void *dst, uint8_t plen) an->an_plen = plen; SRPL_INIT(&an->an_rtlist); + refcnt_init(&an->an_refcnt); + + return (an); +} + +struct art_node * +art_take(struct art_node *an) +{ + refcnt_take(&an->an_refcnt); return (an); } void +art_rele(struct art_node *an) +{ + if (refcnt_rele(&an->an_refcnt)) + pool_put(&an_pool, an); +} + +void art_put(struct art_node *an) { KASSERT(SRPL_EMPTY_LOCKED(&an->an_rtlist)); @@ -931,7 +947,7 @@ art_gc(void *null) srp_finalize(an, "artnfini"); - pool_put(&an_pool, an); + art_rele(an); an = next; } Index: net/art.h =================================================================== RCS file: /cvs/src/sys/net/art.h,v retrieving revision 1.21 diff -u -p -r1.21 art.h --- net/art.h 2 Mar 2021 17:50:41 -0000 1.21 +++ net/art.h 8 Dec 2022 06:10:16 -0000 @@ -20,6 +20,7 @@ #define _NET_ART_H_ #include +#include #include #define ART_MAXLVL 32 /* We currently use 32 levels for IPv6. */ @@ -89,6 +90,7 @@ struct art_node { SRPL_HEAD(, rtentry) an__rtlist; /* Route related to this node */ struct art_node *an__gc; /* Entry on GC list */ } an_pointer; + struct refcnt an_refcnt; uint8_t an_plen; /* Prefix length */ }; #define an_rtlist an_pointer.an__rtlist @@ -107,6 +109,8 @@ int art_walk(struct art_root *, int (*)(struct art_node *, void *), void *); struct art_node *art_get(void *, uint8_t); +struct art_node *art_take(struct art_node *); +void art_rele(struct art_node *); void art_put(struct art_node *); #endif /* _NET_ART_H_ */ Index: net/ethertypes.h =================================================================== RCS file: /cvs/src/sys/net/ethertypes.h,v retrieving revision 1.16 diff -u -p -r1.16 ethertypes.h --- net/ethertypes.h 5 Jan 2022 05:19:22 -0000 1.16 +++ net/ethertypes.h 8 Dec 2022 06:10:16 -0000 @@ -303,6 +303,8 @@ #define ETHERTYPE_AOE 0x88A2 /* ATA over Ethernet */ #define ETHERTYPE_QINQ 0x88A8 /* 802.1ad VLAN stacking */ +#define ETHERTYPE_802_EX1 0x88B5 /* IEEE Std 802 - Local Experimental */ +#define ETHERTYPE_802_EX2 0x88B6 /* IEEE Std 802 - Local Experimental */ #define ETHERTYPE_LLDP 0x88CC /* Link Layer Discovery Protocol */ #define ETHERTYPE_MACSEC 0x88e5 /* 802.1AE MACsec */ #define ETHERTYPE_PBB 0x88e7 /* 802.1Q Provider Backbone Bridging */ #define ETHERTYPE_NSH 0x984F /* Network Service Header (RFC8300) */ Index: net/if_aggr.c =================================================================== RCS file: /cvs/src/sys/net/if_aggr.c,v retrieving revision 1.39 diff -u -p -r1.39 if_aggr.c --- net/if_aggr.c 5 Feb 2022 03:56:16 -0000 1.39 +++ net/if_aggr.c 8 Dec 2022 06:10:16 -0000 @@ -299,7 +299,10 @@ static const char *lacp_mux_event_names[ * aggr interface */ -#define AGGR_MAX_PORTS 32 +#define AGGR_PORT_BITS 5 +#define AGGR_FLOWID_SHIFT (16 - AGGR_PORT_BITS) + +#define AGGR_MAX_PORTS (1 << AGGR_PORT_BITS) #define AGGR_MAX_SLOW_PKTS (AGGR_MAX_PORTS * 3) struct aggr_multiaddr { @@ -660,7 +663,7 @@ aggr_transmit(struct aggr_softc *sc, con #endif if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID)) - flow = m->m_pkthdr.ph_flowid; + flow = m->m_pkthdr.ph_flowid >> AGGR_FLOWID_SHIFT; ifp0 = map->m_ifp0s[flow % AGGR_MAX_PORTS]; Index: net/if_pfsync.c
=================================================================== RCS file: /cvs/src/sys/net/if_pfsync.c,v retrieving revision 1.311 diff -u -p -r1.311 if_pfsync.c --- net/if_pfsync.c 11 Nov 2022 11:47:13 -0000 1.311 +++ net/if_pfsync.c 8 Dec 2022 06:10:16 -0000 @@ -157,16 +157,16 @@ const struct { }; struct pfsync_q { - void (*write)(struct pf_state *, void *); + int (*write)(struct pf_state *, void *); size_t len; u_int8_t action; }; /* we have one of these for every PFSYNC_S_ */ -void pfsync_out_state(struct pf_state *, void *); -void pfsync_out_iack(struct pf_state *, void *); -void pfsync_out_upd_c(struct pf_state *, void *); -void pfsync_out_del(struct pf_state *, void *); +int pfsync_out_state(struct pf_state *, void *); +int pfsync_out_iack(struct pf_state *, void *); +int pfsync_out_upd_c(struct pf_state *, void *); +int pfsync_out_del(struct pf_state *, void *); struct pfsync_q pfsync_qs[] = { { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, @@ -1301,24 +1301,26 @@ pfsyncioctl(struct ifnet *ifp, u_long cm return (0); } -void +int pfsync_out_state(struct pf_state *st, void *buf) { struct pfsync_state *sp = buf; pf_state_export(sp, st); + return (0); } -void +int pfsync_out_iack(struct pf_state *st, void *buf) { struct pfsync_ins_ack *iack = buf; iack->id = st->id; iack->creatorid = st->creatorid; + return (0); } -void +int pfsync_out_upd_c(struct pf_state *st, void *buf) { struct pfsync_upd_c *up = buf; @@ -1329,9 +1331,10 @@ pfsync_out_upd_c(struct pf_state *st, vo pf_state_peer_hton(&st->dst, &up->dst); up->creatorid = st->creatorid; up->timeout = st->timeout; + return (0); } -void +int pfsync_out_del(struct pf_state *st, void *buf) { struct pfsync_del_c *dp = buf; @@ -1340,6 +1343,7 @@ pfsync_out_del(struct pf_state *st, void dp->creatorid = st->creatorid; SET(st->state_flags, PFSTATE_NOSYNC); + return (0); } void @@ -1671,8 +1675,8 @@ pfsync_sendout(void) KASSERT(st->snapped == 1); st->sync_state = PFSYNC_S_NONE; st->snapped = 0; - pfsync_qs[q].write(st, m->m_data + offset); - offset += pfsync_qs[q].len; + if (pfsync_qs[q].write(st, m->m_data + offset) == 0) + offset += pfsync_qs[q].len; pf_state_unref(st); count++; Index: net/if_pfsync.h =================================================================== RCS file: /cvs/src/sys/net/if_pfsync.h,v retrieving revision 1.59 diff -u -p -r1.59 if_pfsync.h --- net/if_pfsync.h 11 Nov 2022 11:47:13 -0000 1.59 +++ net/if_pfsync.h 8 Dec 2022 06:10:16 -0000 @@ -326,7 +326,7 @@ int pfsync_sysctl(int *, u_int, void #define PFSYNC_SI_CKSUM 0x02 #define PFSYNC_SI_ACK 0x04 int pfsync_state_import(struct pfsync_state *, int); -void pfsync_state_export(struct pfsync_state *, +int pfsync_state_export(struct pfsync_state *, struct pf_state *); void pfsync_insert_state(struct pf_state *); Index: net/if_vlan.c =================================================================== RCS file: /cvs/src/sys/net/if_vlan.c,v retrieving revision 1.210 diff -u -p -r1.210 if_vlan.c --- net/if_vlan.c 10 Aug 2022 09:01:48 -0000 1.210 +++ net/if_vlan.c 8 Dec 2022 06:10:16 -0000 @@ -253,8 +253,16 @@ vlan_transmit(struct vlan_softc *sc, str uint8_t prio; #if NBPFILTER > 0 - if (ifp->if_bpf) - bpf_mtap_ether(ifp->if_bpf, m, BPF_DIRECTION_OUT); + { + caddr_t if_bpf = READ_ONCE(ifp->if_bpf); + if (if_bpf) { + if (bpf_mtap_ether(if_bpf, + m, BPF_DIRECTION_OUT)) { + m_freem(m); + return; + } + } + } #endif /* NBPFILTER > 0 */ prio = (txprio == IF_HDRPRIO_PACKET) ?
Index: net/ifq.c =================================================================== RCS file: /cvs/src/sys/net/ifq.c,v retrieving revision 1.47 diff -u -p -r1.47 ifq.c --- net/ifq.c 22 Nov 2022 03:40:53 -0000 1.47 +++ net/ifq.c 8 Dec 2022 06:10:16 -0000 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -127,7 +128,10 @@ ifq_is_serialized(struct ifqueue *ifq) void ifq_start(struct ifqueue *ifq) { - if (ifq_len(ifq) >= min(ifq->ifq_if->if_txmit, ifq->ifq_maxlen)) { + struct ifnet *ifp = ifq->ifq_if; + + if (ISSET(ifp->if_xflags, IFXF_MPSAFE) && + ifq_len(ifq) >= min(ifp->if_txmit, ifq->ifq_maxlen)) { task_del(ifq->ifq_softnet, &ifq->ifq_bundle); ifq_run_start(ifq); } else @@ -144,6 +148,7 @@ ifq_start_task(void *p) ifq_empty(ifq) || ifq_is_oactive(ifq)) return; + LLTRACE(lltrace_pkts, LLTRACE_PKTS_T_IFQ, ifq_len(ifq)); ifp->if_qstart(ifq); } @@ -154,6 +159,7 @@ ifq_restart_task(void *p) struct ifnet *ifp = ifq->ifq_if; ifq_clr_oactive(ifq); + LLTRACE(lltrace_pkts, LLTRACE_PKTS_T_IFQ, ifq_len(ifq)); ifp->if_qstart(ifq); } @@ -243,7 +249,8 @@ void ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx) { ifq->ifq_if = ifp; - ifq->ifq_softnet = net_tq(ifp->if_index + idx); + ifq->ifq_softnet = ISSET(ifp->if_xflags, IFXF_MPSAFE) ? + net_tq(idx) : systq; ifq->ifq_softc = NULL; mtx_init(&ifq->ifq_mtx, IPL_NET); @@ -577,8 +584,12 @@ ifq_mfreeml(struct ifqueue *ifq, struct #if NKSTAT > 0 struct ifiq_kstat_data { + struct kstat_kv kd_histogram[IFIQ_HISTOGRAM_BUCKETS]; + struct kstat_kv kd_packets; struct kstat_kv kd_bytes; + struct kstat_kv kd_hdrops; + struct kstat_kv kd_fdrops; struct kstat_kv kd_qdrops; struct kstat_kv kd_errors; struct kstat_kv kd_qlen; @@ -588,10 +599,37 @@ struct ifiq_kstat_data { }; static const struct ifiq_kstat_data ifiq_kstat_tpl = { + { + KSTAT_KV_UNIT_INITIALIZER("histo1", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("histo2", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("histo4", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("histo8", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("histo16", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("histo32", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("histo64", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("histo128", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("histo256", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("histo512", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + }, + KSTAT_KV_UNIT_INITIALIZER("packets", KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), KSTAT_KV_UNIT_INITIALIZER("bytes", KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES), + KSTAT_KV_UNIT_INITIALIZER("hdrops", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("fdrops", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), KSTAT_KV_UNIT_INITIALIZER("qdrops", KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), KSTAT_KV_UNIT_INITIALIZER("errors", @@ -610,10 +648,16 @@ ifiq_kstat_copy(struct kstat *ks, void * { struct ifiqueue *ifiq = ks->ks_softc; struct ifiq_kstat_data *kd = dst; + size_t i; *kd = ifiq_kstat_tpl; + for (i = 0; i < IFIQ_HISTOGRAM_BUCKETS; i++) + kstat_kv_u64(&kd->kd_histogram[i]) = ifiq->ifiq_histogram[i]; + kstat_kv_u64(&kd->kd_packets) = ifiq->ifiq_packets; kstat_kv_u64(&kd->kd_bytes) = ifiq->ifiq_bytes; + kstat_kv_u64(&kd->kd_hdrops) = ifiq->ifiq_hdrops; + 
kstat_kv_u64(&kd->kd_fdrops) = ifiq->ifiq_fdrops; kstat_kv_u64(&kd->kd_qdrops) = ifiq->ifiq_qdrops; kstat_kv_u64(&kd->kd_errors) = ifiq->ifiq_errors; kstat_kv_u32(&kd->kd_qlen) = ml_len(&ifiq->ifiq_ml); @@ -631,7 +675,8 @@ void ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx) { ifiq->ifiq_if = ifp; - ifiq->ifiq_softnet = net_tq(ifp->if_index + idx); + ifiq->ifiq_softnet = ISSET(ifp->if_xflags, IFXF_MPSAFE) ? + net_tq(idx) : systq; ifiq->ifiq_softc = NULL; mtx_init(&ifiq->ifiq_mtx, IPL_NET); @@ -674,8 +719,8 @@ ifiq_destroy(struct ifiqueue *ifiq) ml_purge(&ifiq->ifiq_ml); } -unsigned int ifiq_maxlen_drop = 2048 * 5; -unsigned int ifiq_maxlen_return = 2048 * 3; +unsigned int ifiq_pressure_drop = 8; +unsigned int ifiq_pressure_return = 6; int ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml) @@ -684,7 +729,10 @@ ifiq_input(struct ifiqueue *ifiq, struct struct mbuf *m; uint64_t packets; uint64_t bytes = 0; - unsigned int len; + unsigned int pressure = 0; + unsigned int fdrops = 0; + unsigned int hdrops = 0; + unsigned int bucket; #if NBPFILTER > 0 caddr_t if_bpf; #endif @@ -696,8 +744,17 @@ ifiq_input(struct ifiqueue *ifiq, struct m->m_pkthdr.ph_ifidx = ifp->if_index; m->m_pkthdr.ph_rtableid = ifp->if_rdomain; bytes += m->m_pkthdr.len; + hdrops += m->m_pkthdr.ph_drops; } packets = ml_len(ml); + bucket = fls(packets); + if (bucket > IFIQ_HISTOGRAM_BUCKETS) + bucket = IFIQ_HISTOGRAM_BUCKETS; + bucket--; + + LLTRACE(lltrace_pkts, LLTRACE_PKTS_T_IFIQ, packets); + if (hdrops) + LLTRACE(lltrace_pkts, LLTRACE_PKTS_T_HDROP, hdrops); #if NBPFILTER > 0 if_bpf = ifp->if_bpf; @@ -707,16 +764,20 @@ ifiq_input(struct ifiqueue *ifiq, struct ml_init(ml); while ((m = ml_dequeue(&ml0)) != NULL) { - if ((*ifp->if_bpf_mtap)(if_bpf, m, BPF_DIRECTION_IN)) + if ((*ifp->if_bpf_mtap)(if_bpf, m, BPF_DIRECTION_IN)) { + fdrops++; m_freem(m); - else + } else ml_enqueue(ml, m); } if (ml_empty(ml)) { mtx_enter(&ifiq->ifiq_mtx); + ifiq->ifiq_histogram[bucket]++; ifiq->ifiq_packets += packets; ifiq->ifiq_bytes += bytes; + ifiq->ifiq_hdrops += hdrops; + ifiq->ifiq_fdrops += fdrops; mtx_leave(&ifiq->ifiq_mtx); return (0); @@ -725,12 +786,15 @@ ifiq_input(struct ifiqueue *ifiq, struct #endif mtx_enter(&ifiq->ifiq_mtx); + ifiq->ifiq_histogram[bucket]++; ifiq->ifiq_packets += packets; ifiq->ifiq_bytes += bytes; + ifiq->ifiq_hdrops += hdrops; + ifiq->ifiq_fdrops += fdrops; - len = ml_len(&ifiq->ifiq_ml); if (__predict_true(!ISSET(ifp->if_xflags, IFXF_MONITOR))) { - if (len > ifiq_maxlen_drop) + pressure = ++ifiq->ifiq_pressure; + if (pressure > ifiq_pressure_drop) ifiq->ifiq_qdrops += ml_len(ml); else { ifiq->ifiq_enqueues++; @@ -741,10 +805,12 @@ ifiq_input(struct ifiqueue *ifiq, struct if (ml_empty(ml)) task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task); - else + else { + LLTRACE(lltrace_pkts, LLTRACE_PKTS_T_QDROP, ml_len(ml)); ml_purge(ml); + } - return (len > ifiq_maxlen_return); + return (pressure > ifiq_pressure_return); } void @@ -754,6 +820,7 @@ ifiq_add_data(struct ifiqueue *ifiq, str data->ifi_ipackets += ifiq->ifiq_packets; data->ifi_ibytes += ifiq->ifiq_bytes; data->ifi_iqdrops += ifiq->ifiq_qdrops; + data->ifi_collisions += ifiq->ifiq_hdrops; mtx_leave(&ifiq->ifiq_mtx); } @@ -779,11 +846,14 @@ ifiq_process(void *arg) return; mtx_enter(&ifiq->ifiq_mtx); + ifiq->ifiq_pressure = 0; ifiq->ifiq_dequeues++; ml = ifiq->ifiq_ml; ml_init(&ifiq->ifiq_ml); mtx_leave(&ifiq->ifiq_mtx); + LLTRACE(lltrace_pkts, LLTRACE_PKTS_T_NETTQ, ml_len(&ml)); + if_input_process(ifiq->ifiq_if, &ml); } @@ -792,8 +862,6 @@ 
net_ifiq_sysctl(int *name, u_int namelen void *newp, size_t newlen) { int error = EOPNOTSUPP; -/* pressure is disabled for 6.6-release */ -#if 0 int val; if (namelen != 1) @@ -822,7 +890,6 @@ net_ifiq_sysctl(int *name, u_int namelen error = EOPNOTSUPP; break; } -#endif return (error); } Index: net/ifq.h =================================================================== RCS file: /cvs/src/sys/net/ifq.h,v retrieving revision 1.35 diff -u -p -r1.35 ifq.h --- net/ifq.h 22 Nov 2022 03:40:53 -0000 1.35 +++ net/ifq.h 8 Dec 2022 06:10:16 -0000 @@ -72,6 +72,8 @@ struct ifqueue { unsigned int ifq_idx; }; +#define IFIQ_HISTOGRAM_BUCKETS 10 + struct ifiqueue { struct ifnet *ifiq_if; struct taskq *ifiq_softnet; @@ -88,8 +90,11 @@ struct ifiqueue { unsigned int ifiq_pressure; /* counters */ + uint64_t ifiq_histogram[IFIQ_HISTOGRAM_BUCKETS]; uint64_t ifiq_packets; uint64_t ifiq_bytes; + uint64_t ifiq_hdrops; + uint64_t ifiq_fdrops; uint64_t ifiq_qdrops; uint64_t ifiq_errors; uint64_t ifiq_mcasts; Index: net/pf.c =================================================================== RCS file: /cvs/src/sys/net/pf.c,v retrieving revision 1.1156 diff -u -p -r1.1156 pf.c --- net/pf.c 25 Nov 2022 20:27:53 -0000 1.1156 +++ net/pf.c 8 Dec 2022 06:10:16 -0000 @@ -185,7 +185,8 @@ int pf_translate_icmp_af(struct pf_pd void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, int, sa_family_t, struct pf_rule *, u_int); void pf_detach_state(struct pf_state *); -void pf_state_key_detach(struct pf_state *, int); +void pf_state_key_detach(struct pf_state *, + struct pf_state_key *); u_int32_t pf_tcp_iss(struct pf_pdesc *); void pf_rule_to_actions(struct pf_rule *, struct pf_rule_actions *); @@ -260,6 +261,9 @@ void pf_state_key_unlink_inpcb(struct void pf_inpcb_unlink_state_key(struct inpcb *); void pf_pktenqueue_delayed(void *); int32_t pf_state_expires(const struct pf_state *, uint8_t); +void pf_state_keys_take(struct pf_state *, + struct pf_state_key **); +void pf_state_keys_rele(struct pf_state_key **); #if NPFLOG > 0 void pf_log_matches(struct pf_pdesc *, struct pf_rule *, @@ -776,7 +780,8 @@ pf_state_key_attach(struct pf_state_key s->key[idx] = sk; if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) { - pf_state_key_detach(s, idx); + pf_state_key_detach(s, s->key[idx]); + s->key[idx] = NULL; return (-1); } si->s = s; @@ -796,42 +801,50 @@ pf_state_key_attach(struct pf_state_key void pf_detach_state(struct pf_state *s) { - if (s->key[PF_SK_WIRE] == s->key[PF_SK_STACK]) - s->key[PF_SK_WIRE] = NULL; + struct pf_state_key *key[2]; + + mtx_enter(&s->mtx); + key[PF_SK_WIRE] = s->key[PF_SK_WIRE]; + key[PF_SK_STACK] = s->key[PF_SK_STACK]; + s->key[PF_SK_WIRE] = NULL; + s->key[PF_SK_STACK] = NULL; + mtx_leave(&s->mtx); + + if (key[PF_SK_WIRE] == key[PF_SK_STACK]) + key[PF_SK_WIRE] = NULL; - if (s->key[PF_SK_STACK] != NULL) - pf_state_key_detach(s, PF_SK_STACK); + if (key[PF_SK_STACK] != NULL) + pf_state_key_detach(s, key[PF_SK_STACK]); - if (s->key[PF_SK_WIRE] != NULL) - pf_state_key_detach(s, PF_SK_WIRE); + if (key[PF_SK_WIRE] != NULL) + pf_state_key_detach(s, key[PF_SK_WIRE]); } void -pf_state_key_detach(struct pf_state *s, int idx) +pf_state_key_detach(struct pf_state *s, struct pf_state_key *key) { struct pf_state_item *si; - struct pf_state_key *sk; - if (s->key[idx] == NULL) + PF_STATE_ASSERT_LOCKED(); + + if (key == NULL) return; - si = TAILQ_FIRST(&s->key[idx]->states); + si = TAILQ_FIRST(&key->states); while (si && si->s != s) si = TAILQ_NEXT(si, entry); if (si) { - TAILQ_REMOVE(&s->key[idx]->states, si, entry); + 
TAILQ_REMOVE(&key->states, si, entry); pool_put(&pf_state_item_pl, si); } - sk = s->key[idx]; - s->key[idx] = NULL; - if (TAILQ_EMPTY(&sk->states)) { - RB_REMOVE(pf_state_tree, &pf_statetbl, sk); - sk->removed = 1; - pf_state_key_unlink_reverse(sk); - pf_state_key_unlink_inpcb(sk); - pf_state_key_unref(sk); + if (TAILQ_EMPTY(&key->states)) { + RB_REMOVE(pf_state_tree, &pf_statetbl, key); + key->removed = 1; + pf_state_key_unlink_reverse(key); + pf_state_key_unlink_inpcb(key); + pf_state_key_unref(key); } } @@ -994,7 +1007,9 @@ pf_state_insert(struct pfi_kif *kif, str } *skw = s->key[PF_SK_WIRE]; if (pf_state_key_attach(*sks, s, PF_SK_STACK)) { - pf_state_key_detach(s, PF_SK_WIRE); + pf_state_key_detach(s, s->key[PF_SK_WIRE]); + s->key[PF_SK_WIRE] = NULL; + *skw = NULL; PF_STATE_EXIT_WRITE(); return (-1); } @@ -1224,30 +1239,35 @@ pf_state_peer_ntoh(const struct pfsync_s } } -void +int pf_state_export(struct pfsync_state *sp, struct pf_state *st) { int32_t expire; + struct pf_state_key *key[2]; memset(sp, 0, sizeof(struct pfsync_state)); /* copy from state key */ - sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; - sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; - sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; - sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; - sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain); - sp->key[PF_SK_WIRE].af = st->key[PF_SK_WIRE]->af; - sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; - sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; - sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; - sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; - sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain); - sp->key[PF_SK_STACK].af = st->key[PF_SK_STACK]->af; + pf_state_keys_take(st, key); + if ((key[PF_SK_WIRE] == NULL) || (key[PF_SK_STACK] == NULL)) + return (-1); + + sp->key[PF_SK_WIRE].addr[0] = key[PF_SK_WIRE]->addr[0]; + sp->key[PF_SK_WIRE].addr[1] = key[PF_SK_WIRE]->addr[1]; + sp->key[PF_SK_WIRE].port[0] = key[PF_SK_WIRE]->port[0]; + sp->key[PF_SK_WIRE].port[1] = key[PF_SK_WIRE]->port[1]; + sp->key[PF_SK_WIRE].rdomain = htons(key[PF_SK_WIRE]->rdomain); + sp->key[PF_SK_WIRE].af = key[PF_SK_WIRE]->af; + sp->key[PF_SK_STACK].addr[0] = key[PF_SK_STACK]->addr[0]; + sp->key[PF_SK_STACK].addr[1] = key[PF_SK_STACK]->addr[1]; + sp->key[PF_SK_STACK].port[0] = key[PF_SK_STACK]->port[0]; + sp->key[PF_SK_STACK].port[1] = key[PF_SK_STACK]->port[1]; + sp->key[PF_SK_STACK].rdomain = htons(key[PF_SK_STACK]->rdomain); + sp->key[PF_SK_STACK].af = key[PF_SK_STACK]->af; sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]); sp->rtableid[PF_SK_STACK] = htonl(st->rtableid[PF_SK_STACK]); - sp->proto = st->key[PF_SK_WIRE]->proto; - sp->af = st->key[PF_SK_WIRE]->af; + sp->proto = key[PF_SK_WIRE]->proto; + sp->af = key[PF_SK_WIRE]->af; /* copy from state */ strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); @@ -1294,6 +1314,10 @@ pf_state_export(struct pfsync_state *sp, sp->set_tos = st->set_tos; sp->set_prio[0] = st->set_prio[0]; sp->set_prio[1] = st->set_prio[1]; + + pf_state_keys_rele(key); + + return (0); } int @@ -8031,4 +8055,20 @@ pf_pktenqueue_delayed(void *arg) m_freem(pdy->m); pool_put(&pf_pktdelay_pl, pdy); +} + +void +pf_state_keys_take(struct pf_state *st, struct pf_state_key *keys[]) +{ + mtx_enter(&st->mtx); + keys[PF_SK_WIRE] = pf_state_key_ref(st->key[PF_SK_WIRE]); + keys[PF_SK_STACK] = pf_state_key_ref(st->key[PF_SK_STACK]); + 
mtx_leave(&st->mtx); +} + +void +pf_state_keys_rele(struct pf_state_key *keys[]) +{ + pf_state_key_unref(keys[PF_SK_WIRE]); + pf_state_key_unref(keys[PF_SK_STACK]); } Index: net/pfvar.h =================================================================== RCS file: /cvs/src/sys/net/pfvar.h,v retrieving revision 1.521 diff -u -p -r1.521 pfvar.h --- net/pfvar.h 25 Nov 2022 20:27:53 -0000 1.521 +++ net/pfvar.h 8 Dec 2022 06:10:16 -0000 @@ -741,7 +741,6 @@ struct pf_state_cmp { u_int8_t pad[3]; }; -/* struct pf_state.state_flags */ #define PFSTATE_ALLOWOPTS 0x0001 #define PFSTATE_SLOPPY 0x0002 #define PFSTATE_PFLOW 0x0004 @@ -1656,7 +1655,7 @@ void pf_state_rm_src_node(struct pf_ extern struct pf_state *pf_find_state_byid(struct pf_state_cmp *); extern struct pf_state *pf_find_state_all(struct pf_state_key_cmp *, u_int, int *); -extern void pf_state_export(struct pfsync_state *, +extern int pf_state_export(struct pfsync_state *, struct pf_state *); int pf_state_import(const struct pfsync_state *, int); Index: net/route.c =================================================================== RCS file: /cvs/src/sys/net/route.c,v retrieving revision 1.414 diff -u -p -r1.414 route.c --- net/route.c 29 Aug 2022 07:51:45 -0000 1.414 +++ net/route.c 8 Dec 2022 06:10:16 -0000 @@ -370,7 +370,7 @@ rtalloc(struct sockaddr *dst, int flags, int rt_setgwroute(struct rtentry *rt, u_int rtableid) { - struct rtentry *prt, *nhrt; + struct rtentry *prt, *nhrt, *nhrt0; unsigned int rdomain = rtable_l2(rtableid); int error; @@ -379,35 +379,45 @@ rt_setgwroute(struct rtentry *rt, u_int KASSERT(ISSET(rt->rt_flags, RTF_GATEWAY)); /* If we cannot find a valid next hop bail. */ - nhrt = rt_match(rt->rt_gateway, NULL, RT_RESOLVE, rdomain); - if (nhrt == NULL) + nhrt0 = rt_match(rt->rt_gateway, NULL, RT_RESOLVE, rdomain); + if (nhrt0 == NULL) return (ENOENT); - /* Next hop entry must be on the same interface. */ - if (nhrt->rt_ifidx != rt->rt_ifidx) { - struct sockaddr_in6 sa_mask; - - if (!ISSET(nhrt->rt_flags, RTF_LLINFO) || - !ISSET(nhrt->rt_flags, RTF_CLONED)) { - rtfree(nhrt); - return (EHOSTUNREACH); + rtref(nhrt0); + nhrt = nhrt0; + + while (ISSET(nhrt->rt_flags, RTF_LLINFO)) { + if (nhrt->rt_ifidx == rt->rt_ifidx) { + rtfree(nhrt0); + goto llinfo; } + nhrt = rtable_iterate(nhrt); + if (nhrt == NULL) + break; + } + rtfree(nhrt); + + if (ISSET(nhrt0->rt_flags, RTF_LLINFO)) { /* * We found a L2 entry, so we might have multiple - * RTF_CLONING routes for the same subnet. Query - * the first route of the multipath chain and iterate - * until we find the correct one. + * RTF_CLONING routes for the same subnet. Try to + * pick the correct one. */ - prt = rtable_lookup(rdomain, rt_key(nhrt->rt_parent), - rt_plen2mask(nhrt->rt_parent, &sa_mask), NULL, RTP_ANY); - rtfree(nhrt); + prt = rtable_first(nhrt0->rt_parent); + rtfree(nhrt0); + for (;;) { + if (prt == NULL) + return (EHOSTUNREACH); + + if (prt->rt_ifidx == rt->rt_ifidx) + break; - while (prt != NULL && prt->rt_ifidx != rt->rt_ifidx) prt = rtable_iterate(prt); + } - /* We found nothing or a non-cloning MPATH route. */ - if (prt == NULL || !ISSET(prt->rt_flags, RTF_CLONING)) { + /* We found a non-cloning MPATH route, we're done. */ + if (!ISSET(prt->rt_flags, RTF_CLONING)) { rtfree(prt); return (EHOSTUNREACH); } @@ -429,6 +439,7 @@ rt_setgwroute(struct rtentry *rt, u_int return (ENETUNREACH); } +llinfo: /* Next hop is valid so remove possible old cache. 
*/ rt_putgwroute(rt); KASSERT(rt->rt_gwroute == NULL); Index: net/route.h =================================================================== RCS file: /cvs/src/sys/net/route.h,v retrieving revision 1.196 diff -u -p -r1.196 route.h --- net/route.h 28 Jun 2022 10:01:13 -0000 1.196 +++ net/route.h 8 Dec 2022 06:10:16 -0000 @@ -92,6 +92,8 @@ struct rt_metrics { #include #include +struct art_node; + struct rttimer; /* @@ -104,6 +106,7 @@ struct rttimer; */ struct rtentry { + struct art_node *rt_node; struct sockaddr *rt_dest; /* destination */ SRPL_ENTRY(rtentry) rt_next; /* Next multipath entry to our dst. */ struct sockaddr *rt_gateway; /* value */ Index: net/rtable.c =================================================================== RCS file: /cvs/src/sys/net/rtable.c,v retrieving revision 1.80 diff -u -p -r1.80 rtable.c --- net/rtable.c 29 Jun 2022 22:20:47 -0000 1.80 +++ net/rtable.c 8 Dec 2022 06:10:16 -0000 @@ -597,6 +597,7 @@ rtable_insert(unsigned int rtableid, str rt->rt_flags &= ~RTF_MPATH; rt->rt_dest = dst; rt->rt_plen = plen; + rt->rt_node = art_take(an); SRPL_INSERT_HEAD_LOCKED(&rt_rc, &an->an_rtlist, rt, rt_next); prev = art_insert(ar, an, addr, plen); @@ -604,6 +605,7 @@ rtable_insert(unsigned int rtableid, str SRPL_REMOVE_LOCKED(&rt_rc, &an->an_rtlist, rt, rtentry, rt_next); rt->rt_flags = rt_flags; + art_rele(rt->rt_node); art_put(an); if (prev == NULL) { @@ -636,6 +638,7 @@ rtable_insert(unsigned int rtableid, str } /* Put newly inserted entry at the right place. */ + rt->rt_node = art_take(an); rtable_mpath_insert(an, rt); } leave: @@ -677,6 +680,9 @@ rtable_delete(unsigned int rtableid, str goto leave; } + art_rele(rt->rt_node); + rt->rt_node = NULL; + /* * If other multipath route entries are still attached to * this ART node we only have to unlink it. 
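The hunk below adds rtable_first(), which rt_setgwroute() above uses to
restart a multipath walk from the head of an ART node's route list.  A
sketch of the intended pattern, with rt0 and wanted_ifidx as hypothetical
stand-ins for a referenced route and the interface being matched:

	struct rtentry *rt;

	for (rt = rtable_first(rt0); rt != NULL; rt = rtable_iterate(rt)) {
		/*
		 * rtable_first() returns the head of the multipath list
		 * with a new reference; rtable_iterate() releases its
		 * argument and returns the next entry referenced, so
		 * nothing leaks on the way through.
		 */
		if (rt->rt_ifidx == wanted_ifidx)
			break;
	}
	if (rt == NULL)
		return (EHOSTUNREACH);
	/* use rt, then rtfree(rt) when done */
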
@@ -775,6 +781,22 @@ rtable_iterate(struct rtentry *rt0) rtref(rt); SRPL_LEAVE(&sr); rtfree(rt0); + return (rt); +} + +struct rtentry * +rtable_first(struct rtentry *rtn) +{ + struct art_node *an; + struct rtentry *rt; + struct srp_ref sr; + + an = rtn->rt_node; + rt = SRPL_FIRST(&sr, &an->an_rtlist); + if (rt != NULL) + rtref(rt); + SRPL_LEAVE(&sr); + return (rt); } Index: net/rtable.h =================================================================== RCS file: /cvs/src/sys/net/rtable.h,v retrieving revision 1.26 diff -u -p -r1.26 rtable.h --- net/rtable.h 7 Nov 2020 09:51:40 -0000 1.26 +++ net/rtable.h 8 Dec 2022 06:10:16 -0000 @@ -57,6 +57,7 @@ int rtable_walk(unsigned int, sa_famil int rtable_mpath_capable(unsigned int, sa_family_t); struct rtentry *rtable_mpath_match(unsigned int, struct rtentry *, struct sockaddr *, uint8_t); +struct rtentry *rtable_first(struct rtentry *); int rtable_mpath_reprio(unsigned int, struct sockaddr *, int, uint8_t, struct rtentry *); Index: netinet/if_ether.c =================================================================== RCS file: /cvs/src/sys/netinet/if_ether.c,v retrieving revision 1.252 diff -u -p -r1.252 if_ether.c --- netinet/if_ether.c 7 Dec 2022 14:38:29 -0000 1.252 +++ netinet/if_ether.c 8 Dec 2022 06:10:16 -0000 @@ -556,7 +556,7 @@ in_arpinput(struct ifnet *ifp, struct mb struct sockaddr_in sin; struct in_addr isaddr, itaddr; char addr[INET_ADDRSTRLEN]; - int op, target = 0; + int op, resolve = 0; unsigned int rdomain; rdomain = rtable_l2(m->m_pkthdr.ph_rtableid); @@ -568,9 +568,6 @@ in_arpinput(struct ifnet *ifp, struct mb memcpy(&itaddr, ea->arp_tpa, sizeof(itaddr)); memcpy(&isaddr, ea->arp_spa, sizeof(isaddr)); - memset(&sin, 0, sizeof(sin)); - sin.sin_len = sizeof(sin); - sin.sin_family = AF_INET; if (ETHER_IS_MULTICAST(ea->arp_sha) && ETHER_IS_BROADCAST(ea->arp_sha)) { @@ -584,44 +581,66 @@ in_arpinput(struct ifnet *ifp, struct mb goto out; /* it's from me, ignore it. */ /* Check target against our interface addresses. */ + memset(&sin, 0, sizeof(sin)); + sin.sin_len = sizeof(sin); + sin.sin_family = AF_INET; sin.sin_addr = itaddr; rt = rtalloc(sintosa(&sin), 0, rdomain); - if (rtisvalid(rt) && ISSET(rt->rt_flags, RTF_LOCAL) && - rt->rt_ifidx == ifp->if_index) - target = 1; + while (rtisvalid(rt)) { + if (ISSET(rt->rt_flags, RTF_LOCAL) && + rt->rt_ifidx == ifp->if_index) { + resolve = RT_RESOLVE; + break; + } + + rt = rtable_iterate(rt); + } rtfree(rt); rt = NULL; #if NCARP > 0 - if (target && op == ARPOP_REQUEST && ifp->if_type == IFT_CARP && + if (resolve && op == ARPOP_REQUEST && ifp->if_type == IFT_CARP && !carp_iamatch(ifp)) goto out; #endif /* Do we have an ARP cache for the sender? Create if we are target. */ - rt = arplookup(&isaddr, target, 0, rdomain); + sin.sin_addr = isaddr; + for (rt = rtalloc(sintosa(&sin), resolve, rdomain); + rtisvalid(rt); rt = rtable_iterate(rt)) { + if (rt->rt_ifidx != ifp->if_index) + continue; + + if (!ISSET(rt->rt_flags, RTF_GATEWAY) && + ISSET(rt->rt_flags, RTF_LLINFO) && + rt->rt_gateway->sa_family == AF_LINK) + break; + } /* Check sender against our interface addresses. 
 */
-	if (rtisvalid(rt) && ISSET(rt->rt_flags, RTF_LOCAL) &&
-	    rt->rt_ifidx == ifp->if_index && isaddr.s_addr != INADDR_ANY) {
-		inet_ntop(AF_INET, &isaddr, addr, sizeof(addr));
-		log(LOG_ERR, "duplicate IP address %s sent from ethernet "
-		    "address %s\n", addr, ether_sprintf(ea->arp_sha));
-		itaddr = isaddr;
-	} else if (rt != NULL) {
-		int error;
-
-		KERNEL_LOCK();
-		error = arpcache(ifp, ea, rt);
-		KERNEL_UNLOCK();
-		if (error)
-			goto out;
+	if (rt != NULL) {
+		if (ISSET(rt->rt_flags, RTF_LOCAL) &&
+		    isaddr.s_addr != INADDR_ANY) {
+			inet_ntop(AF_INET, &isaddr, addr, sizeof(addr));
+			log(LOG_ERR, "duplicate IP address %s sent from "
+			    "ethernet address %s\n", addr,
+			    ether_sprintf(ea->arp_sha));
+			itaddr = isaddr;
+		} else {
+			int error;
+
+			KERNEL_LOCK();
+			error = arpcache(ifp, ea, rt);
+			KERNEL_UNLOCK();
+			if (error)
+				goto out;
+		}
 	}
 
 	if (op == ARPOP_REQUEST) {
 		uint8_t *eaddr;
 
-		if (target) {
+		if (resolve) {
 			/* We already have all info for the reply */
 			eaddr = LLADDR(ifp->if_sadl);
 		} else {
Index: sys/conf.h
===================================================================
RCS file: /cvs/src/sys/sys/conf.h,v
retrieving revision 1.160
diff -u -p -r1.160 conf.h
--- sys/conf.h	6 Nov 2022 13:03:52 -0000	1.160
+++ sys/conf.h	8 Dec 2022 06:10:16 -0000
@@ -326,6 +326,21 @@ extern struct cdevsw cdevsw[];
 	(dev_type_stop((*))) enodev, 0, \
 	(dev_type_mmap((*))) enodev }
 
+/* open, close, read, ioctl, kqfilter */
+#define cdev_lltrace_init(c,n) { \
+	.d_open = dev_init(c,n,open), \
+	.d_close = dev_init(c,n,close), \
+	.d_read = dev_init(c,n,read), \
+	.d_write = (dev_type_write((*))) enodev, \
+	.d_ioctl = dev_init(c,n,ioctl), \
+	.d_stop = (dev_type_stop((*))) enodev, \
+	.d_tty = NULL, \
+	.d_mmap = (dev_type_mmap((*))) enodev, \
+	.d_type = 0, \
+	.d_flags = 0, \
+	.d_kqfilter = dev_init(c,n,kqfilter), \
+}
+
 /* open, close, read, write, ioctl, stop, tty, mmap, kqfilter */
 #define cdev_wsdisplay_init(c,n) { \
 	dev_init(c,n,open), dev_init(c,n,close), dev_init(c,n,read), \
@@ -610,6 +625,7 @@ cdev_decl(wsmux);
 
 cdev_decl(ksyms);
 cdev_decl(kstat);
+cdev_decl(lltrace);
 
 cdev_decl(bio);
 cdev_decl(vscsi);
Index: sys/lltrace.h
===================================================================
RCS file: sys/lltrace.h
diff -N sys/lltrace.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ sys/lltrace.h	8 Dec 2022 06:10:16 -0000
@@ -0,0 +1,325 @@
+/*	$OpenBSD$ */
+
+/*
+ * Copyright (c) 2022 David Gwynne
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _SYS_LLTRACE_H_
+#define _SYS_LLTRACE_H_
+
+/*
+ * lltrace is heavily based on KUTrace (kernel/userland tracing) by
+ * Richard L. Sites.
+ */ + +#define LLTRACE_NSLOTS 8192 + +struct lltrace_buffer { + uint64_t llt_slots[LLTRACE_NSLOTS]; +}; + +#define LLTIOCSTART _IO('t',128) +#define LLTIOCSTOP _IO('t',129) +#define LLTIOCFLUSH _IO('t',130) + +/* + * trace until all the buffers are used, or trace and reuse buffers. + */ +#define LLTRACE_MODE_HEAD 0 +#define LLTRACE_MODE_TAIL 1 +#define LLTRACE_MODE_COUNT 2 + +#define LLTIOCSMODE _IOW('t', 131, unsigned int) +#define LLTIOCGMODE _IOR('t', 131, unsigned int) + +/* + * how much memory in MB to allocate for lltrace_buffer structs + * during tracing. + */ + +#define LLTRACE_BLEN_MIN 1 +#define LLTRACE_BLEN_MAX 128 + +#define LLTIOCSBLEN _IOW('t', 132, unsigned int) +#define LLTIOCGBLEN _IOR('t', 132, unsigned int) + +/* + * lltrace collects kernel events in per-CPU buffers. + */ + +/* + * The first 8 words of the per-CPU buffer are dedicated to metadata + * about the CPU and the period of time over which events were + * collected. + */ + +struct lltrace_header { + /* slots[0] */ + uint64_t h_cpu; + + /* slots[1] */ + uint64_t h_boottime; + + /* slots[2] */ + uint64_t h_start_cy; + /* slots[3] */ + uint64_t h_start_ns; + /* slots[4] */ + uint64_t h_end_cy; + /* slots[5] */ + uint64_t h_end_ns; + + /* slots[6] */ + uint32_t h_idletid; + uint32_t h_tid; + /* slots[7] */ + uint64_t h_zero; +}; + +/* + * The high 32-bits of the trace entry contain a timestamp and event id. + */ + +#define LLTRACE_TIMESTAMP_SHIFT 44 +#define LLTRACE_TIMESTAMP_BITS 20 +#define LLTRACE_TIMESTAMP_MASK ((1ULL << LLTRACE_TIMESTAMP_BITS) - 1) + +#define LLTRACE_EVENT_SHIFT 32 +#define LLTRACE_EVENT_BITS 12 +#define LLTRACE_EVENT_MASK ((1ULL << LLTRACE_EVENT_BITS) - 1) + +/* + * The low 32-bits vary depending on the event id. + */ + +/* full 32 bits are used */ +#define LLTRACE_ARG32_SHIFT 0 +#define LLTRACE_ARG32_BITS 32 +#define LLTRACE_ARG32_MASK ((1ULL << LLTRACE_ARG32_BITS) - 1) + +/* layout for syscalls/traps/irqs */ +#define LLTRACE_ARG0_SHIFT 0 +#define LLTRACE_ARG0_BITS 16 +#define LLTRACE_ARG0_MASK ((1ULL << LLTRACE_ARG0_BITS) - 1) + +#define LLTRACE_RETVAL_SHIFT 16 +#define LLTRACE_RETVAL_BITS 8 +#define LLTRACE_RETVAL_MASK ((1ULL << LLTRACE_RETVAL_BITS) - 1) + +#define LLTRACE_DUR_SHIFT 24 +#define LLTRACE_DUR_BITS 8 +#define LLTRACE_DUR_MASK ((1ULL << LLTRACE_DUR_BITS) - 1) + +/* + * lltrace event types + */ + +/* + * the high 3 bits of the event id defines how the rest of the bits are used. + */ + +#define LLTRACE_EVENT_T_MASK (0x7ULL << 9) +#define LLTRACE_EVENT_T_VARLEN (0x0ULL << 9) +#define LLTRACE_EVENT_T_MARK (0x1ULL << 9) +#define LLTRACE_EVENT_T_IRQ (0x2ULL << 9) +#define LLTRACE_EVENT_T_SYSCALL (0x4ULL << 9) +#define LLTRACE_EVENT_T_SYSRET (0x5ULL << 9) + +/* + * variable len events use extra slots on the ring. 
+ */
+
+#define LLTRACE_EVENT_VARLEN_MASK	(0x00fULL) /* low 4 bits are the len */
+
+#define LLTRACE_EVENT_PID		(LLTRACE_EVENT_T_VARLEN | 0x10)
+#define LLTRACE_EVENT_PID_ARG_KTHREAD	(1U << 31)
+#define LLTRACE_EVENT_LOCKNAME		(LLTRACE_EVENT_T_VARLEN | 0x70)
+
+/* hardcode the space used by PC entries */
+#define LLTRACE_EVENT_PC_K		(LLTRACE_EVENT_T_VARLEN | 0x80)
+#define LLTRACE_EVENT_PC_U		(LLTRACE_EVENT_T_VARLEN | 0x90)
+
+/*
+ * mark a particular event occurring
+ */
+
+#define LLTRACE_EVENT_IDLE		(LLTRACE_EVENT_T_MARK | 0x0)
+
+#define LLTRACE_EVENT_RUNNABLE		(LLTRACE_EVENT_T_MARK | 0x1)
+					/* arg32 is tid */
+
+#define LLTRACE_EVENT_TRAP		(LLTRACE_EVENT_T_MARK | 0x2)
+#define LLTRACE_EVENT_TRAPRET		(LLTRACE_EVENT_T_MARK | 0x3)
+					/* arg32 is trap id */
+#define LLTRACE_TRAP_PAGEFAULT		14 /* as per kutrace */
+
+#define LLTRACE_EVENT_SCHED		(LLTRACE_EVENT_T_MARK | 0x4)
+#define LLTRACE_EVENT_SCHEDRET		(LLTRACE_EVENT_T_MARK | 0x5)
+
+#define LLTRACE_EVENT_IPI		(LLTRACE_EVENT_T_MARK | 0x6)
+					/* arg32 is cpu */
+
+#define LLTRACE_EVENT_PKTS		(LLTRACE_EVENT_T_MARK | 0x7)
+#define LLTRACE_PKTS_T_SHIFT		28
+#define LLTRACE_PKTS_T_MASK		(0xf << LLTRACE_PKTS_T_SHIFT)
+#define LLTRACE_PKTS_T_IFIQ		(0x0 << LLTRACE_PKTS_T_SHIFT)
+#define LLTRACE_PKTS_T_NETTQ		(0x1 << LLTRACE_PKTS_T_SHIFT)
+#define LLTRACE_PKTS_T_IFQ		(0x2 << LLTRACE_PKTS_T_SHIFT)
+#define LLTRACE_PKTS_T_QDROP		(0x3 << LLTRACE_PKTS_T_SHIFT)
+#define LLTRACE_PKTS_T_HDROP		(0x4 << LLTRACE_PKTS_T_SHIFT)
+#define LLTRACE_PKTS_V_SHIFT		0
+#define LLTRACE_PKTS_V_MASK		(0xffff << LLTRACE_PKTS_V_SHIFT)
+
+#define LLTRACE_EVENT_LOCK(_t)		(LLTRACE_EVENT_T_MARK | 0x10 | (_t))
+#define LLTRACE_LOCK_NOACQUIRE		(0x00)
+#define LLTRACE_LOCK_ACQUIRE		(0x01)
+#define LLTRACE_LOCK_WAKEUP		(0x02)
+#define LLTRACE_RLOCK_NOACQUIRE		(0x03)
+#define LLTRACE_RLOCK_ACQUIRE		(0x04)
+#define LLTRACE_RLOCK_WAKEUP		(0x05)
+#define LLTRACE_KLOCK_NOACQUIRE		(0x06)
+#define LLTRACE_KLOCK_ACQUIRE		(0x07)
+#define LLTRACE_KLOCK_WAKEUP		(0x08)
+
+#define LLTRACE_EVENT_KFUNC_ENTER	(LLTRACE_EVENT_T_MARK | 0xf0)
+#define LLTRACE_EVENT_KFUNC_LEAVE	(LLTRACE_EVENT_T_MARK | 0xf1)
+#define LLTRACE_EVENT_MARK		(LLTRACE_EVENT_T_MARK | 0xff)
+
+/*
+ * irqs
+ */
+
+#define LLTRACE_EVENT_IRQ(_c)		(LLTRACE_EVENT_T_IRQ | 0x000 | (_c))
+#define LLTRACE_EVENT_IRQRET(_c)	(LLTRACE_EVENT_T_IRQ | 0x100 | (_c))
+
+#define LLTRACE_IRQ_LOCAL_TIMER		(0xecULL)	/* like linux */
+#define LLTRACE_IRQ_IPI			(0xfdULL)	/* like linux */
+
+#define LLTRACE_IRQ_BOTTOM_HALF		(0xffULL)	/* like kutrace */
+
+/*
+ * syscalls and returns from syscalls
+ */
+
+#define LLTRACE_SYSCALL_MASK(_c)	((uint64_t)(_c) & 0x1ff)
+
+#define LLTRACE_EVENT_SYSCALL(_c) \
+	(LLTRACE_EVENT_T_SYSCALL | LLTRACE_SYSCALL_MASK(_c))
+#define LLTRACE_EVENT_SYSRET(_c) \
+	(LLTRACE_EVENT_T_SYSRET | LLTRACE_SYSCALL_MASK(_c))
+
+/*
+ * KUTrace event types for compatibility
+ */
+
+#define KUTRACE_FILENAME	(0x001ULL)
+#define KUTRACE_PIDNAME		(0x002ULL)
+#define KUTRACE_METHODNAME	(0x003ULL)
+#define KUTRACE_TRAPNAME	(0x004ULL)
+#define KUTRACE_LOCKNAME	(0x007ULL)
+
+#define KUTRACE_USERPID		(0x200ULL)
+#define KUTRACE_RUNNABLE	(0x206ULL)
+#define KUTRACE_IPI		(0x207ULL)
+#define KUTRACE_MWAIT		(0x208ULL)
+#define KUTRACE_PSTATE		(0x209ULL)
+
+#define KUTRACE_MARKA		(0x20aULL)
+#define KUTRACE_MARKB		(0x20bULL)
+#define KUTRACE_MARKC		(0x20cULL)
+#define KUTRACE_MARKD		(0x20dULL)
+
+#define KUTRACE_LOCKNOACQUIRE	(0x210ULL)
+#define KUTRACE_LOCKACQUIRE	(0x211ULL)
+#define KUTRACE_LOCKWAKEUP	(0x212ULL)
+
+#define KUTRACE_PC_U		(0x280ULL)
+#define KUTRACE_PC_K		(0x281ULL)
+
+/* these are
in blocks of 256 */ +#define KUTRACE_TRAP (0x400ULL) +#define KUTRACE_IRQ (0x500ULL) +#define KUTRACE_TRAPRET (0x600ULL) +#define KUTRACE_IRQRET (0x700ULL) + +#define KUTRACE_LOCAL_TIMER_VECTOR (0xec) + +/* these are in blocks of 512 */ +#define KUTRACE_SYSCALL_MASK(_c) ((uint64_t)(_c) & 0x1ff) +#define KUTRACE_SYSCALL_SCHED 511 + +#define KUTRACE_SYSCALL(_c) (0x800ULL | KUTRACE_SYSCALL_MASK(_c)) +#define KUTRACE_SYSRET(_c) (0xa00ULL | KUTRACE_SYSCALL_MASK(_c)) + +/* Specific trap number for page fault */ +#define KUTRACE_PAGEFAULT 14 + +#ifdef _KERNEL + +struct lltrace_cpu; + +static inline struct lltrace_cpu * +lltrace_enter_spc(struct schedstate_percpu *spc) +{ + return (READ_ONCE(spc->spc_lltrace)); +} + +static inline struct lltrace_cpu * +lltrace_enter_cpu(struct cpu_info *ci) +{ + return lltrace_enter_spc(&ci->ci_schedstate); +} + +static inline struct lltrace_cpu * +lltrace_enter(void) +{ + return lltrace_enter_cpu(curcpu()); +} + +void lltrace_idle(struct lltrace_cpu *); +void lltrace_statclock(struct lltrace_cpu *, int, unsigned long); + +void lltrace_syscall(struct lltrace_cpu *, register_t, + size_t, const register_t *); +void lltrace_sysret(struct lltrace_cpu *, register_t, + int, const register_t [2]); +void lltrace_pidname(struct lltrace_cpu *, struct proc *); +void lltrace_sched_enter(struct lltrace_cpu *); +void lltrace_sched_leave(struct lltrace_cpu *); +void lltrace_runnable(struct lltrace_cpu *, struct proc *); + +void lltrace_trap(struct lltrace_cpu *, unsigned int); +void lltrace_trapret(struct lltrace_cpu *, unsigned int); + +void lltrace_lock(struct lltrace_cpu *, void *, unsigned int); +void lltrace_klock(struct lltrace_cpu *, void *, unsigned int); + +void lltrace_pkts(struct lltrace_cpu *, unsigned int, unsigned int); +void lltrace_mark(struct lltrace_cpu *); + +void lltrace_fn_enter(struct lltrace_cpu *, void *); +void lltrace_fn_leave(struct lltrace_cpu *, void *); + +/* MD bits */ + +void lltrace_ipi(struct lltrace_cpu *, unsigned int); +#define lltrace_ipi_bcast(_llt) lltrace_ipi((_llt), ~0U); + +void lltrace_irq(struct lltrace_cpu *, unsigned int, unsigned int); +void lltrace_irqret(struct lltrace_cpu *, unsigned int, unsigned int); + +#endif /* _KERNEL */ + +#endif /* _SYS_LLTRACE_H_ */ Index: sys/mbuf.h =================================================================== RCS file: /cvs/src/sys/sys/mbuf.h,v retrieving revision 1.255 diff -u -p -r1.255 mbuf.h --- sys/mbuf.h 15 Aug 2022 16:15:37 -0000 1.255 +++ sys/mbuf.h 8 Dec 2022 06:10:16 -0000 @@ -137,6 +137,7 @@ struct pkthdr { u_int ph_ifidx; /* rcv interface index */ u_int8_t ph_loopcnt; /* mbuf is looping in kernel */ u_int8_t ph_family; /* af, used when queueing */ + u_int8_t ph_drops; /* hardware packet drops */ struct pkthdr_pf pf; }; Index: sys/param.h =================================================================== RCS file: /cvs/src/sys/sys/param.h,v retrieving revision 1.139 diff -u -p -r1.139 param.h --- sys/param.h 20 Jul 2022 15:12:39 -0000 1.139 +++ sys/param.h 8 Dec 2022 06:10:16 -0000 @@ -111,6 +111,8 @@ #define PCATCH 0x100 /* OR'd with pri for tsleep to check signals */ #define PNORELOCK 0x200 /* OR'd with pri for msleep to not reacquire the mutex */ +#define PLLTRACE 0x400 + #endif /* _KERNEL */ #define NODEV (dev_t)(-1) /* non-existent device */ Index: sys/pool.h =================================================================== RCS file: /cvs/src/sys/sys/pool.h,v retrieving revision 1.78 diff -u -p -r1.78 pool.h --- sys/pool.h 2 Jan 2021 03:23:59 -0000 1.78 +++ sys/pool.h 8 Dec 
2022 06:10:16 -0000 @@ -150,8 +150,6 @@ union pool_lock { struct pool { union pool_lock pr_lock; - const struct pool_lock_ops * - pr_lock_ops; SIMPLEQ_ENTRY(pool) pr_poollist; struct pool_pagelist @@ -186,15 +184,18 @@ struct pool { #define PR_RWLOCK 0x0010 #define PR_WANTED 0x0100 - int pr_flags; - int pr_ipl; RBT_HEAD(phtree, pool_page_header) pr_phtree; - struct cpumem * pr_cache; + struct cpumem * pr_cache __aligned(512); unsigned long pr_cache_magic[2]; - union pool_lock pr_cache_lock; + int pr_flags; + int pr_ipl; + const struct pool_lock_ops * + pr_lock_ops; + + union pool_lock pr_cache_lock __aligned(512); struct pool_cache_lists pr_cache_lists; /* list of idle item lists */ u_int pr_cache_nitems; /* # of idle items */ @@ -205,7 +206,7 @@ struct pool { uint64_t pr_cache_ngc; /* # of times the gc released a list */ int pr_cache_nout; - u_int pr_align; + u_int pr_align __aligned(512); u_int pr_maxcolors; /* Cache coloring */ int pr_phoffset; /* Offset in page of page header */ @@ -239,6 +240,7 @@ struct pool { /* Physical memory configuration. */ const struct kmem_pa_mode * pr_crange; + }; #endif /* _KERNEL || _LIBKVM */ Index: sys/sched.h =================================================================== RCS file: /cvs/src/sys/sys/sched.h,v retrieving revision 1.57 diff -u -p -r1.57 sched.h --- sys/sched.h 25 Dec 2020 12:49:31 -0000 1.57 +++ sys/sched.h 8 Dec 2022 06:10:16 -0000 @@ -91,11 +91,13 @@ #define SCHED_NQS 32 /* 32 run queues. */ struct smr_entry; +struct lltrace_cpu; /* * Per-CPU scheduler state. */ struct schedstate_percpu { + struct lltrace_cpu *spc_lltrace; struct proc *spc_idleproc; /* idle proc for this cpu */ TAILQ_HEAD(prochead, proc) spc_qs[SCHED_NQS]; LIST_HEAD(,proc) spc_deadproc; Index: sys/syscall_mi.h =================================================================== RCS file: /cvs/src/sys/sys/syscall_mi.h,v retrieving revision 1.26 diff -u -p -r1.26 syscall_mi.h --- sys/syscall_mi.h 29 Jun 2022 12:06:11 -0000 1.26 +++ sys/syscall_mi.h 8 Dec 2022 06:10:16 -0000 @@ -45,7 +45,6 @@ #include #endif - /* * The MD setup for a system call has been done; here's the MI part. */ @@ -76,6 +75,7 @@ mi_syscall(struct proc *p, register_t co KERNEL_UNLOCK(); } #endif + LLTRACE_CPU(p->p_cpu, lltrace_syscall, code, callp->sy_argsize, argp); /* SP must be within MAP_STACK space */ if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p), @@ -112,6 +112,7 @@ static inline void mi_syscall_return(struct proc *p, register_t code, int error, const register_t retval[2]) { + LLTRACE_CPU(p->p_cpu, lltrace_sysret, code, error, retval); #ifdef SYSCALL_DEBUG KERNEL_LOCK(); scdebug_ret(p, code, error, retval); @@ -139,12 +140,13 @@ mi_syscall_return(struct proc *p, regist static inline void mi_child_return(struct proc *p) { -#if defined(SYSCALL_DEBUG) || defined(KTRACE) || NDT > 0 +#if defined(SYSCALL_DEBUG) || defined(KTRACE) || NDT > 0 || NLLT > 0 int code = (p->p_flag & P_THREAD) ? SYS___tfork : (p->p_p->ps_flags & PS_PPWAIT) ? 
SYS_vfork : SYS_fork;
 	const register_t child_retval[2] = { 0, 1 };
 #endif
 
+	LLTRACE_CPU(p->p_cpu, lltrace_sysret, code, 0, child_retval);
 
 	TRACEPOINT(sched, on__cpu, NULL);
 
 #ifdef SYSCALL_DEBUG
Index: sys/time.h
===================================================================
RCS file: /cvs/src/sys/sys/time.h,v
retrieving revision 1.62
diff -u -p -r1.62 time.h
--- sys/time.h	23 Jul 2022 22:58:51 -0000	1.62
+++ sys/time.h	8 Dec 2022 06:10:16 -0000
@@ -324,6 +324,8 @@ time_t	getuptime(void);
 uint64_t	nsecuptime(void);
 uint64_t	getnsecuptime(void);
 
+unsigned int	countertime(void);
+
 struct proc;
 int	clock_gettime(struct proc *, clockid_t, struct timespec *);
Index: sys/tracepoint.h
===================================================================
RCS file: /cvs/src/sys/sys/tracepoint.h,v
retrieving revision 1.2
diff -u -p -r1.2 tracepoint.h
--- sys/tracepoint.h	28 Jun 2022 09:32:28 -0000	1.2
+++ sys/tracepoint.h	8 Dec 2022 06:10:16 -0000
@@ -34,5 +34,35 @@
 #define TRACEINDEX(func, index, args...)
 
 #endif /* NDT > 0 */
+
+#include "llt.h"
+#if NLLT > 0
+#include <sys/lltrace.h>
+
+#define LLTRACE_SPC(_spc, _fn, ...) do { \
+	struct lltrace_cpu *_llt = lltrace_enter_spc((_spc)); \
+	if (_llt != NULL) \
+		(_fn)(_llt __VA_OPT__(,) __VA_ARGS__); \
+} while (0)
+
+#define LLTRACE_CPU(_ci, _fn, ...) do { \
+	struct lltrace_cpu *_llt = lltrace_enter_cpu((_ci)); \
+	if (_llt != NULL) \
+		(_fn)(_llt __VA_OPT__(,) __VA_ARGS__); \
+} while (0)
+
+#define LLTRACE(_fn, ...) do { \
+	struct lltrace_cpu *_llt = lltrace_enter(); \
+	if (_llt != NULL) \
+		(_fn)(_llt __VA_OPT__(,) __VA_ARGS__); \
+} while (0)
+
+#else /* NLLT > 0 */
+
+#define LLTRACE_SPC(_spc, _fn, ...)
+#define LLTRACE_CPU(_ci, _fn, ...)
+#define LLTRACE(_fn, ...)
+
+#endif /* NLLT > 0 */
 #endif /* _KERNEL */
 #endif /* _SYS_TRACEPOINT_H_ */
Index: uvm/uvm_fault.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
retrieving revision 1.133
diff -u -p -r1.133 uvm_fault.c
--- uvm/uvm_fault.c	4 Nov 2022 09:36:44 -0000	1.133
+++ uvm/uvm_fault.c	8 Dec 2022 06:10:17 -0000
@@ -577,6 +577,8 @@ uvm_fault(vm_map_t orig_map, vaddr_t vad
 	struct vm_page *pages[UVM_MAXRANGE];
 	int error;
 
+	LLTRACE(lltrace_trap, LLTRACE_TRAP_PAGEFAULT);
+
 	counters_inc(uvmexp_counters, faults);
 	TRACEPOINT(uvm, fault, vaddr, fault_type, access_type, NULL);
 
@@ -640,6 +642,8 @@ uvm_fault(vm_map_t orig_map, vaddr_t vad
 			}
 		}
 	}
+
+	LLTRACE(lltrace_trapret, LLTRACE_TRAP_PAGEFAULT);
 
 	return error;
 }
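
A note on the batch-size histogram added in ifiq_input() above: fls()
returns the index of the most significant set bit, so input batches land
in power-of-two buckets matching the "histo1".."histo512" kstat names.
The same computation as a standalone helper (the function name is
hypothetical, not part of the diff):

	/*
	 * map an input batch size to its ifiq_histogram[] bucket, as in
	 * ifiq_input(): 1 -> "histo1", 2..3 -> "histo2", 4..7 -> "histo4",
	 * and anything >= 512 clamps into "histo512".
	 */
	static unsigned int
	ifiq_histogram_bucket(unsigned int packets)
	{
		unsigned int bucket = fls(packets);	/* packets >= 1 */

		if (bucket > IFIQ_HISTOGRAM_BUCKETS)
			bucket = IFIQ_HISTOGRAM_BUCKETS;
		return (bucket - 1);
	}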
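
The pf_state_keys_take()/pf_state_keys_rele() pair added to pf.c snapshots
both state keys under st->mtx and holds references across the copy, so
pf_state_export() no longer races pf_detach_state().  A sketch of the
calling pattern (hypothetical caller):

	struct pf_state_key *key[2];

	pf_state_keys_take(st, key);
	if (key[PF_SK_WIRE] == NULL || key[PF_SK_STACK] == NULL) {
		/*
		 * the state is being detached; both keys are cleared
		 * together under st->mtx, so NULL here means give up.
		 */
		return (-1);
	}
	/* safely read key[PF_SK_WIRE]->addr, ->port, etc */
	pf_state_keys_rele(key);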
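
For reference, the slot layout declared in sys/lltrace.h packs a 20-bit
timestamp, a 12-bit event id, and a 32-bit argument into one uint64_t
(20 + 12 + 32 = 64).  A sketch of the encoding implied by those shifts and
masks; the helper name is hypothetical, and the kernel code that actually
does the packing is not part of this diff:

	static inline uint64_t
	lltrace_slot(uint64_t ts, uint64_t event, uint64_t arg32)
	{
		return (((ts & LLTRACE_TIMESTAMP_MASK) <<
		    LLTRACE_TIMESTAMP_SHIFT) |
		    ((event & LLTRACE_EVENT_MASK) << LLTRACE_EVENT_SHIFT) |
		    (arg32 & LLTRACE_ARG32_MASK));
	}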
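
Lastly, the LLTRACE* wrappers in sys/tracepoint.h are written as
do { ... } while (0) so each tracepoint expands to exactly one statement;
a bare block would misparse at call sites like this sketch:

	if (cond)
		LLTRACE(lltrace_mark);	/* one statement, if/else safe */
	else
		something_else();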