/*
 * Reviewer summary (leading commentary only; the patch text below is unchanged).
 *
 * This is a "diff -u -p" patch touching net/if_tun.c, net/ifq.c, net/ifq.h,
 * kern/spec_vnops.c, kern/vfs_subr.c, arch/amd64/amd64/trap.c and
 * arch/amd64/include/cpu.h.  The line breaks of the patch have been collapsed,
 * so hunks run together on a few very long lines and the header names after
 * "#include" are missing.
 *
 * What the hunks do:
 *  - struct tun_softc gains sc_gen (set from a static counter in tun_create)
 *    and replaces "dev_t sc_dev" with "struct vnode *sc_vp", which is read
 *    and written through SMR_PTR_GET / SMR_PTR_GET_LOCKED / SMR_PTR_SET_LOCKED.
 *  - tun_get matches a softc by comparing dev with vp->v_rdev and VCHR with
 *    vp->v_type, skipping entries whose sc_vp is NULL.
 *  - tun_dev_open looks the vnode up with vfinddev (panics if not found),
 *    returns ENXIO if vp->v_type is VBAD after sleeping, returns EBUSY if
 *    sc_vp is already set, otherwise vref()s the vnode and stores it.
 *  - tun_dev_close clears sc_vp, then calls smr_barrier() and vrele(vp);
 *    tun_link_state(ifp, LINK_STATE_DOWN) moves out of the TUN_STAYUP else
 *    branch to just after IFF_UP | IFF_RUNNING are cleared.
 *  - tun_clone_destroy calls VOP_REVOKE on sc_vp directly instead of going
 *    through vfinddev, and moves refcnt_finalize after the klist_invalidate
 *    calls.
 *  - A 128-entry ring buffer (tun_log_entries) is filled by __tun_log and
 *    printed by tun_log_dump; tun_log calls are added to if_tun.c, spec_open,
 *    vclean and vgonel.
 *  - ifq_deq_sleep's last parameter changes from "volatile unsigned int *alive"
 *    to "void *smrp"; the liveness test becomes SMR_PTR_GET(smr) == NULL inside
 *    smr_read_enter / smr_read_leave.
 *  - vfinddev drops its rc local and returns directly from the loop.
 *  - amd64 syscall() increments curcpu()->ci_syscalls; the field is added to
 *    struct cpu_info.
 *
 * NOTE(review): __tun_log reads curcpu()->ci_syscalls unconditionally, while
 *   this patch declares the field only inside "#ifdef DIAGNOSTIC" in the amd64
 *   cpu.h; presumably this does not build without DIAGNOSTIC or on other
 *   architectures -- TODO confirm.
 * NOTE(review): trap.c guards the increment with "#if DIAGNOSTIC" whereas
 *   cpu.h uses "#ifdef DIAGNOSTIC"; the two should probably agree.
 * NOTE(review): tun_log_dump prints entry->cpuid (unsigned int) with "%d".
 * NOTE(review): tun_log takes two arguments in if_tun.c but is redefined with
 *   one argument (NULL softc) in spec_vnops.c and vfs_subr.c.
 * NOTE(review): db_enter(), the KASSERT disabled with a line comment, the
 *   "wat" printf and the tun_log calls at column 0 look like debugging
 *   scaffolding; presumably not meant to be committed as is.
 */
Index: net/if_tun.c =================================================================== RCS file: /cvs/src/sys/net/if_tun.c,v retrieving revision 1.234 diff -u -p -r1.234 if_tun.c --- net/if_tun.c 16 Feb 2022 02:22:39 -0000 1.234 +++ net/if_tun.c 24 Feb 2022 05:54:17 -0000 @@ -53,9 +53,11 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -78,6 +80,7 @@ #include struct tun_softc { + unsigned int sc_gen; struct arpcom sc_ac; /* ethernet common data */ #define sc_if sc_ac.ac_if struct selinfo sc_rsel; /* read select */ @@ -89,7 +92,7 @@ struct tun_softc { unsigned int sc_flags; /* misc flags */ #define TUN_DEAD (1 << 16) - dev_t sc_dev; + struct vnode *sc_vp; struct refcnt sc_refs; unsigned int sc_reading; }; @@ -154,6 +157,66 @@ struct if_clone tun_cloner = struct if_clone tap_cloner = IF_CLONE_INITIALIZER("tap", tap_clone_create, tun_clone_destroy); +struct tun_log_entry { + unsigned int gen; + const char *func; + unsigned int line; + unsigned int seq; + pid_t thrid; + unsigned int cpuid; + unsigned int syscall; + unsigned long arg; +}; + +unsigned int tun_log_seq = 0; +struct tun_log_entry tun_log_entries[128]; + +void +__tun_log(const char *func, unsigned int line, struct tun_softc *sc, + u_long arg) +{ + unsigned int seq; + struct tun_log_entry *entry; + + KERNEL_ASSERT_LOCKED(); + + seq = tun_log_seq++; + entry = &tun_log_entries[seq % nitems(tun_log_entries)]; + + if (sc != NULL) + entry->gen = sc->sc_gen; + else + entry->gen = 0; + entry->arg = arg; + entry->func = func; + entry->line = line; + entry->seq = seq; + entry->thrid = curproc->p_tid; + entry->cpuid = curcpu()->ci_cpuid; + entry->syscall = curcpu()->ci_syscalls; +} + +#define tun_log(_sc, x) __tun_log(__func__, __LINE__, _sc, x) + +void +tun_log_dump(void) +{ + const struct tun_log_entry *entry; + unsigned int end = tun_log_seq % nitems(tun_log_entries); + unsigned int slot = end; /* the end is the beginning is the end */ + + do { + entry = 
&tun_log_entries[slot]; + + printf("sc: %3u, cpu: %d+%u, tid: %6d, %s[%u], %lu\n", + entry->gen, entry->cpuid, entry->syscall, entry->thrid, + entry->func, entry->line, entry->arg); + + slot++; + slot %= nitems(tun_log_entries); + } while (slot != end); +} + void tunattach(int n) { @@ -200,6 +263,7 @@ tun_insert(struct tun_softc *sc) error = EEXIST; else { /* tun_name_lookup checks for the right lock already */ + tun_log(sc, 0); SMR_LIST_INSERT_HEAD_LOCKED(&tun_devs_list, sc, sc_entry); } @@ -211,6 +275,7 @@ tun_create(struct if_clone *ifc, int uni { struct tun_softc *sc; struct ifnet *ifp; + static unsigned int gen; if (unit > minor(~0U)) return (ENXIO); @@ -218,6 +283,7 @@ tun_create(struct if_clone *ifc, int uni KERNEL_ASSERT_LOCKED(); sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); + sc->sc_gen = gen++; refcnt_init(&sc->sc_refs); ifp = &sc->sc_if; @@ -286,24 +352,32 @@ int tun_clone_destroy(struct ifnet *ifp) { struct tun_softc *sc = ifp->if_softc; - dev_t dev; + struct vnode *vp; int s; KERNEL_ASSERT_LOCKED(); if (ISSET(sc->sc_flags, TUN_DEAD)) return (ENXIO); + tun_log(sc, 0); SET(sc->sc_flags, TUN_DEAD); /* kick userland off the device */ - dev = sc->sc_dev; - if (dev) { - struct vnode *vp; - - if (vfinddev(dev, VCHR, &vp)) - VOP_REVOKE(vp, REVOKEALL); - - KASSERT(sc->sc_dev == 0); + vp = SMR_PTR_GET_LOCKED(&sc->sc_vp); + if (vp != NULL) { + int rv; + tun_log(sc, vp->v_usecount); + rv = VOP_REVOKE(vp, REVOKEALL); + if (rv != 0) { + tun_log(sc, 0); + printf("%s: revoke %d\n", ifp->if_xname, rv); + uprintf("%s: revoke %d\n", ifp->if_xname, rv); + } else + tun_log(sc, 0); + + // KASSERT(SMR_PTR_GET_LOCKED(&sc->sc_vp) == NULL); + if (SMR_PTR_GET_LOCKED(&sc->sc_vp) != NULL) + db_enter(); } /* prevent userland from getting to the device again */ @@ -314,14 +388,16 @@ tun_clone_destroy(struct ifnet *ifp) if (sc->sc_reading) wakeup(&ifp->if_snd); - /* wait for device entrypoints to finish */ - refcnt_finalize(&sc->sc_refs, "tundtor"); - s = splhigh(); 
klist_invalidate(&sc->sc_rsel.si_note); klist_invalidate(&sc->sc_wsel.si_note); splx(s); + /* wait for device entrypoints to finish */ + refcnt_finalize(&sc->sc_refs, "tundtor"); + + tun_log(sc, 0); + if (ISSET(sc->sc_flags, TUN_LAYER2)) ether_ifdetach(ifp); @@ -339,7 +415,10 @@ tun_get(dev_t dev) smr_read_enter(); SMR_LIST_FOREACH(sc, &tun_devs_list, sc_entry) { - if (sc->sc_dev == dev) { + struct vnode *vp = SMR_PTR_GET(&sc->sc_vp); + if (vp == NULL) + continue; + if (dev == vp->v_rdev && VCHR == vp->v_type) { refcnt_take(&sc->sc_refs); break; } @@ -374,21 +453,33 @@ tun_dev_open(dev_t dev, const struct if_ struct ifnet *ifp; int error; u_short stayup = 0; + struct vnode *vp; char name[IFNAMSIZ]; unsigned int rdomain; + /* + * Find the vnode associated with this open before we sleep + * and let something else revoke it. Our caller has a reference + * to it, so we don't need to account for it again just yet. + */ + if (!vfinddev(dev, VCHR, &vp)) + panic("%s vfinddev failed", __func__); + snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev)); rdomain = rtable_l2(p->p_p->ps_rtableid); /* let's find or make an interface to work with */ while ((sc = tun_name_lookup(name)) == NULL) { + stayup = 0; +tun_log(sc, stayup); error = if_clone_create(name, rdomain); switch (error) { case 0: /* it's probably ours */ stayup = TUN_STAYUP; /* FALLTHROUGH */ case EEXIST: /* we may have lost a race with someone else */ +tun_log(sc, stayup); break; default: return (error); @@ -400,6 +491,7 @@ tun_dev_open(dev_t dev, const struct if_ /* wait for it to be fully constructed before we use it */ for (;;) { if (ISSET(sc->sc_flags, TUN_DEAD)) { + tun_log(sc, 0); error = ENXIO; goto done; } @@ -407,20 +499,34 @@ tun_dev_open(dev_t dev, const struct if_ if (ISSET(sc->sc_flags, TUN_INITED)) break; + tun_log(sc, 0); error = tsleep_nsec(sc, PCATCH, "tuninit", INFSLP); if (error != 0) { + tun_log(sc, 0); /* XXX if_clone_destroy if stayup? 
*/ goto done; } } - if (sc->sc_dev != 0) { + /* Has tun_clone_destroy torn the rug out under us? */ + if (vp->v_type == VBAD) { + error = ENXIO; + goto done; + } + + KERNEL_ASSERT_LOCKED(); + if (SMR_PTR_GET_LOCKED(&sc->sc_vp) != NULL) { + tun_log(sc, 0); /* aww, we lost */ error = EBUSY; goto done; } - /* it's ours now */ - sc->sc_dev = dev; + + /* It's ours now. */ + tun_log(sc, 0); + vref(vp); /* Take a ref so we can read vp from tun_get(). */ + SMR_PTR_SET_LOCKED(&sc->sc_vp, vp); + CLR(sc->sc_flags, stayup); /* automatically mark the interface running on open */ @@ -431,6 +537,8 @@ tun_dev_open(dev_t dev, const struct if_ tun_link_state(ifp, LINK_STATE_FULL_DUPLEX); error = 0; + tun_log(sc, 0); + done: tun_put(sc); return (error); @@ -460,10 +568,14 @@ tun_dev_close(dev_t dev, struct proc *p) int error = 0; char name[IFNAMSIZ]; int destroy = 0; + struct vnode *vp; sc = tun_get(dev); - if (sc == NULL) + if (sc == NULL) { + printf("%s[%u]: wat\n", __func__, __LINE__); return (ENXIO); + } + tun_log(sc, 0); ifp = &sc->sc_if; @@ -473,6 +585,7 @@ tun_dev_close(dev_t dev, struct proc *p) NET_LOCK(); CLR(ifp->if_flags, IFF_UP | IFF_RUNNING); NET_UNLOCK(); + tun_link_state(ifp, LINK_STATE_DOWN); ifq_purge(&ifp->if_snd); CLR(sc->sc_flags, TUN_ASYNC); @@ -484,18 +597,22 @@ tun_dev_close(dev_t dev, struct proc *p) if (!ISSET(sc->sc_flags, TUN_STAYUP)) { destroy = 1; strlcpy(name, ifp->if_xname, sizeof(name)); - } else { - tun_link_state(ifp, LINK_STATE_DOWN); } } - sc->sc_dev = 0; + KERNEL_ASSERT_LOCKED(); + vp = SMR_PTR_GET_LOCKED(&sc->sc_vp); + SMR_PTR_SET_LOCKED(&sc->sc_vp, NULL); + tun_log(sc, 0); tun_put(sc); if (destroy) if_clone_destroy(name); + smr_barrier(); /* Sigh sigh. Don't let other threads trip up. 
*/ + vrele(vp); + return (error); } @@ -794,7 +911,7 @@ tun_dev_read(dev_t dev, struct uio *uio, ifp = &sc->sc_if; error = ifq_deq_sleep(&ifp->if_snd, &m0, ISSET(ioflag, IO_NDELAY), - (PZERO + 1)|PCATCH, "tunread", &sc->sc_reading, &sc->sc_dev); + (PZERO + 1)|PCATCH, "tunread", &sc->sc_reading, &sc->sc_vp); if (error != 0) goto put; Index: net/ifq.c =================================================================== RCS file: /cvs/src/sys/net/ifq.c,v retrieving revision 1.45 diff -u -p -r1.45 ifq.c --- net/ifq.c 18 Jan 2022 10:54:05 -0000 1.45 +++ net/ifq.c 24 Feb 2022 05:54:17 -0000 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -457,12 +458,13 @@ ifq_dequeue(struct ifqueue *ifq) int ifq_deq_sleep(struct ifqueue *ifq, struct mbuf **mp, int nbio, int priority, - const char *wmesg, volatile unsigned int *sleeping, - volatile unsigned int *alive) + const char *wmesg, volatile unsigned int *sleeping, void *smrp) { struct mbuf *m; void *cookie; int error = 0; + void **smr = smrp; + void *alive; ifq_deq_enter(ifq); if (ifq->ifq_len == 0 && nbio) @@ -483,7 +485,11 @@ ifq_deq_sleep(struct ifqueue *ifq, struc (*sleeping)--; if (error != 0) break; - if (!(*alive)) { + + smr_read_enter(); + alive = SMR_PTR_GET(smr); + smr_read_leave(); + if (alive == NULL) { error = EIO; break; } Index: net/ifq.h =================================================================== RCS file: /cvs/src/sys/net/ifq.h,v retrieving revision 1.34 diff -u -p -r1.34 ifq.h --- net/ifq.h 28 Jan 2022 07:11:15 -0000 1.34 +++ net/ifq.h 24 Feb 2022 05:54:17 -0000 @@ -438,8 +438,7 @@ void ifq_barrier(struct ifqueue *); int ifq_deq_sleep(struct ifqueue *, struct mbuf **, int, int, - const char *, volatile unsigned int *, - volatile unsigned int *); + const char *, volatile unsigned int *, void *); #define ifq_len(_ifq) ((_ifq)->ifq_len) #define ifq_empty(_ifq) (ifq_len(_ifq) == 0) Index: kern/spec_vnops.c =================================================================== RCS 
file: /cvs/src/sys/kern/spec_vnops.c,v retrieving revision 1.107 diff -u -p -r1.107 spec_vnops.c --- kern/spec_vnops.c 11 Dec 2021 09:28:26 -0000 1.107 +++ kern/spec_vnops.c 24 Feb 2022 05:54:17 -0000 @@ -56,6 +56,10 @@ #include +struct tun_softc; +void __tun_log(const char *, unsigned int, struct tun_softc *, u_long); +#define tun_log(x) __tun_log(__func__, __LINE__, NULL, x) + #define v_lastr v_specinfo->si_lastr int spec_open_clone(struct vop_open_args *); @@ -154,8 +158,11 @@ spec_open(void *v) if (cdevsw[maj].d_flags & D_CLONE) return (spec_open_clone(ap)); VOP_UNLOCK(vp); +tun_log(vp->v_usecount); error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, p); +tun_log(vp->v_usecount); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); +tun_log(vp->v_usecount); return (error); case VBLK: Index: kern/vfs_subr.c =================================================================== RCS file: /cvs/src/sys/kern/vfs_subr.c,v retrieving revision 1.314 diff -u -p -r1.314 vfs_subr.c --- kern/vfs_subr.c 25 Jan 2022 04:04:40 -0000 1.314 +++ kern/vfs_subr.c 24 Feb 2022 05:54:17 -0000 @@ -74,6 +74,10 @@ #include "softraid.h" +struct tun_softc; +void __tun_log(const char *, unsigned int, struct tun_softc *, u_long); +#define tun_log(x) __tun_log(__func__, __LINE__, NULL, x) + void sr_quiesce(void); enum vtype iftovt_tab[16] = { @@ -992,6 +996,7 @@ vclean(struct vnode *vp, int flags, stru { int active, do_wakeup = 0; +tun_log(vp->v_usecount); /* * Check to see if the vnode is in use. * If so we have to reference it before we clean it out @@ -1022,6 +1027,7 @@ vclean(struct vnode *vp, int flags, stru } mtx_leave(&vnode_mtx); +tun_log(vp->v_usecount); /* * Even if the count is zero, the VOP_INACTIVE routine may still * have the object locked while it cleans it out. 
The VOP_LOCK @@ -1046,14 +1052,18 @@ vclean(struct vnode *vp, int flags, stru * VOP_INACTIVE will unlock the vnode */ if (active) { - if (flags & DOCLOSE) +tun_log(vp->v_usecount); + if (flags & DOCLOSE) { +tun_log(vp->v_usecount); VOP_CLOSE(vp, FNONBLOCK, NOCRED, p); + } VOP_INACTIVE(vp, p); } else { /* * Any other processes trying to obtain this lock must first * wait for VXLOCK to clear, then call the new lock operation. */ +tun_log(vp->v_usecount); VOP_UNLOCK(vp); } @@ -1136,9 +1146,11 @@ vgonel(struct vnode *vp, struct proc *p) vp->v_lflag |= VXWANT; msleep_nsec(vp, &vnode_mtx, PINOD, "vgone", INFSLP); mtx_leave(&vnode_mtx); +tun_log(vp->v_usecount); return; } mtx_leave(&vnode_mtx); +tun_log(vp->v_usecount); /* * Clean out the filesystem specific data. @@ -1155,6 +1167,7 @@ vgonel(struct vnode *vp, struct proc *p) */ if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != NULL) { +tun_log(vp->v_usecount); if ((vp->v_flag & VALIASED) == 0 && vp->v_type == VCHR && (cdevsw[major(vp->v_rdev)].d_flags & D_CLONE) && (minor(vp->v_rdev) >> CLONE_SHIFT == 0)) { @@ -1162,6 +1175,7 @@ vgonel(struct vnode *vp, struct proc *p) } SLIST_REMOVE(vp->v_hashchain, vp, vnode, v_specnext); if (vp->v_flag & VALIASED) { +tun_log(vp->v_usecount); vx = NULL; SLIST_FOREACH(vq, vp->v_hashchain, v_specnext) { if (vq->v_rdev != vp->v_rdev || @@ -1215,16 +1229,14 @@ int vfinddev(dev_t dev, enum vtype type, struct vnode **vpp) { struct vnode *vp; - int rc =0; SLIST_FOREACH(vp, &speclisth[SPECHASH(dev)], v_specnext) { if (dev != vp->v_rdev || type != vp->v_type) continue; *vpp = vp; - rc = 1; - break; + return 1; } - return (rc); + return 0; } /* Index: arch/amd64/amd64/trap.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/trap.c,v retrieving revision 1.90 diff -u -p -r1.90 trap.c --- arch/amd64/amd64/trap.c 9 Dec 2021 00:26:11 -0000 1.90 +++ arch/amd64/amd64/trap.c 24 Feb 2022 05:54:17 -0000 @@ -526,6 +526,9 @@ 
syscall(struct trapframe *frame) verify_smap(__func__); uvmexp.syscalls++; +#if DIAGNOSTIC + curcpu()->ci_syscalls++; +#endif p = curproc; code = frame->tf_rax; Index: arch/amd64/include/cpu.h =================================================================== RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v retrieving revision 1.141 diff -u -p -r1.141 cpu.h --- arch/amd64/include/cpu.h 31 Aug 2021 17:40:59 -0000 1.141 +++ arch/amd64/include/cpu.h 24 Feb 2022 05:54:17 -0000 @@ -135,6 +135,7 @@ struct cpu_info { u_int64_t ci_iunmask[NIPL]; #ifdef DIAGNOSTIC int ci_mutex_level; + unsigned int ci_syscalls; #endif volatile u_int ci_flags;