Index: if_tun.c =================================================================== RCS file: /cvs/src/sys/net/if_tun.c,v retrieving revision 1.199 diff -u -p -r1.199 if_tun.c --- if_tun.c 23 Jan 2020 03:10:18 -0000 1.199 +++ if_tun.c 23 Jan 2020 11:13:04 -0000 @@ -56,7 +56,7 @@ #include #include #include - +#include #include #include @@ -88,6 +88,8 @@ struct tun_softc { struct sigio_ref sc_sigio; /* async I/O registration */ u_short sc_flags; /* misc flags */ + + unsigned int sc_reading; }; #ifdef TUN_DEBUG @@ -131,6 +133,7 @@ int tun_dev_kqfilter(struct tun_softc *, int tun_ioctl(struct ifnet *, u_long, caddr_t); int tun_output(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); +int tun_enqueue(struct ifnet *, struct mbuf *); int tun_clone_create(struct if_clone *, int); int tap_clone_create(struct if_clone *, int); int tun_create(struct if_clone *, int, int); @@ -239,12 +242,14 @@ tun_create(struct if_clone *ifc, int uni ifp->if_softc = sc; ifp->if_ioctl = tun_ioctl; - ifp->if_output = tun_output; ifp->if_start = tun_start; ifp->if_hardmtu = TUNMRU; ifp->if_link_state = LINK_STATE_DOWN; + ifp->if_enqueue = tun_enqueue; IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); + if_counters_alloc(ifp); + if ((flags & TUN_LAYER2) == 0) { if (tun_list_insert(&tun_softc_list, sc) != 0) goto exists; @@ -254,6 +259,7 @@ tun_create(struct if_clone *ifc, int uni ifp->if_flags = (IFF_POINTOPOINT|IFF_MULTICAST); ifp->if_type = IFT_TUNNEL; ifp->if_hdrlen = sizeof(u_int32_t); + ifp->if_output = tun_output; ifp->if_rtrequest = p2p_rtrequest; if_attach(ifp); @@ -554,56 +560,46 @@ tun_output(struct ifnet *ifp, struct mbu struct rtentry *rt) { struct tun_softc *sc = ifp->if_softc; - int error; u_int32_t *af; - if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) { + if (!ISSET(ifp->if_flags, IFF_RUNNING) || + !ISSET(sc->sc_flags, TUN_OPEN)) { m_freem(m0); return (EHOSTDOWN); } - TUNDEBUG(("%s: tun_output\n", ifp->if_xname)); - - if ((sc->sc_flags & TUN_READY) 
!= TUN_READY) { - TUNDEBUG(("%s: not ready %#x\n", ifp->if_xname, - sc->sc_flags)); - m_freem(m0); - return (EHOSTDOWN); - } - - if (sc->sc_flags & TUN_LAYER2) - return (ether_output(ifp, m0, dst, rt)); - - M_PREPEND(m0, sizeof(*af), M_DONTWAIT); + m0 = m_prepend(m0, sizeof(*af), M_DONTWAIT); if (m0 == NULL) return (ENOBUFS); + af = mtod(m0, u_int32_t *); *af = htonl(dst->sa_family); -#if NBPFILTER > 0 - if (ifp->if_bpf) - bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_OUT); -#endif + return (if_enqueue(ifp, m0)); +} - error = if_enqueue(ifp, m0); +int +tun_enqueue(struct ifnet *ifp, struct mbuf *m) +{ + struct tun_softc *sc = ifp->if_softc; + int error; - if (error) { - ifp->if_collisions++; + error = ifq_enqueue(&ifp->if_snd, m); + if (error != 0) return (error); - } tun_wakeup(sc); + return (0); } void tun_wakeup(struct tun_softc *sc) { + if (sc->sc_reading > 0) + wakeup(&sc->sc_if.if_snd); + KERNEL_LOCK(); - if (sc->sc_flags & TUN_RWAIT) { - sc->sc_flags &= ~TUN_RWAIT; - wakeup((caddr_t)sc); - } selwakeup(&sc->sc_rsel); if (sc->sc_flags & TUN_ASYNC) pgsigio(&sc->sc_sigio, SIGIO, 0); @@ -728,20 +724,32 @@ int tunread(dev_t dev, struct uio *uio, int ioflag) { struct tun_softc *sc; + int error; if ((sc = tun_lookup(minor(dev))) == NULL) return (ENXIO); - return (tun_dev_read(sc, uio, ioflag)); + + KERNEL_UNLOCK(); + error = tun_dev_read(sc, uio, ioflag); + KERNEL_LOCK(); + + return (error); } int tapread(dev_t dev, struct uio *uio, int ioflag) { struct tun_softc *sc; + int error; if ((sc = tap_lookup(minor(dev))) == NULL) return (ENXIO); - return (tun_dev_read(sc, uio, ioflag)); + + KERNEL_UNLOCK(); + error = tun_dev_read(sc, uio, ioflag); + KERNEL_LOCK(); + + return (error); } int @@ -749,69 +757,38 @@ tun_dev_read(struct tun_softc *sc, struc { struct ifnet *ifp = &sc->sc_if; struct mbuf *m, *m0; - unsigned int ifidx; int error = 0; - size_t len; if ((sc->sc_flags & TUN_READY) != TUN_READY) return (EHOSTDOWN); - ifidx = ifp->if_index; - sc->sc_flags &= ~TUN_RWAIT; + 
error = ifq_deq_sleep(&ifp->if_snd, &m0, ISSET(ioflag, IO_NDELAY), + (PZERO + 1)|PCATCH, "tunread", &sc->sc_reading); + if (error != 0) + return (error); - do { - struct ifnet *ifp1; - int destroyed; - - while ((sc->sc_flags & TUN_READY) != TUN_READY) { - if ((error = tsleep_nsec(sc, - (PZERO + 1)|PCATCH, "tunread", INFSLP)) != 0) - return (error); - /* Make sure the interface still exists. */ - ifp1 = if_get(ifidx); - destroyed = (ifp1 == NULL); - if_put(ifp1); - if (destroyed) - return (ENXIO); - } - IFQ_DEQUEUE(&ifp->if_snd, m0); - if (m0 == NULL) { - if (sc->sc_flags & TUN_NBIO && ioflag & IO_NDELAY) - return (EWOULDBLOCK); - sc->sc_flags |= TUN_RWAIT; - if ((error = tsleep_nsec(sc, - (PZERO + 1)|PCATCH, "tunread", INFSLP)) != 0) - return (error); - /* Make sure the interface still exists. */ - ifp1 = if_get(ifidx); - destroyed = (ifp1 == NULL); - if_put(ifp1); - if (destroyed) - return (ENXIO); - } - } while (m0 == NULL); + KASSERT(m0 != NULL); - if (sc->sc_flags & TUN_LAYER2) { #if NBPFILTER > 0 - if (ifp->if_bpf) - bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_OUT); + if (ifp->if_bpf) + bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_OUT); #endif - } - while (m0 != NULL && uio->uio_resid > 0 && error == 0) { - len = ulmin(uio->uio_resid, m0->m_len); - if (len != 0) - error = uiomove(mtod(m0, caddr_t), len, uio); - m = m_free(m0); - m0 = m; - } + m = m0; + while (uio->uio_resid > 0) { + size_t len = ulmin(uio->uio_resid, m->m_len); + if (len != 0) { + error = uiomove(mtod(m, void *), len, uio); + if (error != 0) + break; + } - if (m0 != NULL) { - TUNDEBUG(("Dropping mbuf\n")); - m_freem(m0); + m = m->m_next; + if (m == NULL) + break; } - if (error) - ifp->if_oerrors++; + + m_freem(m0); return (error); } @@ -823,30 +800,42 @@ int tunwrite(dev_t dev, struct uio *uio, int ioflag) { struct tun_softc *sc; + int error; if ((sc = tun_lookup(minor(dev))) == NULL) return (ENXIO); - return (tun_dev_write(sc, uio, ioflag)); + + KERNEL_UNLOCK(); + error = tun_dev_write(sc, uio, 
ioflag); + KERNEL_LOCK(); + + return (error); } int tapwrite(dev_t dev, struct uio *uio, int ioflag) { struct tun_softc *sc; + int error; if ((sc = tap_lookup(minor(dev))) == NULL) return (ENXIO); - return (tun_dev_write(sc, uio, ioflag)); + + KERNEL_UNLOCK(); + error = tun_dev_write(sc, uio, ioflag); + KERNEL_LOCK(); + + return (error); } int tun_dev_write(struct tun_softc *sc, struct uio *uio, int ioflag) { struct ifnet *ifp; - u_int32_t *th; - struct mbuf *top, **mp, *m; - int error = 0, tlen; - size_t mlen; + struct mbuf *m; + int error = 0; + int mlen; + int align; ifp = &sc->sc_if; TUNDEBUG(("%s: tunwrite\n", ifp->if_xname)); @@ -856,112 +845,82 @@ tun_dev_write(struct tun_softc *sc, stru TUNDEBUG(("%s: len=%d!\n", ifp->if_xname, uio->uio_resid)); return (EMSGSIZE); } - tlen = uio->uio_resid; - /* get a header mbuf */ - MGETHDR(m, M_DONTWAIT, MT_DATA); + align = max_linkhdr; + if (sc->sc_flags & TUN_LAYER2) + align += ETHER_ALIGN; + + mlen = uio->uio_resid + align; + + m = m_gethdr(M_DONTWAIT, MT_DATA); if (m == NULL) - return (ENOBUFS); - mlen = MHLEN; - if (uio->uio_resid >= MINCLSIZE) { - MCLGET(m, M_DONTWAIT); - if (!(m->m_flags & M_EXT)) { - m_free(m); - return (ENOBUFS); + return (ENOMEM); + if (mlen > MHLEN) { + m_clget(m, M_WAITOK, mlen); + if (!ISSET(m->m_flags, M_EXT)) { + error = ENOMEM; + goto drop; } - mlen = MCLBYTES; } - top = NULL; - mp = &top; - if (sc->sc_flags & TUN_LAYER2) { - /* - * Pad so that IP header is correctly aligned - * this is necessary for all strict aligned architectures. 
- */ - mlen -= ETHER_ALIGN; - m->m_data += ETHER_ALIGN; - } - while (error == 0 && uio->uio_resid > 0) { - m->m_len = ulmin(mlen, uio->uio_resid); - error = uiomove(mtod (m, caddr_t), m->m_len, uio); - *mp = m; - mp = &m->m_next; - if (error == 0 && uio->uio_resid > 0) { - MGET(m, M_DONTWAIT, MT_DATA); - if (m == NULL) { - error = ENOBUFS; - break; - } - mlen = MLEN; - if (uio->uio_resid >= MINCLSIZE) { - MCLGET(m, M_DONTWAIT); - if (!(m->m_flags & M_EXT)) { - error = ENOBUFS; - m_free(m); - break; - } - mlen = MCLBYTES; - } - } - } - if (error) { - m_freem(top); - ifp->if_ierrors++; - return (error); - } + m_align(m, mlen); + m->m_pkthdr.len = m->m_len = mlen; + m_adj(m, align); + + error = uiomove(mtod(m, void *), m->m_len, uio); + if (error != 0) + goto drop; - top->m_pkthdr.len = tlen; + NET_RLOCK(); - if (sc->sc_flags & TUN_LAYER2) { - struct mbuf_list ml = MBUF_LIST_INITIALIZER(); + if (sc->sc_flags & TUN_LAYER2) + if_vinput(ifp, m); + else { + uint32_t af; - ml_enqueue(&ml, top); - if_input(ifp, &ml); - return (0); - } + m->m_pkthdr.ph_ifidx = ifp->if_index; + m->m_pkthdr.ph_rtableid = ifp->if_rdomain; #if NBPFILTER > 0 - if (ifp->if_bpf) { - bpf_mtap(ifp->if_bpf, top, BPF_DIRECTION_IN); - } + if (ifp->if_bpf) { + bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_IN); + /* XXX BPF can ask to drop */ + } #endif - th = mtod(top, u_int32_t *); - /* strip the tunnel header */ - top->m_data += sizeof(*th); - top->m_len -= sizeof(*th); - top->m_pkthdr.len -= sizeof(*th); - top->m_pkthdr.ph_rtableid = ifp->if_rdomain; - top->m_pkthdr.ph_ifidx = ifp->if_index; - - ifp->if_ipackets++; - ifp->if_ibytes += top->m_pkthdr.len; - - NET_LOCK(); - - switch (ntohl(*th)) { - case AF_INET: - ipv4_input(ifp, top); - break; + af = ntohl(*mtod(m, uint32_t *)); /* network order */ + m_adj(m, sizeof(af)); + + counters_pkt(ifp->if_counters, + ifc_ipackets, ifc_ibytes, m->m_pkthdr.len); + + switch (af) { + case AF_INET: + ipv4_input(ifp, m); + break; #ifdef INET6 - case 
AF_INET6: + ipv6_input(ifp, m); + break; #endif #ifdef MPLS - case AF_MPLS: - mpls_input(ifp, top); - break; + case AF_MPLS: + mpls_input(ifp, m); + break; #endif - default: - m_freem(top); - error = EAFNOSUPPORT; - break; + default: + m_freem(m); + error = EAFNOSUPPORT; + break; + } } - NET_UNLOCK(); + NET_RUNLOCK(); + + return (error); +drop: + m_freem(m); return (error); } Index: ifq.c =================================================================== RCS file: /cvs/src/sys/net/ifq.c,v retrieving revision 1.35 diff -u -p -r1.35 ifq.c --- ifq.c 8 Oct 2019 04:18:00 -0000 1.35 +++ ifq.c 23 Jan 2020 11:13:04 -0000 @@ -389,6 +389,38 @@ ifq_dequeue(struct ifqueue *ifq) } int +ifq_deq_sleep(struct ifqueue *ifq, struct mbuf **mp, int nbio, int priority, + const char *wmesg, volatile unsigned int *sleeping) +{ + struct mbuf *m; + void *cookie; + int error = 0; /* stays 0 if we never sleep */ + + ifq_deq_enter(ifq); + if (ifq->ifq_len == 0 && nbio) + error = EWOULDBLOCK; + else { + do { + m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie); + if (m != NULL) { + ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie); + ifq->ifq_len--; + *mp = m; + break; + } + + (*sleeping)++; + error = msleep_nsec(ifq, &ifq->ifq_mtx, + priority, wmesg, INFSLP); + (*sleeping)--; + } while (error == 0); + } + ifq_deq_leave(ifq); + + return (error); +} + +int ifq_hdatalen(struct ifqueue *ifq) { struct mbuf *m; Index: ifq.h =================================================================== RCS file: /cvs/src/sys/net/ifq.h,v retrieving revision 1.26 diff -u -p -r1.26 ifq.h --- ifq.h 16 Apr 2019 04:04:19 -0000 1.26 +++ ifq.h 23 Jan 2020 11:13:04 -0000 @@ -413,6 +413,10 @@ void ifq_q_leave(struct ifqueue *, voi void ifq_serialize(struct ifqueue *, struct task *); void ifq_barrier(struct ifqueue *); + +int ifq_deq_sleep(struct ifqueue *, struct mbuf **, int, int, + const char *, volatile unsigned int *); + #define ifq_len(_ifq) ((_ifq)->ifq_len) #define ifq_empty(_ifq) (ifq_len(_ifq) == 0) #define ifq_set_maxlen(_ifq, _l) ((_ifq)->ifq_maxlen 
= (_l))