Index: net/if.c =================================================================== RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.486 diff -u -p -r1.486 if.c --- net/if.c 7 Feb 2017 10:08:21 -0000 1.486 +++ net/if.c 18 Feb 2017 13:25:56 -0000 @@ -1651,25 +1651,39 @@ ifunit(const char *name) /* * Map interface index to interface structure pointer. */ + struct ifnet * -if_get(unsigned int index) +if_enter(struct srp_ref *sr, unsigned int index) { - struct srp_ref sr; struct if_map *if_map; struct srp *map; - struct ifnet *ifp = NULL; - if_map = srp_enter(&sr, &if_idxmap.map); - if (index < if_map->limit) { - map = (struct srp *)(if_map + 1); - - ifp = srp_follow(&sr, &map[index]); - if (ifp != NULL) { - KASSERT(ifp->if_index == index); - if_ref(ifp); - } + if_map = srp_enter(sr, &if_idxmap.map); + if (index >= if_map->limit) + index = 0; + + map = (struct srp *)(if_map + 1); + return (srp_follow(sr, &map[index])); +} + +void +if_leave(struct srp_ref *sr, struct ifnet *ifp) +{ + srp_leave(sr); +} + +struct ifnet * +if_get(unsigned int index) +{ + struct srp_ref sr; + struct ifnet *ifp; + + ifp = if_enter(&sr, index); + if (ifp != NULL) { + KASSERT(ifp->if_index == index); + if_ref(ifp); } - srp_leave(&sr); + if_leave(&sr, ifp); return (ifp); } Index: net/if.h =================================================================== RCS file: /cvs/src/sys/net/if.h,v retrieving revision 1.186 diff -u -p -r1.186 if.h --- net/if.h 24 Jan 2017 10:08:30 -0000 1.186 +++ net/if.h 18 Feb 2017 13:25:56 -0000 @@ -455,6 +455,7 @@ struct if_parent { #include #ifdef _KERNEL +struct srp_ref; struct socket; struct ifnet; struct ifq_ops; @@ -482,6 +483,8 @@ int if_addgroup(struct ifnet *, const ch int if_delgroup(struct ifnet *, const char *); void if_group_routechange(struct sockaddr *, struct sockaddr *); struct ifnet *ifunit(const char *); +struct ifnet *if_enter(struct srp_ref *, unsigned int); +void if_leave(struct srp_ref *, struct ifnet *); struct ifnet *if_get(unsigned 
int); void if_put(struct ifnet *); void ifnewlladdr(struct ifnet *); Index: net/if_ethersubr.c =================================================================== RCS file: /cvs/src/sys/net/if_ethersubr.c,v retrieving revision 1.242 diff -u -p -r1.242 if_ethersubr.c --- net/if_ethersubr.c 24 Jan 2017 10:08:30 -0000 1.242 +++ net/if_ethersubr.c 18 Feb 2017 13:25:56 -0000 @@ -178,24 +178,18 @@ ether_rtrequest(struct ifnet *ifp, int r break; } } -/* - * Ethernet output routine. - * Encapsulate a packet of type family for the local net. - * Assumes that ifp is actually pointer to arpcom structure. - */ + int -ether_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, - struct rtentry *rt) +ether_resolve(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, + struct rtentry *rt, struct ether_header *eh) { - u_int16_t etype; - u_char edst[ETHER_ADDR_LEN]; - u_char *esrc; - struct mbuf *mcopy = NULL; - struct ether_header *eh; struct arpcom *ac = (struct arpcom *)ifp; sa_family_t af = dst->sa_family; int error = 0; + if (!ISSET(ifp->if_flags, IFF_RUNNING)) + senderr(ENETDOWN); + KASSERT(rt != NULL || ISSET(m->m_flags, M_MCAST|M_BCAST) || af == AF_UNSPEC || af == pseudo_AF_HDRCMPLT); @@ -207,28 +201,31 @@ ether_output(struct ifnet *ifp, struct m } #endif - esrc = ac->ac_enaddr; - - if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) - senderr(ENETDOWN); - switch (af) { case AF_INET: - error = arpresolve(ifp, rt, m, dst, edst); + error = arpresolve(ifp, rt, m, dst, eh->ether_dhost); if (error) - return (error == EAGAIN ? 0 : error); + return (error); + eh->ether_type = htons(ETHERTYPE_IP); + /* If broadcasting on a simplex interface, loopback a copy */ - if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX) && - !m->m_pkthdr.pf.routed) + if (ISSET(m->m_flags, M_BCAST) && + ISSET(ifp->if_flags, IFF_SIMPLEX) && + !m->m_pkthdr.pf.routed) { + struct mbuf *mcopy; + + /* XXX Should we input an unencrypted IPsec packet? 
*/ mcopy = m_copym(m, 0, M_COPYALL, M_NOWAIT); - etype = htons(ETHERTYPE_IP); + if (mcopy != NULL) + if_input_local(ifp, mcopy, af); + } break; #ifdef INET6 case AF_INET6: - error = nd6_resolve(ifp, rt, m, dst, edst); + error = nd6_resolve(ifp, rt, m, dst, eh->ether_dhost); if (error) - return (error == EAGAIN ? 0 : error); - etype = htons(ETHERTYPE_IPV6); + return (error); + eh->ether_type = htons(ETHERTYPE_IPV6); break; #endif #ifdef MPLS @@ -242,63 +239,83 @@ ether_output(struct ifnet *ifp, struct m senderr(ENETUNREACH); switch (dst->sa_family) { - case AF_LINK: - if (satosdl(dst)->sdl_alen < sizeof(edst)) - senderr(EHOSTUNREACH); - memcpy(edst, LLADDR(satosdl(dst)), - sizeof(edst)); - break; - case AF_INET: - case AF_MPLS: - error = arpresolve(ifp, rt, m, dst, edst); - if (error) - return (error == EAGAIN ? 0 : error); - break; - default: + case AF_LINK: + if (satosdl(dst)->sdl_alen < sizeof(eh->ether_dhost)) senderr(EHOSTUNREACH); + memcpy(eh->ether_dhost, LLADDR(satosdl(dst)), + sizeof(eh->ether_dhost)); + break; + case AF_INET: + case AF_MPLS: + error = arpresolve(ifp, rt, m, dst, eh->ether_dhost); + if (error) + return (error); + break; + default: + senderr(EHOSTUNREACH); } /* XXX handling for simplex devices in case of M/BCAST ?? */ if (m->m_flags & (M_BCAST | M_MCAST)) - etype = htons(ETHERTYPE_MPLS_MCAST); + eh->ether_type = htons(ETHERTYPE_MPLS_MCAST); else - etype = htons(ETHERTYPE_MPLS); + eh->ether_type = htons(ETHERTYPE_MPLS); break; #endif /* MPLS */ + case pseudo_AF_HDRCMPLT: - eh = (struct ether_header *)dst->sa_data; - esrc = eh->ether_shost; - /* FALLTHROUGH */ + /* take the whole header from the sa */ + memcpy(eh, (struct ether_header *)dst->sa_data, sizeof(*eh)); + return (0); case AF_UNSPEC: - eh = (struct ether_header *)dst->sa_data; - memcpy(edst, eh->ether_dhost, sizeof(edst)); - /* AF_UNSPEC doesn't swap the byte order of the ether_type. 
*/ - etype = eh->ether_type; + /* take dst and type from the sa; src is filled in below */ + memcpy(eh, (struct ether_header *)dst->sa_data, sizeof(*eh)); break; default: - printf("%s: can't handle af%d\n", ifp->if_xname, - dst->sa_family); + printf("%s: can't handle af%d\n", ifp->if_xname, af); senderr(EAFNOSUPPORT); } - /* XXX Should we feed-back an unencrypted IPsec packet ? */ - if (mcopy) - if_input_local(ifp, mcopy, dst->sa_family); + memcpy(eh->ether_shost, ac->ac_enaddr, sizeof(eh->ether_shost)); + + return (0); + +bad: + m_freem(m); + return (error); +} + +/* + * Ethernet output routine. + * Encapsulate a packet of type family for the local net. + * Assumes that ifp is actually pointer to arpcom structure. + */ +int +ether_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, + struct rtentry *rt) +{ + struct ether_header eh; + int error = 0; + + error = ether_resolve(ifp, m, dst, rt, &eh); + switch (error) { + case 0: + break; + case EAGAIN: + return (0); + default: + return (error); + } - M_PREPEND(m, sizeof(struct ether_header) + ETHER_ALIGN, M_DONTWAIT); + M_PREPEND(m, ETHER_ALIGN + sizeof(eh), M_DONTWAIT); if (m == NULL) return (ENOBUFS); + m_adj(m, ETHER_ALIGN); - eh = mtod(m, struct ether_header *); - eh->ether_type = etype; - memcpy(eh->ether_dhost, edst, sizeof(eh->ether_dhost)); - memcpy(eh->ether_shost, esrc, sizeof(eh->ether_shost)); + memcpy(mtod(m, struct ether_header *), &eh, sizeof(eh)); return (if_enqueue(ifp, m)); -bad: - m_freem(m); - return (error); } /* Index: net/if_vlan.c =================================================================== RCS file: /cvs/src/sys/net/if_vlan.c,v retrieving revision 1.171 diff -u -p -r1.171 if_vlan.c --- net/if_vlan.c 24 Jan 2017 10:08:30 -0000 1.171 +++ net/if_vlan.c 18 Feb 2017 13:25:56 -0000 @@ -85,6 +85,8 @@ int vlan_clone_create(struct if_clone *, int vlan_clone_destroy(struct ifnet *); int vlan_input(struct ifnet *, struct mbuf *, void *); +int vlan_output(struct ifnet *, struct mbuf 
*, struct sockaddr *, + struct rtentry *); void vlan_start(struct ifqueue *ifq); int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr); @@ -181,6 +183,7 @@ vlan_clone_create(struct if_clone *ifc, ifp->if_link_state = LINK_STATE_DOWN; if_attach(ifp); ether_ifattach(ifp); + ifp->if_output = vlan_output; ifp->if_hdrlen = EVL_ENCAPLEN; return (0); @@ -235,6 +238,105 @@ vlan_mplstunnel(int ifidx) #else return (0); #endif +} + +int +vlan_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *edst, + struct rtentry *rt) +{ + struct ifvlan *ifv; + struct ifnet *ifp0; + struct srp_ref sr; +#if NBPFILTER > 0 + caddr_t if_bpf; +#endif + struct sockaddr_storage ss; + struct sockaddr *vdst = (struct sockaddr *)&ss; + struct ether_header *eh = (struct ether_header *)vdst->sa_data; + uint16_t tag; + uint8_t prio; + int error; + + if (!ifq_priq(&ifp->if_snd)) { + /* + * user wants to delay packets, which relies on the ifq + * machinery. fall back to if_enqueue via ether_output. + */ + return (ether_output(ifp, m, edst, rt)); + } + + error = ether_resolve(ifp, m, edst, rt, eh); + switch (error) { + case 0: + break; + case EAGAIN: + return (0); + default: + return (error); + } + + ifv = ifp->if_softc; + ifp0 = if_enter(&sr, ifv->ifv_ifp0); + if (ifp0 == NULL || !ISSET(ifp0->if_flags, IFF_RUNNING)) { + m_freem(m); + error = ENETDOWN; + goto leave; + } + +#if NBPFILTER > 0 + if_bpf = ifp->if_bpf; + if (if_bpf) { + bpf_mtap_hdr(if_bpf, (caddr_t)eh, sizeof(*eh), m, + BPF_DIRECTION_OUT, NULL); + } +#endif + + /* IEEE 802.1p has prio 0 and 1 swapped */ + prio = m->m_pkthdr.pf.prio; + if (prio <= 1) + prio = !prio; + + tag = (prio << EVL_PRIO_BITS) | ifv->ifv_tag; + + /* + * If this packet came from a pseudowire it means it already + * has all tags it needs, so just output it. + */ + if (vlan_mplstunnel(m->m_pkthdr.ph_ifidx)) { + /* NOTHING */ + + /* + * If the underlying interface can do VLAN tag insertion itself, + * hand it the tag; otherwise create an encapsulation header. 
+ */ + } else if (ISSET(ifp0->if_capabilities, IFCAP_VLAN_HWTAGGING) && + ifv->ifv_type == ETHERTYPE_VLAN) { + m->m_pkthdr.ether_vtag = tag; + m->m_flags |= M_VLANTAG; + } else { + struct ether_vlan_shim *evl; + + M_PREPEND(m, sizeof(*evl), M_DONTWAIT); + if (m == NULL) { + error = ENOBUFS; + goto leave; + } + + evl = mtod(m, struct ether_vlan_shim *); + evl->evl_tag = htons(tag); + evl->evl_proto = eh->ether_type; + + eh->ether_type = htons(ifv->ifv_type); + } + + ifq_count(&ifp->if_snd, m); + + vdst->sa_family = pseudo_AF_HDRCMPLT; + error = ifp0->if_output(ifp0, m, vdst, NULL); + +leave: + if_leave(&sr, ifp0); + return (error); } void Index: net/ifq.c =================================================================== RCS file: /cvs/src/sys/net/ifq.c,v retrieving revision 1.6 diff -u -p -r1.6 ifq.c --- net/ifq.c 24 Jan 2017 03:57:35 -0000 1.6 +++ net/ifq.c 18 Feb 2017 13:25:56 -0000 @@ -251,6 +251,15 @@ ifq_destroy(struct ifqueue *ifq) ml_purge(&ml); } +static inline void +ifq_count_locked(struct ifqueue *ifq, const struct mbuf *m) +{ + ifq->ifq_packets++; + ifq->ifq_bytes += m->m_pkthdr.len; + if (ISSET(m->m_flags, M_MCAST)) + ifq->ifq_mcasts++; +} + int ifq_enqueue_try(struct ifqueue *ifq, struct mbuf *m) { @@ -261,10 +270,7 @@ ifq_enqueue_try(struct ifqueue *ifq, str if (rv == 0) { ifq->ifq_len++; - ifq->ifq_packets++; - ifq->ifq_bytes += m->m_pkthdr.len; - if (ISSET(m->m_flags, M_MCAST)) - ifq->ifq_mcasts++; + ifq_count_locked(ifq, m); } else ifq->ifq_qdrops++; mtx_leave(&ifq->ifq_mtx); @@ -290,8 +296,11 @@ ifq_deq_begin(struct ifqueue *ifq) struct mbuf *m = NULL; void *cookie; + if (ifq_empty(ifq)) + return (NULL); + mtx_enter(&ifq->ifq_mtx); - if (ifq->ifq_len == 0 || + if (ifq_empty(ifq) || (m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) { mtx_leave(&ifq->ifq_mtx); return (NULL); @@ -355,6 +364,14 @@ ifq_purge(struct ifqueue *ifq) ml_purge(&ml); return (rv); +} + +void +ifq_count(struct ifqueue *ifq, const struct mbuf *m) +{ + 
mtx_enter(&ifq->ifq_mtx); + ifq_count_locked(ifq, m); + mtx_leave(&ifq->ifq_mtx); } void * Index: net/ifq.h =================================================================== RCS file: /cvs/src/sys/net/ifq.h,v retrieving revision 1.9 diff -u -p -r1.9 ifq.h --- net/ifq.h 24 Jan 2017 10:08:30 -0000 1.9 +++ net/ifq.h 18 Feb 2017 13:25:56 -0000 @@ -348,6 +348,7 @@ void ifq_deq_commit(struct ifqueue *, void ifq_deq_rollback(struct ifqueue *, struct mbuf *); struct mbuf *ifq_dequeue(struct ifqueue *); unsigned int ifq_purge(struct ifqueue *); +void ifq_count(struct ifqueue *, const struct mbuf *); void *ifq_q_enter(struct ifqueue *, const struct ifq_ops *); void ifq_q_leave(struct ifqueue *, void *); void ifq_serialize(struct ifqueue *, struct task *); @@ -357,6 +358,7 @@ void ifq_barrier(struct ifqueue *); #define ifq_len(_ifq) ((_ifq)->ifq_len) #define ifq_empty(_ifq) (ifq_len(_ifq) == 0) #define ifq_set_maxlen(_ifq, _l) ((_ifq)->ifq_maxlen = (_l)) +#define ifq_priq(_ifq) ((_ifq)->ifq_ops == ifq_priq_ops) static inline void ifq_set_oactive(struct ifqueue *ifq) Index: netinet/if_ether.h =================================================================== RCS file: /cvs/src/sys/netinet/if_ether.h,v retrieving revision 1.73 diff -u -p -r1.73 if_ether.h --- netinet/if_ether.h 29 Nov 2016 10:09:57 -0000 1.73 +++ netinet/if_ether.h 18 Feb 2017 13:25:56 -0000 @@ -92,6 +92,11 @@ struct ether_vlan_header { u_int16_t evl_proto; }; +struct ether_vlan_shim { + u_int16_t evl_tag; + u_int16_t evl_proto; +}; + #define EVL_VLID_MASK 0xFFF #define EVL_VLID_NULL 0x000 /* 0x000 and 0xfff are reserved */ @@ -240,6 +245,8 @@ void ether_ifattach(struct ifnet *); void ether_ifdetach(struct ifnet *); int ether_ioctl(struct ifnet *, struct arpcom *, u_long, caddr_t); int ether_input(struct ifnet *, struct mbuf *, void *); +int ether_resolve(struct ifnet *, struct mbuf *, struct sockaddr *, + struct rtentry *, struct ether_header *); int ether_output(struct ifnet *, struct mbuf *, struct 
sockaddr *, struct rtentry *); void ether_rtrequest(struct ifnet *, int, struct rtentry *);