Index: netinet/udp_usrreq.c =================================================================== RCS file: /cvs/src/sys/netinet/udp_usrreq.c,v retrieving revision 1.262 diff -u -p -r1.262 udp_usrreq.c --- netinet/udp_usrreq.c 22 Aug 2020 17:54:57 -0000 1.262 +++ netinet/udp_usrreq.c 3 Mar 2021 01:35:03 -0000 @@ -112,11 +112,6 @@ #include #endif -#include "vxlan.h" -#if NVXLAN > 0 -#include -#endif - /* * UDP protocol implementation. * Per RFC 768, August, 1980. @@ -350,15 +345,6 @@ udp_input(struct mbuf **mp, int *offp, i break; #endif /* INET6 */ } - -#if NVXLAN > 0 - if (vxlan_enable > 0 && -#if NPF > 0 - !(m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) && -#endif - vxlan_lookup(m, uh, iphlen, &srcsa.sa, &dstsa.sa) != 0) - return IPPROTO_DONE; -#endif if (m->m_flags & (M_BCAST|M_MCAST)) { struct inpcb *last; Index: net/if_vxlan.c =================================================================== RCS file: /cvs/src/sys/net/if_vxlan.c,v retrieving revision 1.82 diff -u -p -r1.82 if_vxlan.c --- net/if_vxlan.c 25 Feb 2021 02:48:21 -0000 1.82 +++ net/if_vxlan.c 3 Mar 2021 01:35:03 -0000 @@ -1,7 +1,7 @@ -/* $OpenBSD: if_vxlan.c,v 1.82 2021/02/25 02:48:21 dlg Exp $ */ +/* $OpenBSD$ */ /* - * Copyright (c) 2013 Reyk Floeter + * Copyright (c) 2021 David Gwynne * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -17,475 +17,604 @@ */ #include "bpfilter.h" -#include "vxlan.h" -#include "vlan.h" #include "pf.h" -#include "bridge.h" #include #include +#include #include #include -#include #include +#include +#include +#include +#include +#include + +#include +#include #include #include +#include #include +#include #include - -#if NBPFILTER > 0 -#include -#endif +#include #include #include #include #include -#include #include -#include #include +#include -#if NPF > 0 -#include +#ifdef INET6 +#include +#include +#include #endif -#if NBRIDGE > 0 +/* for bridge stuff */ #include +#include + +#if NBPFILTER > 0 +#include #endif -#include +/* + * The protocol. + */ + +#define VXLANMTU 1492 +#define VXLAN_PORT 4789 + +struct vxlan_header { + uint32_t vxlan_flags; +#define VXLAN_F_I (1U << 27) + uint32_t vxlan_id; +#define VXLAN_VNI_SHIFT 8 +#define VXLAN_VNI_MASK (0xffffff << VXLAN_VNI_SHIFT) +}; + +#define VXLAN_VNI_MAX 0x00ffffff +#define VXLAN_VNI_MIN 0x00000000 + +/* + * The driver. + */ + +union vxlan_addr { + struct in_addr in4; + struct in6_addr in6; +}; + +struct vxlan_softc; + +struct vxlan_peer { + RBT_ENTRY(vxlan_peer) p_entry; + + unsigned int p_mask; /* do we use addr in the comparison */ + union vxlan_addr p_addr; + struct vxlan_header p_header; + + struct vxlan_softc *p_sc; +}; + +RBT_HEAD(vxlan_peers, vxlan_peer); + +struct vxlan_tep { + TAILQ_ENTRY(vxlan_tep) vt_entry; + + sa_family_t vt_af; + unsigned int vt_rdomain; + union vxlan_addr vt_addr; +#define vt_addr4 vt_addr.in4 +#define vt_addr6 vt_addr.in6 + in_port_t vt_port; + + struct socket *vt_so; + + struct mutex vt_mtx; + struct vxlan_peers vt_peers; +}; + +TAILQ_HEAD(vxlan_teps, vxlan_tep); + +enum vxlan_tunnel_mode { + VXLAN_TMODE_UNSET, + VXLAN_TMODE_P2P, /* unicast destination, no learning */ + VXLAN_TMODE_LEARNING, /* multicast destination, learning */ + VXLAN_TMODE_ENDPOINT, /* unset destination, no learning */ +}; struct vxlan_softc { struct arpcom sc_ac; - struct ifmedia sc_media; + struct etherbridge sc_eb; + + unsigned int sc_rdomain; + sa_family_t sc_af; + union vxlan_addr sc_src; + union vxlan_addr sc_dst; + in_port_t sc_port; + struct vxlan_header sc_header; + unsigned int sc_if_index0; - struct ip_moptions sc_imo; - struct task sc_atask; - struct task sc_ltask; struct task sc_dtask; - struct sockaddr_storage sc_src; - struct sockaddr_storage sc_dst; - in_port_t sc_dstport; - u_int sc_rdomain; - int64_t sc_vnetid; + enum vxlan_tunnel_mode sc_mode; + struct vxlan_peer *sc_ucast_peer; + struct vxlan_peer *sc_mcast_peer; + struct refcnt sc_refs; + uint16_t sc_df; - u_int8_t sc_ttl; + int sc_ttl; int sc_txhprio; + int sc_rxhprio; +}; - struct task sc_sendtask; +void vxlanattach(int); - LIST_ENTRY(vxlan_softc) sc_entry; -}; +static int vxlan_clone_create(struct if_clone *, int); +static int vxlan_clone_destroy(struct ifnet *); + +static void vxlan_start(struct ifnet *); -void vxlanattach(int); -int vxlanioctl(struct ifnet *, u_long, caddr_t); -void vxlanstart(struct ifnet *); -int vxlan_clone_create(struct if_clone *, int); -int vxlan_clone_destroy(struct ifnet *); -void vxlan_multicast_cleanup(struct ifnet *); -int vxlan_multicast_join(struct ifnet *, struct sockaddr *, - struct sockaddr *); -int vxlan_media_change(struct ifnet *); -void vxlan_media_status(struct ifnet *, struct ifmediareq *); -int vxlan_config(struct ifnet *, struct sockaddr *, struct sockaddr *); -int vxlan_output(struct ifnet *, struct mbuf *); -void vxlan_addr_change(void *); -void vxlan_if_change(void *); -void vxlan_link_change(void *); -void vxlan_send_dispatch(void *); +static int vxlan_ioctl(struct ifnet *, u_long, caddr_t); +static int vxlan_up(struct vxlan_softc *); +static int vxlan_down(struct vxlan_softc *); + +static struct mbuf * + vxlan_input(void *, struct mbuf *, + struct ip *, struct ip6_hdr *, void *, int); + +static int vxlan_set_rdomain(struct vxlan_softc *, const struct ifreq *); +static int vxlan_get_rdomain(struct vxlan_softc *, struct ifreq *); +static int vxlan_set_tunnel(struct vxlan_softc *, + const struct if_laddrreq *); +static int vxlan_get_tunnel(struct vxlan_softc *, struct if_laddrreq *); +static int vxlan_del_tunnel(struct vxlan_softc *); +static int vxlan_set_vnetid(struct vxlan_softc *, const struct ifreq *); +static int vxlan_get_vnetid(struct vxlan_softc *, struct ifreq *); +static int vxlan_del_vnetid(struct vxlan_softc *); +static int vxlan_set_parent(struct vxlan_softc *, + const struct if_parent *); +static int vxlan_get_parent(struct vxlan_softc *, struct if_parent *); +static int vxlan_del_parent(struct vxlan_softc *); -int vxlan_sockaddr_cmp(struct sockaddr *, struct sockaddr *); -uint16_t vxlan_sockaddr_port(struct sockaddr *); +static int vxlan_add_addr(struct vxlan_softc *, const struct ifbareq *); +static int vxlan_del_addr(struct vxlan_softc *, const struct ifbareq *); -struct if_clone vxlan_cloner = +static void vxlan_detach_hook(void *); + +static struct if_clone vxlan_cloner = IF_CLONE_INITIALIZER("vxlan", vxlan_clone_create, vxlan_clone_destroy); -int vxlan_enable = 0; -u_long vxlan_tagmask; +static int vxlan_eb_port_eq(void *, void *, void *); +static void *vxlan_eb_port_take(void *, void *); +static void vxlan_eb_port_rele(void *, void *); +static size_t vxlan_eb_port_ifname(void *, char *, size_t, void *); +static void vxlan_eb_port_sa(void *, struct sockaddr_storage *, void *); + +static const struct etherbridge_ops vxlan_etherbridge_ops = { + vxlan_eb_port_eq, + vxlan_eb_port_take, + vxlan_eb_port_rele, + vxlan_eb_port_ifname, + vxlan_eb_port_sa, +}; + +static struct rwlock vxlan_lock = RWLOCK_INITIALIZER("vteps"); +static struct vxlan_teps vxlan_teps = TAILQ_HEAD_INITIALIZER(vxlan_teps); +static struct pool vxlan_endpoint_pool; + +static inline int vxlan_peer_cmp(const struct vxlan_peer *, + const struct vxlan_peer *); -#define VXLAN_TAGHASHSIZE 32 -#define VXLAN_TAGHASH(tag) ((unsigned int)tag & vxlan_tagmask) -LIST_HEAD(vxlan_taghash, vxlan_softc) *vxlan_tagh, vxlan_any; +RBT_PROTOTYPE(vxlan_peers, vxlan_peer, p_entry, vxlan_peer_cmp); void vxlanattach(int count) { - /* Regular vxlan interfaces with a VNI */ - if ((vxlan_tagh = hashinit(VXLAN_TAGHASHSIZE, M_DEVBUF, M_NOWAIT, - &vxlan_tagmask)) == NULL) - panic("vxlanattach: hashinit"); - - /* multipoint-to-multipoint interfaces that accept any VNI */ - LIST_INIT(&vxlan_any); - if_clone_attach(&vxlan_cloner); } -int +static int vxlan_clone_create(struct if_clone *ifc, int unit) { - struct ifnet *ifp; - struct vxlan_softc *sc; + struct vxlan_softc *sc; + struct ifnet *ifp; + int error; + + if (vxlan_endpoint_pool.pr_size == 0) { + pool_init(&vxlan_endpoint_pool, sizeof(union vxlan_addr), + 0, IPL_SOFTNET, 0, "vxlanep", NULL); + } - sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); - sc->sc_imo.imo_membership = mallocarray(IP_MIN_MEMBERSHIPS, - sizeof(struct in_multi *), M_IPMOPTS, M_WAITOK|M_ZERO); - sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; - sc->sc_dstport = htons(VXLAN_PORT); - sc->sc_vnetid = VXLAN_VNI_UNSET; - sc->sc_txhprio = IFQ_TOS2PRIO(IPTOS_PREC_ROUTINE); /* 0 */ - sc->sc_df = htons(0); - task_set(&sc->sc_atask, vxlan_addr_change, sc); - task_set(&sc->sc_ltask, vxlan_link_change, sc); - task_set(&sc->sc_dtask, vxlan_if_change, sc); - task_set(&sc->sc_sendtask, vxlan_send_dispatch, sc); + sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL); + if (sc == NULL) + return (ENOMEM); ifp = &sc->sc_ac.ac_if; - snprintf(ifp->if_xname, sizeof ifp->if_xname, "vxlan%d", unit); - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; - ether_fakeaddr(ifp); - ifp->if_softc = sc; - ifp->if_ioctl = vxlanioctl; - ifp->if_start = vxlanstart; + snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", + ifc->ifc_name, unit); + + error = etherbridge_init(&sc->sc_eb, ifp->if_xname, + &vxlan_etherbridge_ops, sc); + if (error == -1) { + free(sc, M_DEVBUF, sizeof(*sc)); + return (error); + } + + sc->sc_af = AF_UNSPEC; + sc->sc_txhprio = IF_HDRPRIO_PACKET; + sc->sc_rxhprio = IF_HDRPRIO_OUTER; + sc->sc_df = 0; + sc->sc_ttl = IP_DEFAULT_MULTICAST_TTL; + + task_set(&sc->sc_dtask, vxlan_detach_hook, sc); + refcnt_init(&sc->sc_refs); + ifp->if_softc = sc; ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN; - ifp->if_capabilities = IFCAP_VLAN_MTU; + ifp->if_ioctl = vxlan_ioctl; + ifp->if_start = vxlan_start; + ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX; ifp->if_xflags = IFXF_CLONED; - - ifmedia_init(&sc->sc_media, 0, vxlan_media_change, - vxlan_media_status); - ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); - ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); + ether_fakeaddr(ifp); if_counters_alloc(ifp); if_attach(ifp); ether_ifattach(ifp); -#if 0 - /* - * Instead of using a decreased MTU of 1450 bytes, prefer - * to use the default Ethernet-size MTU of 1500 bytes and to - * increase the MTU of the outer transport interfaces to - * at least 1550 bytes. The following is disabled by default. - */ - ifp->if_mtu = ETHERMTU - sizeof(struct ether_header); - ifp->if_mtu -= sizeof(struct vxlanudphdr) + sizeof(struct ipovly); -#endif - - LIST_INSERT_HEAD(&vxlan_tagh[VXLAN_TAGHASH(0)], sc, sc_entry); - vxlan_enable++; - return (0); } -int +static int vxlan_clone_destroy(struct ifnet *ifp) { - struct vxlan_softc *sc = ifp->if_softc; + struct vxlan_softc *sc = ifp->if_softc; NET_LOCK(); - vxlan_multicast_cleanup(ifp); + if (ISSET(ifp->if_flags, IFF_RUNNING)) + vxlan_down(sc); NET_UNLOCK(); - vxlan_enable--; - LIST_REMOVE(sc, sc_entry); - - ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY); ether_ifdetach(ifp); if_detach(ifp); - if (!task_del(net_tq(ifp->if_index), &sc->sc_sendtask)) - taskq_barrier(net_tq(ifp->if_index)); + etherbridge_destroy(&sc->sc_eb); + + refcnt_finalize(&sc->sc_refs, "vxlanfini"); - free(sc->sc_imo.imo_membership, M_IPMOPTS, - sc->sc_imo.imo_max_memberships * sizeof(struct in_multi *)); free(sc, M_DEVBUF, sizeof(*sc)); return (0); } -void -vxlan_multicast_cleanup(struct ifnet *ifp) +static struct vxlan_softc * +vxlan_take(struct vxlan_softc *sc) +{ + refcnt_take(&sc->sc_refs); + return (sc); +} + +static void +vxlan_rele(struct vxlan_softc *sc) { - struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; - struct ip_moptions *imo = &sc->sc_imo; - struct ifnet *mifp; + refcnt_rele_wake(&sc->sc_refs); +} - mifp = if_get(imo->imo_ifidx); - if (mifp != NULL) { - if_addrhook_del(mifp, &sc->sc_atask); - if_linkstatehook_del(mifp, &sc->sc_ltask); - if_detachhook_del(mifp, &sc->sc_dtask); +static void +vxlan_transmit(struct vxlan_softc *sc, struct mbuf *m) +{ + struct mbuf *m0; + union vxlan_addr gateway; + const union vxlan_addr *endpoint; + struct vxlan_header *vh; + struct udphdr *uh; + int prio; + uint8_t tos; - if_put(mifp); - } + if (sc->sc_mode == VXLAN_TMODE_UNSET) + goto drop; - if (imo->imo_num_memberships > 0) { - in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); - imo->imo_ifidx = 0; + if (sc->sc_mode == VXLAN_TMODE_P2P) + endpoint = &sc->sc_dst; + else { /* VXLAN_TMODE_LEARNING || VXLAN_TMODE_ENDPOINT */ + struct ether_header *eh = mtod(m, struct ether_header *); + + smr_read_enter(); + endpoint = etherbridge_resolve_ea(&sc->sc_eb, + (struct ether_addr *)eh->ether_dhost); + if (endpoint != NULL) { + gateway = *endpoint; + endpoint = &gateway; + } + smr_read_leave(); + + if (endpoint == NULL) { + if (sc->sc_mode == VXLAN_TMODE_ENDPOINT) + goto drop; + + /* "flood" to unknown destinations */ + endpoint = &sc->sc_dst; + } } -} -int -vxlan_multicast_join(struct ifnet *ifp, struct sockaddr *src, - struct sockaddr *dst) -{ - struct vxlan_softc *sc = ifp->if_softc; - struct ip_moptions *imo = &sc->sc_imo; - struct sockaddr_in *src4, *dst4; -#ifdef INET6 - struct sockaddr_in6 *dst6; -#endif /* INET6 */ - struct ifaddr *ifa; - struct ifnet *mifp; + /* force prepend mbuf because of payload alignment */ + m0 = m_get(M_DONTWAIT, m->m_type); + if (m0 == NULL) + goto drop; - switch (dst->sa_family) { - case AF_INET: - dst4 = satosin(dst); - if (!IN_MULTICAST(dst4->sin_addr.s_addr)) - return (0); - break; -#ifdef INET6 - case AF_INET6: - dst6 = satosin6(dst); - if (!IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr)) - return (0); + m_align(m0, 0); + m0->m_len = 0; - /* Multicast mode is currently not supported for IPv6 */ - return (EAFNOSUPPORT); -#endif /* INET6 */ - default: - return (EAFNOSUPPORT); + M_MOVE_PKTHDR(m0, m); + m0->m_next = m; + + m = m_prepend(m0, sizeof(*vh), M_DONTWAIT); + if (m == NULL) + return; + + vh = mtod(m, struct vxlan_header *); + *vh = sc->sc_header; + + m = m_prepend(m, sizeof(*uh), M_DONTWAIT); + if (m == NULL) + return; + + uh = mtod(m, struct udphdr *); + uh->uh_sport = sc->sc_port; /* XXX */ + uh->uh_dport = sc->sc_port; + htobem16(&uh->uh_ulen, m->m_pkthdr.len); + uh->uh_sum = htons(0); + + prio = sc->sc_txhprio; + if (prio == IF_HDRPRIO_PACKET) + prio = m->m_pkthdr.pf.prio; + tos = IFQ_PRIO2TOS(prio); + + switch (sc->sc_af) { + case AF_INET: { + struct ip *ip; + + m = m_prepend(m, sizeof(*ip), M_DONTWAIT); + if (m == NULL) + return; + + ip = mtod(m, struct ip *); + ip->ip_v = IPVERSION; + ip->ip_hl = sizeof(*ip) >> 2; + ip->ip_off = sc->sc_df; + ip->ip_tos = tos; + ip->ip_len = htons(m->m_pkthdr.len); + ip->ip_ttl = sc->sc_ttl; + ip->ip_p = IPPROTO_UDP; + ip->ip_src = sc->sc_src.in4; + ip->ip_dst = endpoint->in4; + break; } +#ifdef INET6 + case AF_INET6: { + struct ip6_hdr *ip6; + int len = m->m_pkthdr.len; - src4 = satosin(src); - dst4 = satosin(dst); + m = m_prepend(m, sizeof(*ip6), M_DONTWAIT); + if (m == NULL) + return; - if (src4->sin_addr.s_addr == INADDR_ANY || - IN_MULTICAST(src4->sin_addr.s_addr)) - return (EINVAL); - if ((ifa = ifa_ifwithaddr(src, sc->sc_rdomain)) == NULL || - (mifp = ifa->ifa_ifp) == NULL || - (mifp->if_flags & IFF_MULTICAST) == 0) - return (EADDRNOTAVAIL); + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_flow = ISSET(m->m_pkthdr.csum_flags, M_FLOWID) ? + htonl(m->m_pkthdr.ph_flowid) : 0; + ip6->ip6_vfc |= IPV6_VERSION; + ip6->ip6_flow |= htonl((uint32_t)tos << 20); + ip6->ip6_plen = htons(len); + ip6->ip6_nxt = IPPROTO_UDP; + ip6->ip6_hlim = sc->sc_ttl; + ip6->ip6_src = sc->sc_src.in6; + ip6->ip6_dst = endpoint->in6; - if ((imo->imo_membership[0] = - in_addmulti(&dst4->sin_addr, mifp)) == NULL) - return (ENOBUFS); - - imo->imo_num_memberships++; - imo->imo_ifidx = mifp->if_index; - if (sc->sc_ttl > 0) - imo->imo_ttl = sc->sc_ttl; - else - imo->imo_ttl = IP_DEFAULT_MULTICAST_TTL; - imo->imo_loop = 0; + if (sc->sc_df) + SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT); - /* - * Use interface hooks to track any changes on the interface - * that is used to send out the tunnel traffic as multicast. - */ - if_addrhook_add(mifp, &sc->sc_atask); - if_linkstatehook_add(mifp, &sc->sc_ltask); - if_detachhook_add(mifp, &sc->sc_dtask); + break; + } +#endif /* INET6 */ + default: + unhandled_af(sc->sc_af); + } - return (0); +drop: + m_freem(m); } -void -vxlanstart(struct ifnet *ifp) +static void +vxlan_start(struct ifnet *ifp) { - struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; + struct vxlan_softc *sc = ifp->if_softc; + struct mbuf *m; - task_add(net_tq(ifp->if_index), &sc->sc_sendtask); + while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) + vxlan_transmit(sc, m); } -void -vxlan_send_dispatch(void *xsc) +static struct mbuf * +vxlan_input(void *arg, struct mbuf *m, struct ip *ip, struct ip6_hdr *ip6, + void *uhp, int hlen) { - struct vxlan_softc *sc = xsc; - struct ifnet *ifp = &sc->sc_ac.ac_if; - struct mbuf *m; - struct mbuf_list ml; - - ml_init(&ml); - for (;;) { - m = ifq_dequeue(&ifp->if_snd); - if (m == NULL) - break; + struct vxlan_tep *vt = arg; + struct vxlan_peer key, *p; + struct udphdr *uh; + struct vxlan_header *vh; + struct ether_header *eh; + int vhlen = hlen + sizeof(*vh); + struct mbuf *n; + int off; + in_port_t port; + struct vxlan_softc *sc = NULL; + struct ifnet *ifp; -#if NBPFILTER > 0 - if (ifp->if_bpf) - bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); + if (m->m_pkthdr.len < vhlen) + goto drop; + + uh = uhp; + port = uh->uh_sport; + + memset(&key, 0, sizeof(key)); + key.p_mask = 0; + + if (ip != NULL) + key.p_addr.in4 = ip->ip_src; +#ifdef INET6 + else + key.p_addr.in6 = ip6->ip6_src; #endif - ml_enqueue(&ml, m); + if (m->m_len < vhlen) { + m = m_pullup(m, vhlen); + if (m == NULL) + return (NULL); } - if (ml_empty(&ml)) - return; + vh = (struct vxlan_header *)(mtod(m, caddr_t) + hlen); + key.p_header.vxlan_flags = vh->vxlan_flags & htonl(VXLAN_F_I); + key.p_header.vxlan_id = vh->vxlan_id & htonl(VXLAN_VNI_MASK); - NET_LOCK(); - while ((m = ml_dequeue(&ml)) != NULL) { - vxlan_output(ifp, m); - } - NET_UNLOCK(); -} + mtx_enter(&vt->vt_mtx); + p = RBT_FIND(vxlan_peers, &vt->vt_peers, &key); + if (p != NULL) + sc = vxlan_take(p->p_sc); + mtx_leave(&vt->vt_mtx); + if (sc == NULL) + goto drop; -int -vxlan_config(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) -{ - struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; - int reset = 0, error, af; - socklen_t slen; - in_port_t port; - struct vxlan_taghash *tagh; - - if (src != NULL && dst != NULL) { - if ((af = src->sa_family) != dst->sa_family) - return (EAFNOSUPPORT); - } else { - /* Reset current configuration */ - af = sc->sc_src.ss_family; - src = sstosa(&sc->sc_src); - dst = sstosa(&sc->sc_dst); - reset = 1; - } + ifp = &sc->sc_ac.ac_if; + if (ISSET(ifp->if_flags, IFF_LINK0) && port != sc->sc_port) + goto drop; - switch (af) { - case AF_INET: - slen = sizeof(struct sockaddr_in); - break; -#ifdef INET6 - case AF_INET6: - slen = sizeof(struct sockaddr_in6); - break; -#endif /* INET6 */ - default: - return (EAFNOSUPPORT); - } + m_adj(m, vhlen); - if (src->sa_len != slen || dst->sa_len != slen) - return (EINVAL); + if (m->m_pkthdr.len < sizeof(*eh)) + goto drop; - vxlan_multicast_cleanup(ifp); + m = m_pullup(m, sizeof(*eh)); + if (m == NULL) + return (NULL); - /* returns without error if multicast is not configured */ - if ((error = vxlan_multicast_join(ifp, src, dst)) != 0) - return (error); + n = m_getptr(m, sizeof(*eh), &off); + if (n == NULL) + goto drop; - if ((port = vxlan_sockaddr_port(dst)) != 0) - sc->sc_dstport = port; + if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) { + n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT); + m_freem(m); + if (n == NULL) + return (NULL); + m = n; + } - if (!reset) { - bzero(&sc->sc_src, sizeof(sc->sc_src)); - bzero(&sc->sc_dst, sizeof(sc->sc_dst)); - memcpy(&sc->sc_src, src, src->sa_len); - memcpy(&sc->sc_dst, dst, dst->sa_len); + if (sc->sc_mode == VXLAN_TMODE_LEARNING) { + eh = mtod(m, struct ether_header *); + etherbridge_map_ea(&sc->sc_eb, &key.p_addr, + (struct ether_addr *)eh->ether_shost); } - if (sc->sc_vnetid == VXLAN_VNI_ANY) { - /* - * If the interface accepts any VNI, put it into a separate - * list that is not part of the main hash. - */ - tagh = &vxlan_any; - } else - tagh = &vxlan_tagh[VXLAN_TAGHASH(sc->sc_vnetid)]; + /* XXX prio */ - LIST_REMOVE(sc, sc_entry); - LIST_INSERT_HEAD(tagh, sc, sc_entry); + if_vinput(ifp, m); - return (0); + return (NULL); + +drop: + m_freem(m); + return (NULL); } -int -vxlanioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +static int +vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { - struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; - struct ifreq *ifr = (struct ifreq *)data; - struct if_laddrreq *lifr = (struct if_laddrreq *)data; - int error = 0; + struct vxlan_softc *sc = ifp->if_softc; + struct ifreq *ifr = (struct ifreq *)data; + struct ifbrparam *bparam = (struct ifbrparam *)data; + int error = 0; switch (cmd) { case SIOCSIFADDR: - ifp->if_flags |= IFF_UP; - /* FALLTHROUGH */ - + break; case SIOCSIFFLAGS: - if (ifp->if_flags & IFF_UP) { - ifp->if_flags |= IFF_RUNNING; + if (ISSET(ifp->if_flags, IFF_UP)) { + if (!ISSET(ifp->if_flags, IFF_RUNNING)) + error = vxlan_up(sc); + else + error = 0; } else { - ifp->if_flags &= ~IFF_RUNNING; + if (ISSET(ifp->if_flags, IFF_RUNNING)) + error = vxlan_down(sc); } break; - case SIOCADDMULTI: - case SIOCDELMULTI: + case SIOCSLIFPHYRTABLE: + error = vxlan_set_rdomain(sc, ifr); break; - - case SIOCGIFMEDIA: - case SIOCSIFMEDIA: - error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); + case SIOCGLIFPHYRTABLE: + error = vxlan_get_rdomain(sc, ifr); break; case SIOCSLIFPHYADDR: - error = vxlan_config(ifp, - sstosa(&lifr->addr), - sstosa(&lifr->dstaddr)); + error = vxlan_set_tunnel(sc, (const struct if_laddrreq *)data); + break; + case SIOCGLIFPHYADDR: + error = vxlan_get_tunnel(sc, (struct if_laddrreq *)data); break; - case SIOCDIFPHYADDR: - vxlan_multicast_cleanup(ifp); - bzero(&sc->sc_src, sizeof(sc->sc_src)); - bzero(&sc->sc_dst, sizeof(sc->sc_dst)); - sc->sc_dstport = htons(VXLAN_PORT); + error = vxlan_del_tunnel(sc); break; - case SIOCGLIFPHYADDR: - if (sc->sc_dst.ss_family == AF_UNSPEC) { - error = EADDRNOTAVAIL; - break; - } - bzero(&lifr->addr, sizeof(lifr->addr)); - bzero(&lifr->dstaddr, sizeof(lifr->dstaddr)); - memcpy(&lifr->addr, &sc->sc_src, sc->sc_src.ss_len); - memcpy(&lifr->dstaddr, &sc->sc_dst, sc->sc_dst.ss_len); + case SIOCSVNETID: + error = vxlan_set_vnetid(sc, ifr); break; - - case SIOCSLIFPHYRTABLE: - if (ifr->ifr_rdomainid < 0 || - ifr->ifr_rdomainid > RT_TABLEID_MAX || - !rtable_exists(ifr->ifr_rdomainid)) { - error = EINVAL; - break; - } - sc->sc_rdomain = ifr->ifr_rdomainid; - (void)vxlan_config(ifp, NULL, NULL); + case SIOCGVNETID: + error = vxlan_get_vnetid(sc, ifr); + break; + case SIOCDVNETID: + error = vxlan_del_vnetid(sc); break; - case SIOCGLIFPHYRTABLE: - ifr->ifr_rdomainid = sc->sc_rdomain; + case SIOCSIFPARENT: + error = vxlan_set_parent(sc, (struct if_parent *)data); + break; + case SIOCGIFPARENT: + error = vxlan_get_parent(sc, (struct if_parent *)data); + break; + case SIOCDIFPARENT: + error = vxlan_del_parent(sc); break; - case SIOCSLIFPHYTTL: - if (ifr->ifr_ttl < 0 || ifr->ifr_ttl > 0xff) { - error = EINVAL; - break; - } - if (sc->sc_ttl == (u_int8_t)ifr->ifr_ttl) + case SIOCSTXHPRIO: + error = if_txhprio_l2_check(ifr->ifr_hdrprio); + if (error != 0) break; - sc->sc_ttl = (u_int8_t)(ifr->ifr_ttl); - (void)vxlan_config(ifp, NULL, NULL); + + sc->sc_txhprio = ifr->ifr_hdrprio; + break; + case SIOCGTXHPRIO: + ifr->ifr_hdrprio = sc->sc_txhprio; break; - case SIOCGLIFPHYTTL: - ifr->ifr_ttl = (int)sc->sc_ttl; + case SIOCSRXHPRIO: + error = if_rxhprio_l2_check(ifr->ifr_hdrprio); + if (error != 0) + break; + + sc->sc_rxhprio = ifr->ifr_hdrprio; + break; + case SIOCGRXHPRIO: + ifr->ifr_hdrprio = sc->sc_rxhprio; break; case SIOCSLIFPHYDF: @@ -496,50 +625,45 @@ vxlanioctl(struct ifnet *ifp, u_long cmd ifr->ifr_df = sc->sc_df ? 1 : 0; break; - case SIOCSTXHPRIO: - if (ifr->ifr_hdrprio == IF_HDRPRIO_PACKET) - ; /* fall through */ - else if (ifr->ifr_hdrprio < IF_HDRPRIO_MIN || - ifr->ifr_hdrprio > IF_HDRPRIO_MAX) { + case SIOCSLIFPHYTTL: + if (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff) { error = EINVAL; break; } - sc->sc_txhprio = ifr->ifr_hdrprio; + /* commit */ + sc->sc_ttl = (uint8_t)ifr->ifr_ttl; break; - case SIOCGTXHPRIO: - ifr->ifr_hdrprio = sc->sc_txhprio; + case SIOCGLIFPHYTTL: + ifr->ifr_ttl = (int)sc->sc_ttl; break; - case SIOCSVNETID: - if (sc->sc_vnetid == ifr->ifr_vnetid) - break; - - if ((ifr->ifr_vnetid != VXLAN_VNI_ANY) && - (ifr->ifr_vnetid > VXLAN_VNI_MAX || - ifr->ifr_vnetid < VXLAN_VNI_MIN)) { - error = EINVAL; - break; - } - - sc->sc_vnetid = (int)ifr->ifr_vnetid; - (void)vxlan_config(ifp, NULL, NULL); + case SIOCBRDGSCACHE: + error = etherbridge_set_max(&sc->sc_eb, bparam); break; - - case SIOCGVNETID: - if ((sc->sc_vnetid != VXLAN_VNI_ANY) && - (sc->sc_vnetid > VXLAN_VNI_MAX || - sc->sc_vnetid < VXLAN_VNI_MIN)) { - error = EADDRNOTAVAIL; - break; - } - - ifr->ifr_vnetid = sc->sc_vnetid; + case SIOCBRDGGCACHE: + error = etherbridge_get_max(&sc->sc_eb, bparam); + break; + case SIOCBRDGSTO: + error = etherbridge_set_tmo(&sc->sc_eb, bparam); + break; + case SIOCBRDGGTO: + error = etherbridge_get_tmo(&sc->sc_eb, bparam); break; - case SIOCDVNETID: - sc->sc_vnetid = VXLAN_VNI_UNSET; - (void)vxlan_config(ifp, NULL, NULL); + case SIOCBRDGRTS: + error = etherbridge_rtfind(&sc->sc_eb, + (struct ifbaconf *)data); + break; + case SIOCBRDGFLUSH: + etherbridge_flush(&sc->sc_eb, + ((struct ifbreq *)data)->ifbr_ifsflags); + break; + case SIOCBRDGSADDR: + error = vxlan_add_addr(sc, (struct ifbareq *)data); + break; + case SIOCBRDGDADDR: + error = vxlan_del_addr(sc, (struct ifbareq *)data); break; default: @@ -550,465 +674,879 @@ vxlanioctl(struct ifnet *ifp, u_long cmd return (error); } -int -vxlan_media_change(struct ifnet *ifp) +static struct vxlan_tep * +vxlan_tep_get(struct vxlan_softc *sc, const union vxlan_addr *addr) { - return (0); -} + struct vxlan_tep *vt; -void -vxlan_media_status(struct ifnet *ifp, struct ifmediareq *imr) -{ - imr->ifm_status = IFM_AVALID | IFM_ACTIVE; + TAILQ_FOREACH(vt, &vxlan_teps, vt_entry) { + if (sc->sc_af == vt->vt_af && + sc->sc_rdomain == vt->vt_rdomain && + memcmp(addr, &vt->vt_addr, sizeof(*addr)) == 0 && + sc->sc_port == vt->vt_port) + return (vt); + } + + return (NULL); } -int -vxlan_sockaddr_cmp(struct sockaddr *srcsa, struct sockaddr *dstsa) +static int +vxlan_tep_add_addr(struct vxlan_softc *sc, const union vxlan_addr *addr, + struct vxlan_peer *p) { - struct sockaddr_in *src4, *dst4; + struct mbuf m; + struct vxlan_tep *vt; + struct socket *so; + struct sockaddr_in *sin; #ifdef INET6 - struct sockaddr_in6 *src6, *dst6; -#endif /* INET6 */ + struct sockaddr_in6 *sin6; +#endif + int error; +#ifdef notyet + int s; +#endif - if (srcsa->sa_family != dstsa->sa_family) - return (1); + vt = vxlan_tep_get(sc, addr); + if (vt != NULL) { + struct vxlan_peer *op; + + mtx_enter(&vt->vt_mtx); + op = RBT_INSERT(vxlan_peers, &vt->vt_peers, p); + mtx_leave(&vt->vt_mtx); + + if (op != NULL) + return (EADDRINUSE); + + return (0); + } + + vt = malloc(sizeof(*vt), M_DEVBUF, M_NOWAIT|M_ZERO); + if (vt == NULL) + return (ENOMEM); + + vt->vt_af = sc->sc_af; + vt->vt_rdomain = sc->sc_rdomain; + vt->vt_addr = *addr; + vt->vt_port = sc->sc_port; + + mtx_init(&vt->vt_mtx, IPL_SOFTNET); + RBT_INIT(vxlan_peers, &vt->vt_peers); + RBT_INSERT(vxlan_peers, &vt->vt_peers, p); + + error = socreate(vt->vt_af, &so, SOCK_DGRAM, IPPROTO_UDP); + if (error != 0) + goto free; + +#ifdef notyet + s = solock(so); +#else + soassertlocked(so); +#endif - switch (dstsa->sa_family) { + sotoinpcb(so)->inp_upcall = vxlan_input; + sotoinpcb(so)->inp_upcall_arg = vt; + + m_inithdr(&m); + m.m_len = sizeof(vt->vt_rdomain); + *mtod(&m, unsigned int *) = vt->vt_rdomain; + error = sosetopt(so, SOL_SOCKET, SO_RTABLE, &m); + if (error != 0) + goto close; + + m_inithdr(&m); + switch (vt->vt_af) { case AF_INET: - src4 = satosin(srcsa); - dst4 = satosin(dstsa); - if (src4->sin_addr.s_addr == dst4->sin_addr.s_addr) - return (0); + sin = mtod(&m, struct sockaddr_in *); + memset(sin, 0, sizeof(*sin)); + sin->sin_len = sizeof(*sin); + sin->sin_family = AF_INET; + sin->sin_addr = addr->in4; + sin->sin_port = vt->vt_port; break; + #ifdef INET6 case AF_INET6: - src6 = satosin6(srcsa); - dst6 = satosin6(dstsa); - if (IN6_ARE_ADDR_EQUAL(&src6->sin6_addr, &dst6->sin6_addr) && - src6->sin6_scope_id == dst6->sin6_scope_id) - return (0); + sin6 = mtod(&m, struct sockaddr_in6 *); + sin6->sin6_len = sizeof(*sin6); + sin6->sin6_family = AF_INET6; + in6_recoverscope(sin6, &addr->in6); + sin6->sin6_port = sc->sc_port; break; -#endif /* INET6 */ +#endif + default: + unhandled_af(vt->vt_af); } - return (1); + error = sobind(so, &m, curproc); + if (error != 0) + goto close; + +#ifdef notyet + sounlock(so, s); +#endif + + rw_assert_wrlock(&vxlan_lock); + TAILQ_INSERT_TAIL(&vxlan_teps, vt, vt_entry); + + vt->vt_so = so; + + return (0); + +close: +#ifdef notyet + sounlock(so, s); +#endif + soclose(so, MSG_DONTWAIT); +free: + free(vt, M_DEVBUF, sizeof(*vt)); + return (error); } -uint16_t -vxlan_sockaddr_port(struct sockaddr *sa) +static void +vxlan_tep_del_addr(struct vxlan_softc *sc, const union vxlan_addr *addr, + struct vxlan_peer *p) { - struct sockaddr_in *sin4; -#ifdef INET6 - struct sockaddr_in6 *sin6; -#endif /* INET6 */ + struct vxlan_tep *vt; + int empty; - switch (sa->sa_family) { - case AF_INET: - sin4 = satosin(sa); - return (sin4->sin_port); -#ifdef INET6 - case AF_INET6: - sin6 = satosin6(sa); - return (sin6->sin6_port); -#endif /* INET6 */ - default: - break; - } + vt = vxlan_tep_get(sc, addr); + if (vt == NULL) + panic("unable to find vxlan_tep for peer %p (sc %p)", p, sc); + + mtx_enter(&vt->vt_mtx); + RBT_REMOVE(vxlan_peers, &vt->vt_peers, p); + empty = RBT_EMPTY(vxlan_peers, &vt->vt_peers); + mtx_leave(&vt->vt_mtx); - return (0); + if (!empty) + return; + + rw_assert_wrlock(&vxlan_lock); + TAILQ_REMOVE(&vxlan_teps, vt, vt_entry); + + soclose(vt->vt_so, MSG_DONTWAIT); + free(vt, M_DEVBUF, sizeof(*vt)); } -int -vxlan_lookup(struct mbuf *m, struct udphdr *uh, int iphlen, - struct sockaddr *srcsa, struct sockaddr *dstsa) -{ - struct vxlan_softc *sc = NULL, *sc_cand = NULL; - struct vxlan_header v; - int vni; - struct ifnet *ifp; - int skip; -#if NBRIDGE > 0 - struct bridge_tunneltag *brtag; -#endif - struct mbuf *n; - int off; - - /* XXX Should verify the UDP port first before copying the packet */ - skip = iphlen + sizeof(*uh); - if (m->m_pkthdr.len - skip < sizeof(v)) - return (0); - m_copydata(m, skip, sizeof(v), &v); - skip += sizeof(v); +static int +vxlan_tep_up(struct vxlan_softc *sc) +{ + struct vxlan_peer *up, *mp; + int error; + + up = malloc(sizeof(*up), M_DEVBUF, M_NOWAIT|M_ZERO); + if (up == NULL) + return (ENOMEM); + + up->p_mask = (sc->sc_mode != VXLAN_TMODE_P2P); + up->p_addr = sc->sc_dst; + up->p_header = sc->sc_header; + up->p_sc = vxlan_take(sc); + + error = vxlan_tep_add_addr(sc, &sc->sc_src, up); + if (error != 0) + goto freeup; + + sc->sc_ucast_peer = up; - if (v.vxlan_flags & htonl(VXLAN_RESERVED1) || - v.vxlan_id & htonl(VXLAN_RESERVED2)) + if (sc->sc_mode != VXLAN_TMODE_LEARNING) return (0); - vni = ntohl(v.vxlan_id) >> VXLAN_VNI_S; - if ((v.vxlan_flags & htonl(VXLAN_FLAGS_VNI)) == 0) { - if (vni != 0) - return (0); + mp = malloc(sizeof(*mp), M_DEVBUF, M_NOWAIT|M_ZERO); + if (mp == NULL) { + error = ENOMEM; + goto delup; + } + + mp->p_mask = 1; + /* addr is masked, leave it as 0s */ + mp->p_header = sc->sc_header; + mp->p_sc = vxlan_take(sc); + + /* destination address is a multicast group we want to join */ + error = vxlan_tep_add_addr(sc, &sc->sc_dst, up); + if (error != 0) + goto freemp; + + sc->sc_mcast_peer = mp; + + return (0); - vni = VXLAN_VNI_UNSET; +freemp: + vxlan_rele(mp->p_sc); + free(mp, M_DEVBUF, sizeof(*mp)); +delup: + vxlan_tep_del_addr(sc, &sc->sc_src, up); +freeup: + vxlan_rele(up->p_sc); + free(up, M_DEVBUF, sizeof(*up)); + return (error); +} + +static void +vxlan_tep_down(struct vxlan_softc *sc) +{ + struct vxlan_peer *up = sc->sc_ucast_peer; + + if (sc->sc_mode == VXLAN_TMODE_LEARNING) { + struct vxlan_peer *mp = sc->sc_mcast_peer; + vxlan_tep_del_addr(sc, &sc->sc_dst, mp); + vxlan_rele(mp->p_sc); + free(mp, M_DEVBUF, sizeof(*mp)); } + vxlan_tep_del_addr(sc, &sc->sc_src, up); + vxlan_rele(up->p_sc); + free(up, M_DEVBUF, sizeof(*up)); +} + +static int +vxlan_up(struct vxlan_softc *sc) +{ + struct ifnet *ifp = &sc->sc_ac.ac_if; + struct ifnet *ifp0 = NULL; + int error; + + KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING)); NET_ASSERT_LOCKED(); - /* First search for a vxlan(4) interface with the packet's VNI */ - LIST_FOREACH(sc, &vxlan_tagh[VXLAN_TAGHASH(vni)], sc_entry) { - if ((uh->uh_dport == sc->sc_dstport) && - vni == sc->sc_vnetid && - sc->sc_rdomain == rtable_l2(m->m_pkthdr.ph_rtableid)) { - sc_cand = sc; - if (vxlan_sockaddr_cmp(srcsa, sstosa(&sc->sc_dst)) == 0) - goto found; - } + + if (sc->sc_af == AF_UNSPEC) + return (EDESTADDRREQ); + KASSERT(sc->sc_mode != VXLAN_TMODE_UNSET); + + if (sc->sc_mode != VXLAN_TMODE_P2P) { + error = etherbridge_up(&sc->sc_eb); + if (error != 0) + return (error); } - /* - * Now loop through all the vxlan(4) interfaces that are configured - * to accept any VNI and operating in multipoint-to-multipoint mode - * that is used in combination with bridge(4) or switch(4). - * If a vxlan(4) interface has been found for the packet's VNI, this - * code is not reached as the other interface is more specific. - */ - LIST_FOREACH(sc, &vxlan_any, sc_entry) { - if ((uh->uh_dport == sc->sc_dstport) && - (sc->sc_rdomain == rtable_l2(m->m_pkthdr.ph_rtableid))) { - sc_cand = sc; - goto found; + if (sc->sc_mode == VXLAN_TMODE_LEARNING) { + ifp0 = if_get(sc->sc_if_index0); + if (ifp0 == NULL) { + error = ENXIO; + goto down; + } + + /* check again if multicast will work on top of the parent */ + if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) { + error = EPROTONOSUPPORT; + goto put; } - } - if (sc_cand) { - sc = sc_cand; - goto found; + /* Register callback if parent wants to unregister */ + if_detachhook_add(ifp0, &sc->sc_dtask); + } else { + if (sc->sc_if_index0 != 0) { + error = EPROTONOSUPPORT; + goto down; + } } - /* not found */ + error = rw_enter(&vxlan_lock, RW_WRITE|RW_INTR); + if (error != 0) + goto hook_del; + + error = vxlan_tep_up(sc); + rw_exit(&vxlan_lock); + if (error != 0) + goto hook_del; + + /* we're running now */ + SET(ifp->if_flags, IFF_RUNNING); + + if_put(ifp0); + return (0); - found: - if (m->m_pkthdr.len < skip + sizeof(struct ether_header)) { - m_freem(m); - return (EINVAL); - } +hook_del: + if (ifp != NULL) + if_detachhook_del(ifp0, &sc->sc_dtask); +put: + if_put(ifp0); +down: + etherbridge_down(&sc->sc_eb); - m_adj(m, skip); - ifp = &sc->sc_ac.ac_if; + return (error); +} -#if NBRIDGE > 0 - /* Store the tunnel src/dst IP and vni for the bridge or switch */ - if ((ifp->if_bridgeidx != 0 || ifp->if_switchport != NULL) && - srcsa->sa_family != AF_UNSPEC && - ((brtag = bridge_tunneltag(m)) != NULL)) { - memcpy(&brtag->brtag_peer.sa, srcsa, srcsa->sa_len); - memcpy(&brtag->brtag_local.sa, dstsa, dstsa->sa_len); - brtag->brtag_id = vni; - } -#endif +static int +vxlan_down(struct vxlan_softc *sc) +{ + struct ifnet *ifp = &sc->sc_ac.ac_if; + struct ifnet *ifp0; - m->m_flags &= ~(M_BCAST|M_MCAST); + NET_ASSERT_LOCKED(); -#if NPF > 0 - pf_pkt_addr_changed(m); -#endif - if ((m->m_len < sizeof(struct ether_header)) && - (m = m_pullup(m, sizeof(struct ether_header))) == NULL) - return (ENOBUFS); + CLR(ifp->if_flags, IFF_RUNNING); - n = m_getptr(m, sizeof(struct ether_header), &off); - if (n == NULL) { - m_freem(m); - return (EINVAL); - } - if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) { - n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT); - /* Dispose of the original mbuf chain */ - m_freem(m); - if (n == NULL) - return (ENOBUFS); - m = n; + vxlan_tep_down(sc); + + if (sc->sc_mode == VXLAN_TMODE_LEARNING) { + ifp0 = if_get(sc->sc_if_index0); + if (ifp0 != NULL) + if_detachhook_del(ifp0, &sc->sc_dtask); + if_put(ifp0); } - if_vinput(ifp, m); + if (sc->sc_mode != VXLAN_TMODE_P2P) + etherbridge_down(&sc->sc_eb); - /* success */ - return (1); + return (0); } -struct mbuf * -vxlan_encap4(struct ifnet *ifp, struct mbuf *m, - struct sockaddr *src, struct sockaddr *dst) -{ - struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; - struct ip *ip; - - /* - * Remove multicast and broadcast flags or encapsulated packet - * ends up as multicast or broadcast packet. - */ - m->m_flags &= ~(M_BCAST|M_MCAST); +static int +vxlan_set_rdomain(struct vxlan_softc *sc, const struct ifreq *ifr) +{ + struct ifnet *ifp = &sc->sc_ac.ac_if; - M_PREPEND(m, sizeof(*ip), M_DONTWAIT); - if (m == NULL) - return (NULL); + if (ifr->ifr_rdomainid < 0 || + ifr->ifr_rdomainid > RT_TABLEID_MAX) + return (EINVAL); + if (!rtable_exists(ifr->ifr_rdomainid)) + return (EADDRNOTAVAIL); - ip = mtod(m, struct ip *); - ip->ip_v = IPVERSION; - ip->ip_hl = sizeof(struct ip) >> 2; - ip->ip_id = htons(ip_randomid()); - ip->ip_off = sc->sc_df; - ip->ip_p = IPPROTO_UDP; - ip->ip_tos = IFQ_PRIO2TOS(sc->sc_txhprio == IF_HDRPRIO_PACKET ? - m->m_pkthdr.pf.prio : sc->sc_txhprio); - ip->ip_len = htons(m->m_pkthdr.len); + if (sc->sc_rdomain == ifr->ifr_rdomainid) + return (0); - ip->ip_src = satosin(src)->sin_addr; - ip->ip_dst = satosin(dst)->sin_addr; + if (!ISSET(ifp->if_flags, IFF_RUNNING)) + return (EBUSY); - if (sc->sc_ttl > 0) - ip->ip_ttl = sc->sc_ttl; - else - ip->ip_ttl = IPDEFTTL; + /* commit */ + sc->sc_rdomain = ifr->ifr_rdomainid; + etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL); - return (m); + return (0); } -#ifdef INET6 -struct mbuf * -vxlan_encap6(struct ifnet *ifp, struct mbuf *m, - struct sockaddr *src, struct sockaddr *dst) +static int +vxlan_get_rdomain(struct vxlan_softc *sc, struct ifreq *ifr) { - struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; - struct ip6_hdr *ip6; - struct in6_addr *in6a; - uint32_t flow; + ifr->ifr_rdomainid = sc->sc_rdomain; - /* - * Remove multicast and broadcast flags or encapsulated packet - * ends up as multicast or broadcast packet. - */ - m->m_flags &= ~(M_BCAST|M_MCAST); + return (0); +} - M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT); - if (m == NULL) - return (NULL); +static int +vxlan_set_tunnel(struct vxlan_softc *sc, const struct if_laddrreq *req) +{ + struct ifnet *ifp = &sc->sc_ac.ac_if; + struct sockaddr *src = (struct sockaddr *)&req->addr; + struct sockaddr *dst = (struct sockaddr *)&req->dstaddr; + struct sockaddr_in *src4, *dst4; +#ifdef INET6 + struct sockaddr_in6 *src6, *dst6; + int error; +#endif + union vxlan_addr saddr, daddr; + unsigned int mode = VXLAN_TMODE_ENDPOINT; + in_port_t port = htons(VXLAN_PORT); - flow = (uint32_t)IFQ_PRIO2TOS(sc->sc_txhprio == IF_HDRPRIO_PACKET ? - m->m_pkthdr.pf.prio : sc->sc_txhprio) << 20; + memset(&saddr, 0, sizeof(saddr)); + memset(&daddr, 0, sizeof(daddr)); - ip6 = mtod(m, struct ip6_hdr *); - ip6->ip6_flow = htonl(flow); - ip6->ip6_vfc &= ~IPV6_VERSION_MASK; - ip6->ip6_vfc |= IPV6_VERSION; - ip6->ip6_nxt = IPPROTO_UDP; - ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); - if (in6_embedscope(&ip6->ip6_src, satosin6(src), NULL) != 0) - goto drop; - if (in6_embedscope(&ip6->ip6_dst, satosin6(dst), NULL) != 0) - goto drop; + /* validate */ + switch (src->sa_family) { + case AF_INET: + src4 = (struct sockaddr_in *)src; + if (in_nullhost(src4->sin_addr) || + IN_MULTICAST(src4->sin_addr.s_addr)) + return (EINVAL); - if (sc->sc_ttl > 0) - ip6->ip6_hlim = sc->sc_ttl; - else - ip6->ip6_hlim = ip6_defhlim; + if (src4->sin_port != htons(0)) + port = src4->sin_port; + + if (dst->sa_family != AF_UNSPEC) { + if (dst->sa_family != AF_INET) + return (EINVAL); + + dst4 = (struct sockaddr_in *)dst; + if (in_nullhost(dst4->sin_addr)) + return (EINVAL); + + /* all good */ + mode = IN_MULTICAST(dst4->sin_addr.s_addr) ? + VXLAN_TMODE_LEARNING : VXLAN_TMODE_P2P; + daddr.in4 = dst4->sin_addr; + } + + saddr.in4 = src4->sin_addr; + break; + +#ifdef INET6 + case AF_INET6: + src6 = (struct sockaddr_in6 *)src; + if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) || + IN6_IS_ADDR_MULTICAST(&src6->sin6_addr)) + return (EINVAL); - if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr)) { - if (in6_selectsrc(&in6a, satosin6(dst), NULL, - sc->sc_rdomain) != 0) - goto drop; + if (src6->sin6_port != htons(0)) + port = src6->sin6_port; - ip6->ip6_src = *in6a; + if (dst->sa_family != AF_UNSPEC) { + if (dst->sa_family != AF_INET6) + return (EINVAL); + + dst6 = (struct sockaddr_in6 *)dst; + if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr)) + return (EINVAL); + + if (src6->sin6_scope_id != dst6->sin6_scope_id) + return (EINVAL); + + /* all good */ + mode = IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr) ? + VXLAN_TMODE_LEARNING : VXLAN_TMODE_P2P; + error = in6_embedscope(&daddr.in6, dst6, NULL); + if (error != 0) + return (error); + } + + error = in6_embedscope(&sc->sc_src.in6, src6, NULL); + if (error != 0) + return (error); + + break; +#endif + default: + return (EAFNOSUPPORT); } - if (sc->sc_df) - SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT); + if (memcmp(&sc->sc_src, &saddr, sizeof(sc->sc_src)) == 0 && + memcmp(&sc->sc_dst, &daddr, sizeof(sc->sc_dst)) == 0 && + sc->sc_port == port) + return (0); - /* - * The UDP checksum of VXLAN packets should be set to zero, - * but the IPv6 UDP checksum is not optional. There is an RFC 6539 - * to relax the IPv6 UDP checksum requirement for tunnels, but it - * is currently not supported by most implementations. - */ - m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT; + if (ISSET(ifp->if_flags, IFF_RUNNING)) + return (EBUSY); - return (m); + /* commit */ + sc->sc_af = src->sa_family; + sc->sc_src = saddr; + sc->sc_dst = daddr; + sc->sc_port = port; + sc->sc_mode = mode; + etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL); -drop: - m_freem(m); - return (NULL); + return (0); } -#endif /* INET6 */ -int -vxlan_output(struct ifnet *ifp, struct mbuf *m) +static int +vxlan_get_tunnel(struct vxlan_softc *sc, struct if_laddrreq *req) { - struct vxlan_softc *sc = (struct vxlan_softc *)ifp->if_softc; - struct vxlanudphdr *vu; - struct sockaddr *src, *dst; -#if NBRIDGE > 0 - struct bridge_tunneltag *brtag; -#endif - int error, af; - uint32_t tag; - struct mbuf *m0; - - /* VXLAN header, needs new mbuf because of alignment issues */ - MGET(m0, M_DONTWAIT, m->m_type); - if (m0 == NULL) { - ifp->if_oerrors++; - return (ENOBUFS); - } - M_MOVE_PKTHDR(m0, m); - m0->m_next = m; - m = m0; - m_align(m, sizeof(*vu)); - m->m_len = sizeof(*vu); - m->m_pkthdr.len += sizeof(*vu); - - src = sstosa(&sc->sc_src); - dst = sstosa(&sc->sc_dst); - af = src->sa_family; - - vu = mtod(m, struct vxlanudphdr *); - vu->vu_u.uh_sport = sc->sc_dstport; - vu->vu_u.uh_dport = sc->sc_dstport; - vu->vu_u.uh_ulen = htons(m->m_pkthdr.len); - vu->vu_u.uh_sum = 0; - tag = sc->sc_vnetid; - -#if NBRIDGE > 0 - if ((brtag = bridge_tunnel(m)) != NULL) { - dst = &brtag->brtag_peer.sa; - - /* If accepting any VNI, source ip address is from brtag */ - if (sc->sc_vnetid == VXLAN_VNI_ANY) { - src = &brtag->brtag_local.sa; - tag = (uint32_t)brtag->brtag_id; - af = src->sa_family; - } - - if (dst->sa_family != af) { - ifp->if_oerrors++; - m_freem(m); - return (EINVAL); - } - } else + struct sockaddr *dstaddr = (struct sockaddr *)&req->dstaddr; + struct sockaddr_in *sin; +#ifdef INET6 + struct sockaddr_in6 *sin6; #endif - if (sc->sc_vnetid == VXLAN_VNI_ANY) { - /* - * If accepting any VNI, build the vxlan header only by - * bridge_tunneltag or drop packet if the tag does not exist. - */ - ifp->if_oerrors++; - m_freem(m); - return (ENETUNREACH); - } - if (sc->sc_vnetid != VXLAN_VNI_UNSET) { - vu->vu_v.vxlan_flags = htonl(VXLAN_FLAGS_VNI); - vu->vu_v.vxlan_id = htonl(tag << VXLAN_VNI_S); - } else { - vu->vu_v.vxlan_flags = htonl(0); - vu->vu_v.vxlan_id = htonl(0); - } + if (sc->sc_af == AF_UNSPEC) + return (EADDRNOTAVAIL); + KASSERT(sc->sc_mode != VXLAN_TMODE_UNSET); + + memset(&req->addr, 0, sizeof(req->addr)); + memset(&req->dstaddr, 0, sizeof(req->dstaddr)); - switch (af) { + /* default to endpoint */ + dstaddr->sa_len = 2; + dstaddr->sa_family = AF_UNSPEC; + + switch (sc->sc_af) { case AF_INET: - m = vxlan_encap4(ifp, m, src, dst); + sin = (struct sockaddr_in *)&req->addr; + sin->sin_len = sizeof(*sin); + sin->sin_family = AF_INET; + sin->sin_addr = sc->sc_src.in4; + sin->sin_port = sc->sc_port; + + if (sc->sc_mode == VXLAN_TMODE_ENDPOINT) + break; + + sin = (struct sockaddr_in *)&req->dstaddr; + sin->sin_len = sizeof(*sin); + sin->sin_family = AF_INET; + sin->sin_addr = sc->sc_dst.in4; break; + #ifdef INET6 case AF_INET6: - m = vxlan_encap6(ifp, m, src, dst); + sin6 = (struct sockaddr_in6 *)&req->addr; + sin6->sin6_len = sizeof(*sin6); + sin6->sin6_family = AF_INET6; + in6_recoverscope(sin6, &sc->sc_src.in6); + sin6->sin6_port = sc->sc_port; + + if (sc->sc_mode == VXLAN_TMODE_ENDPOINT) + break; + + sin6 = (struct sockaddr_in6 *)&req->dstaddr; + sin6->sin6_len = sizeof(*sin6); + sin6->sin6_family = AF_INET6; + in6_recoverscope(sin6, &sc->sc_dst.in6); break; -#endif /* INET6 */ +#endif default: - m_freem(m); - m = NULL; + unhandled_af(sc->sc_af); } - if (m == NULL) { - ifp->if_oerrors++; - return (ENOBUFS); + return (0); +} + +static int +vxlan_del_tunnel(struct vxlan_softc *sc) +{ + struct ifnet *ifp = &sc->sc_ac.ac_if; + + if (sc->sc_af == AF_UNSPEC) + return (0); + + if (ISSET(ifp->if_flags, IFF_RUNNING)) + return (EBUSY); + + /* commit */ + sc->sc_af = AF_UNSPEC; + memset(&sc->sc_src, 0, sizeof(sc->sc_src)); + memset(&sc->sc_dst, 0, sizeof(sc->sc_dst)); + sc->sc_port = htons(0); + sc->sc_mode = VXLAN_TMODE_UNSET; + etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL); + + return (0); +} + +static int +vxlan_set_vnetid(struct vxlan_softc *sc, const struct ifreq *ifr) +{ + struct ifnet *ifp = &sc->sc_ac.ac_if; + uint32_t vni; + + if (ifr->ifr_vnetid < VXLAN_VNI_MIN || + ifr->ifr_vnetid > VXLAN_VNI_MAX) + return (EINVAL); + + vni = htonl(ifr->ifr_vnetid << VXLAN_VNI_SHIFT); + if (ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)) && + sc->sc_header.vxlan_id == vni) + return (0); + + if (ISSET(ifp->if_flags, IFF_RUNNING)) + return (EBUSY); + + /* commit */ + SET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)); + sc->sc_header.vxlan_id = vni; + etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL); + + return (0); +} + +static int +vxlan_get_vnetid(struct vxlan_softc *sc, struct ifreq *ifr) +{ + uint32_t vni; + + if (!ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I))) + return (EADDRNOTAVAIL); + + vni = ntohl(sc->sc_header.vxlan_flags); + vni &= VXLAN_VNI_MASK; + vni >>= VXLAN_VNI_SHIFT; + + ifr->ifr_vnetid = vni; + + return (0); +} + +static int +vxlan_del_vnetid(struct vxlan_softc *sc) +{ + struct ifnet *ifp = &sc->sc_ac.ac_if; + + if (!ISSET(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I))) + return (0); + + if (ISSET(ifp->if_flags, IFF_RUNNING)) + return (EBUSY); + + /* commit */ + CLR(sc->sc_header.vxlan_flags, htonl(VXLAN_F_I)); + sc->sc_header.vxlan_id = htonl(0); + etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL); + + return (0); +} + +static int +vxlan_set_parent(struct vxlan_softc *sc, const struct if_parent *p) +{ + struct ifnet *ifp = &sc->sc_ac.ac_if; + struct ifnet *ifp0; + int error = 0; + + ifp0 = if_unit(p->ifp_parent); + if (ifp0 == NULL) + return (ENXIO); + + if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) { + error = ENXIO; + goto put; } -#if NBRIDGE > 0 - if (brtag != NULL) - bridge_tunneluntag(m); -#endif + if (sc->sc_if_index0 == ifp0->if_index) + goto put; + + if (ISSET(ifp->if_flags, IFF_RUNNING)) { + error = EBUSY; + goto put; + } + + /* commit */ + sc->sc_if_index0 = ifp0->if_index; + etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL); + +put: + if_put(ifp0); + return (error); +} - m->m_pkthdr.ph_rtableid = sc->sc_rdomain; +static int +vxlan_get_parent(struct vxlan_softc *sc, struct if_parent *p) +{ + struct ifnet *ifp0; + int error = 0; + + ifp0 = if_get(sc->sc_if_index0); + if (ifp0 == NULL) + error = EADDRNOTAVAIL; + else + strlcpy(p->ifp_parent, ifp0->if_xname, sizeof(p->ifp_parent)); + if_put(ifp0); + + return (error); +} + +static int +vxlan_del_parent(struct vxlan_softc *sc) +{ + struct ifnet *ifp = &sc->sc_ac.ac_if; + + if (sc->sc_if_index0 == 0) + return (0); + + if (ISSET(ifp->if_flags, IFF_RUNNING)) + return (EBUSY); + + /* commit */ + sc->sc_if_index0 = 0; + etherbridge_flush(&sc->sc_eb, IFBF_FLUSHALL); -#if NPF > 0 - pf_pkt_addr_changed(m); + return (0); +} + +static int +vxlan_add_addr(struct vxlan_softc *sc, const struct ifbareq *ifba) +{ + struct sockaddr_in *sin; +#ifdef INET6 + struct sockaddr_in6 *sin6; + struct sockaddr_in6 src6 = { + .sin6_len = sizeof(src6), + .sin6_family = AF_UNSPEC, + }; + int error; #endif + union vxlan_addr endpoint; + unsigned int type; - switch (af) { + switch (sc->sc_mode) { + case VXLAN_TMODE_UNSET: + return (ENOPROTOOPT); + case VXLAN_TMODE_P2P: + return (EPROTONOSUPPORT); + default: + break; + } + + /* ignore ifba_ifsname */ + + if (ISSET(ifba->ifba_flags, ~IFBAF_TYPEMASK)) + return (EINVAL); + switch (ifba->ifba_flags & IFBAF_TYPEMASK) { + case IFBAF_DYNAMIC: + type = EBE_DYNAMIC; + break; + case IFBAF_STATIC: + type = EBE_STATIC; + break; + default: + return (EINVAL); + } + + memset(&endpoint, 0, sizeof(endpoint)); + + if (ifba->ifba_dstsa.ss_family != sc->sc_af) + return (EAFNOSUPPORT); + switch (ifba->ifba_dstsa.ss_family) { case AF_INET: - error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, - &sc->sc_imo, NULL, 0); + sin = (struct sockaddr_in *)&ifba->ifba_dstsa; + if (in_nullhost(sin->sin_addr) || + IN_MULTICAST(sin->sin_addr.s_addr)) + return (EADDRNOTAVAIL); + + if (sin->sin_port != htons(0)) + return (EADDRNOTAVAIL); + + endpoint.in4 = sin->sin_addr; break; + #ifdef INET6 case AF_INET6: - error = ip6_output(m, 0, NULL, IPV6_MINMTU, 0, NULL); + sin6 = (struct sockaddr_in6 *)&ifba->ifba_dstsa; + if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || + IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) + return (EADDRNOTAVAIL); + + in6_recoverscope(&src6, &sc->sc_src.in6); + if (src6.sin6_scope_id != sin6->sin6_scope_id) + return (EADDRNOTAVAIL); + + if (sin6->sin6_port != htons(0)) + return (EADDRNOTAVAIL); + + error = in6_embedscope(&endpoint.in6, sin6, NULL); + if (error != 0) + return (error); + break; -#endif /* INET6 */ - default: - m_freem(m); - error = EAFNOSUPPORT; +#endif + default: /* AF_UNSPEC */ + return (EADDRNOTAVAIL); } - if (error) - ifp->if_oerrors++; + return (etherbridge_add_addr(&sc->sc_eb, &endpoint, + &ifba->ifba_dst, type)); +} - return (error); +static int +vxlan_del_addr(struct vxlan_softc *sc, const struct ifbareq *ifba) +{ + return (etherbridge_del_addr(&sc->sc_eb, &ifba->ifba_dst)); } void -vxlan_addr_change(void *arg) +vxlan_detach_hook(void *arg) { - struct vxlan_softc *sc = arg; - struct ifnet *ifp = &sc->sc_ac.ac_if; - int error; - - /* - * Reset the configuration after resume or any possible address - * configuration changes. - */ - if ((error = vxlan_config(ifp, NULL, NULL))) { - /* - * The source address of the tunnel can temporarily disappear, - * after a link state change when running the DHCP client, - * so keep it configured. - */ + struct vxlan_softc *sc = arg; + struct ifnet *ifp = &sc->sc_ac.ac_if; + + if (ISSET(ifp->if_flags, IFF_RUNNING)) { + vxlan_down(sc); + CLR(ifp->if_flags, IFF_UP); } + + sc->sc_if_index0 = 0; } -void -vxlan_if_change(void *arg) +static int +vxlan_eb_port_eq(void *arg, void *a, void *b) { - struct vxlan_softc *sc = arg; - struct ifnet *ifp = &sc->sc_ac.ac_if; + const union vxlan_addr *va = a, *vb = b; + size_t i; - /* - * Reset the configuration after the parent interface disappeared. - */ - vxlan_multicast_cleanup(ifp); - memset(&sc->sc_src, 0, sizeof(sc->sc_src)); - memset(&sc->sc_dst, 0, sizeof(sc->sc_dst)); - sc->sc_dstport = htons(VXLAN_PORT); + for (i = 0; i < nitems(va->in6.s6_addr32); i++) { + if (va->in6.s6_addr32[i] != vb->in6.s6_addr32[i]) + return (0); + } + + return (1); } -void -vxlan_link_change(void *arg) +static void * +vxlan_eb_port_take(void *arg, void *port) +{ + union vxlan_addr *endpoint; + + endpoint = pool_get(&vxlan_endpoint_pool, PR_NOWAIT); + if (endpoint == NULL) + return (NULL); + + *endpoint = *(union vxlan_addr *)port; + + return (endpoint); +} + +static void +vxlan_eb_port_rele(void *arg, void *port) +{ + union vxlan_addr *endpoint = port; + + pool_put(&vxlan_endpoint_pool, endpoint); +} + +static size_t +vxlan_eb_port_ifname(void *arg, char *dst, size_t len, void *port) +{ + struct vxlan_softc *sc = arg; + + return (strlcpy(dst, sc->sc_ac.ac_if.if_xname, len)); +} + +static void +vxlan_eb_port_sa(void *arg, struct sockaddr_storage *ss, void *port) +{ + struct vxlan_softc *sc = arg; + union vxlan_addr *endpoint = port; + + switch (sc->sc_af) { + case AF_INET: { + struct sockaddr_in *sin = (struct sockaddr_in *)ss; + + sin->sin_len = sizeof(*sin); + sin->sin_family = AF_INET; + sin->sin_addr = endpoint->in4; + break; + } +#ifdef INET6 + case AF_INET6: { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss; + + sin6->sin6_len = sizeof(*sin6); + sin6->sin6_family = AF_INET6; + in6_recoverscope(sin6, &endpoint->in6); + break; + } +#endif /* INET6 */ + default: + unhandled_af(sc->sc_af); + } +} + +static inline int +vxlan_peer_cmp(const struct vxlan_peer *ap, const struct vxlan_peer *bp) { - struct vxlan_softc *sc = arg; - struct ifnet *ifp = &sc->sc_ac.ac_if; + size_t i; - /* - * The machine might have lost its multicast associations after - * link state changes. This fixes a problem with VMware after - * suspend/resume of the host or guest. - */ - (void)vxlan_config(ifp, NULL, NULL); + if (ap->p_header.vxlan_id > bp->p_header.vxlan_id) + return (1); + if (ap->p_header.vxlan_id < bp->p_header.vxlan_id) + return (-1); + if (ap->p_header.vxlan_flags > bp->p_header.vxlan_flags) + return (1); + if (ap->p_header.vxlan_flags < bp->p_header.vxlan_flags) + return (-1); + + if (ap->p_mask || bp->p_mask) + return (0); + + for (i = 0; i < nitems(ap->p_addr.in6.s6_addr32); i++) { + if (ap->p_addr.in6.s6_addr32[i] > + bp->p_addr.in6.s6_addr32[i]) + return (1); + if (ap->p_addr.in6.s6_addr32[i] < + bp->p_addr.in6.s6_addr32[i]) + return (-1); + } + + return (0); } + +RBT_GENERATE(vxlan_peers, vxlan_peer, p_entry, vxlan_peer_cmp);