Index: conf/files =================================================================== RCS file: /cvs/src/sys/conf/files,v retrieving revision 1.666 diff -u -p -r1.666 files --- conf/files 20 Dec 2018 23:00:55 -0000 1.666 +++ conf/files 26 Feb 2019 03:46:02 -0000 @@ -59,6 +59,7 @@ define onewire_bitbang # net device attributes - we have generic code for ether(net) define crypto define ether +define mpls define sppp define wlan @@ -563,6 +564,7 @@ pseudo-device crypto: ifnet pseudo-device trunk: ifnet, ether, ifmedia pseudo-device mpe: ifnet, ether pseudo-device mpw: ifnet, ether +pseudo-device mpip: ifnet, mpls pseudo-device bpe: ifnet, ether, ifmedia pseudo-device vether: ifnet, ether pseudo-device pppx: ifnet @@ -814,6 +816,7 @@ file net/if_trunk.c trunk needs-coun file net/trunklacp.c trunk file net/if_mpe.c mpe needs-count file net/if_mpw.c mpw & bridge needs-count +file net/if_mpip.c mpip file net/if_bpe.c bpe needs-count file net/if_vether.c vether needs-count file net/if_pair.c pair needs-count Index: net/if_mpip.c =================================================================== RCS file: net/if_mpip.c diff -N net/if_mpip.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ net/if_mpip.c 26 Feb 2019 03:46:02 -0000 @@ -0,0 +1,706 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2015 Rafael Zalamena + * Copyright (c) 2019 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "bpfilter.h" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#ifdef INET6 +#include +#endif + +#include + +#if NBPFILTER > 0 +#include +#endif /* NBPFILTER */ + +struct mpip_neighbor { + struct shim_hdr n_rshim; + struct sockaddr_storage n_nexthop; +}; + +struct mpip_softc { + struct ifnet sc_if; + unsigned int sc_dead; + uint32_t sc_flow; /* xor for mbuf flowid */ + + struct ifaddr sc_ifa; + struct sockaddr_mpls sc_smpls; /* Local label */ + unsigned int sc_rdomain; + struct mpip_neighbor *sc_neighbor; + + unsigned int sc_cword; /* control word */ + unsigned int sc_fword; /* flow-aware transport */ + int sc_ttl; +}; + +void mpipattach(int); +int mpip_clone_create(struct if_clone *, int); +int mpip_clone_destroy(struct ifnet *); +int mpip_ioctl(struct ifnet *, u_long, caddr_t); +int mpip_output(struct ifnet *, struct mbuf *, struct sockaddr *, + struct rtentry *); +void mpip_start(struct ifnet *); + +struct if_clone mpip_cloner = + IF_CLONE_INITIALIZER("mpip", mpip_clone_create, mpip_clone_destroy); + +void +mpipattach(int n) +{ + if_clone_attach(&mpip_cloner); +} + +int +mpip_clone_create(struct if_clone *ifc, int unit) +{ + struct mpip_softc *sc; + struct ifnet *ifp; + + sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO); + if (sc == NULL) + return (ENOMEM); + + sc->sc_neighbor = 0; + sc->sc_cword = 0; /* default to no control word */ + sc->sc_fword = 0; /* both sides have to agree on FAT first */ + sc->sc_flow = arc4random() & 0xfffff; + sc->sc_smpls.smpls_len = sizeof(sc->sc_smpls); + sc->sc_smpls.smpls_family = AF_MPLS; + sc->sc_ttl = -1; + + ifp = &sc->sc_if; + snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", + ifc->ifc_name, unit); + ifp->if_softc = sc; + ifp->if_type = IFT_TUNNEL; + ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; + ifp->if_xflags = IFXF_CLONED; + ifp->if_ioctl = mpip_ioctl; + ifp->if_output = mpip_output; + ifp->if_start = mpip_start; + ifp->if_rtrequest = p2p_rtrequest; + ifp->if_mtu = 1500; + ifp->if_hardmtu = 65535; + IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); + + if_attach(ifp); + if_counters_alloc(ifp); + if_alloc_sadl(ifp); + +#if NBPFILTER > 0 + bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t)); +#endif + + sc->sc_ifa.ifa_ifp = ifp; + sc->sc_ifa.ifa_addr = sdltosa(ifp->if_sadl); + + return (0); +} + +int +mpip_clone_destroy(struct ifnet *ifp) +{ + struct mpip_softc *sc = ifp->if_softc; + + NET_LOCK(); + ifp->if_flags &= ~IFF_RUNNING; + sc->sc_dead = 1; + + if (sc->sc_smpls.smpls_label) { + rt_ifa_del(&sc->sc_ifa, RTF_LOCAL | RTF_MPLS, + smplstosa(&sc->sc_smpls), 0); + } + NET_UNLOCK(); + + ifq_barrier(&ifp->if_snd); + + if_detach(ifp); + + free(sc->sc_neighbor, M_DEVBUF, sizeof(*sc->sc_neighbor)); + free(sc, M_DEVBUF, sizeof(*sc)); + + return (0); +} + +static int +mpip_set_route(struct mpip_softc *sc, uint32_t shim, unsigned int rdomain) +{ + int error; + + rt_ifa_del(&sc->sc_ifa, RTF_MPLS | RTF_LOCAL, + smplstosa(&sc->sc_smpls), 0); + + sc->sc_smpls.smpls_label = shim; + sc->sc_rdomain = rdomain; + + error = rt_ifa_add(&sc->sc_ifa, RTF_MPLS | RTF_LOCAL, + smplstosa(&sc->sc_smpls), 0); + if (error) { + sc->sc_smpls.smpls_label = MPLS_LABEL2SHIM(0); + return (error); + } + + return (0); +} + +static int +mpip_set_label(struct mpip_softc *sc, struct ifreq *ifr) +{ + struct shim_hdr label; + uint32_t shim; + int error; + + error = copyin(ifr->ifr_data, &label, sizeof(label)); + if (error != 0) + return (error); + + if (label.shim_label > MPLS_LABEL_MAX || + label.shim_label <= MPLS_LABEL_RESERVED_MAX) + return (EINVAL); + + shim = MPLS_LABEL2SHIM(label.shim_label); + + if (sc->sc_smpls.smpls_label == shim) + return (0); + + return (mpip_set_route(sc, shim, sc->sc_rdomain)); +} + +static int +mpip_get_label(struct mpip_softc *sc, struct ifreq *ifr) +{ + struct shim_hdr label; + + label.shim_label = MPLS_SHIM2LABEL(sc->sc_smpls.smpls_label); + + if (label.shim_label == MPLS_LABEL2SHIM(0)) + return (EADDRNOTAVAIL); + + return (copyout(&label, ifr->ifr_data, sizeof(label))); +} + +static int +mpip_del_label(struct mpip_softc *sc) +{ + if (sc->sc_smpls.smpls_label != MPLS_LABEL2SHIM(0)) { + rt_ifa_del(&sc->sc_ifa, RTF_MPLS | RTF_LOCAL, + smplstosa(&sc->sc_smpls), 0); + } + + sc->sc_smpls.smpls_label = MPLS_LABEL2SHIM(0); + + return (0); +} + +static int +mpip_set_neighbor(struct mpip_softc *sc, struct if_laddrreq *req) +{ + struct mpip_neighbor *n, *o; + struct sockaddr *sa = (struct sockaddr *)&req->addr; + struct sockaddr_mpls *smpls = (struct sockaddr_mpls *)&req->dstaddr; + uint32_t label; + + if (smpls->smpls_family != AF_MPLS) + return (EINVAL); + label = smpls->smpls_label; + if (label > MPLS_LABEL_MAX || label <= MPLS_LABEL_RESERVED_MAX) + return (EINVAL); + + switch (sa->sa_family) { + case AF_INET: { + struct sockaddr_in *sin = (struct sockaddr_in *)sa; + + if (in_nullhost(sin->sin_addr) || + IN_MULTICAST(sin->sin_addr.s_addr)) + return (EINVAL); + + break; + } +#ifdef INET6 + case AF_INET6: { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; + + if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || + IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) + return (EINVAL); + + /* check scope */ + + break; + } +#endif + default: + return (EAFNOSUPPORT); + } + + if (sc->sc_dead) + return (ENXIO); + + n = malloc(sizeof(*n), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO); + if (n == NULL) + return (ENOMEM); + + n->n_rshim.shim_label = MPLS_LABEL2SHIM(label); + n->n_nexthop = req->addr; + + o = sc->sc_neighbor; + sc->sc_neighbor = n; + + NET_UNLOCK(); + ifq_barrier(&sc->sc_if.if_snd); + NET_LOCK(); + + free(o, M_DEVBUF, sizeof(*o)); + + return (0); +} + +static int +mpip_get_neighbor(struct mpip_softc *sc, struct if_laddrreq *req) +{ + struct sockaddr_mpls *smpls = (struct sockaddr_mpls *)&req->dstaddr; + struct mpip_neighbor *n = sc->sc_neighbor; + + if (n == NULL) + return (EADDRNOTAVAIL); + + smpls->smpls_len = sizeof(*smpls); + smpls->smpls_family = AF_MPLS; + smpls->smpls_label = MPLS_SHIM2LABEL(n->n_rshim.shim_label); + req->addr = n->n_nexthop; + + return (0); +} + +static int +mpip_del_neighbor(struct mpip_softc *sc, struct ifreq *req) +{ + struct mpip_neighbor *o; + + if (sc->sc_dead) + return (ENXIO); + + o = sc->sc_neighbor; + sc->sc_neighbor = NULL; + + NET_UNLOCK(); + ifq_barrier(&sc->sc_if.if_snd); + NET_LOCK(); + + free(o, M_DEVBUF, sizeof(*o)); + + return (0); +} + +int +mpip_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + struct mpip_softc *sc = ifp->if_softc; + struct ifreq *ifr = (struct ifreq *)data; + int error = 0; + + switch (cmd) { + case SIOCSIFADDR: + break; + case SIOCSIFFLAGS: + if ((ifp->if_flags & IFF_UP)) + ifp->if_flags |= IFF_RUNNING; + else + ifp->if_flags &= ~IFF_RUNNING; + break; + case SIOCSIFMTU: + if (ifr->ifr_mtu < 60 || /* XXX */ + ifr->ifr_mtu > 65536) /* XXX */ + error = EINVAL; + else + ifp->if_mtu = ifr->ifr_mtu; + break; + + case SIOCGPWE3: + ifr->ifr_pwe3 = IF_PWE3_IP; + break; + case SIOCSPWE3CTRLWORD: + sc->sc_cword = ifr->ifr_pwe3 ? 1 : 0; + break; + case SIOCGPWE3CTRLWORD: + ifr->ifr_pwe3 = sc->sc_cword; + break; + case SIOCSPWE3FAT: + sc->sc_fword = ifr->ifr_pwe3 ? 1 : 0; + break; + case SIOCGPWE3FAT: + ifr->ifr_pwe3 = sc->sc_fword; + break; + + case SIOCSETLABEL: + error = mpip_set_label(sc, ifr); + break; + case SIOCGETLABEL: + error = mpip_get_label(sc, ifr); + break; + case SIOCDELLABEL: + error = mpip_del_label(sc); + break; + + case SIOCSPWE3NEIGHBOR: + error = mpip_set_neighbor(sc, (struct if_laddrreq *)data); + break; + case SIOCGPWE3NEIGHBOR: + error = mpip_get_neighbor(sc, (struct if_laddrreq *)data); + break; + case SIOCDPWE3NEIGHBOR: + error = mpip_del_neighbor(sc, ifr); + break; + + case SIOCSLIFPHYRTABLE: + if (ifr->ifr_rdomainid < 0 || + ifr->ifr_rdomainid > RT_TABLEID_MAX || + !rtable_exists(ifr->ifr_rdomainid) || + ifr->ifr_rdomainid != rtable_l2(ifr->ifr_rdomainid)) { + error = EINVAL; + break; + } + if (sc->sc_rdomain != ifr->ifr_rdomainid) { + error = mpip_set_route(sc, sc->sc_smpls.smpls_label, + ifr->ifr_rdomainid); + } + break; + case SIOCGLIFPHYRTABLE: + ifr->ifr_rdomainid = sc->sc_rdomain; + break; + + case SIOCSLIFPHYTTL: + if (ifr->ifr_ttl != -1 && + (ifr->ifr_ttl < 1 || ifr->ifr_ttl > 0xff)) { + error = EINVAL; + break; + } + + /* commit */ + sc->sc_ttl = ifr->ifr_ttl; + break; + case SIOCGLIFPHYTTL: + ifr->ifr_ttl = sc->sc_ttl; + break; + + case SIOCADDMULTI: + case SIOCDELMULTI: + break; + + default: + error = ENOTTY; + break; + } + + return (error); +} + +static void +mpip_input(struct mpip_softc *sc, struct mbuf *m) +{ + struct ifnet *ifp = &sc->sc_if; + uint32_t shim; + struct mbuf *n; + uint8_t ttl; + void (*input)(struct ifnet *, struct mbuf *); + + if (!ISSET(ifp->if_flags, IFF_RUNNING)) + goto drop; + + shim = *mtod(m, uint32_t *); + m_adj(m, sizeof(shim)); + + ttl = ntohl(shim & MPLS_TTL_MASK); + + if (sc->sc_fword) { + uint32_t label; + + if (MPLS_BOS_ISSET(shim)) + goto drop; + + if (m->m_len < sizeof(shim)) { + m = m_pullup(m, sizeof(shim)); + if (m == NULL) + return; + } + + shim = *mtod(m, uint32_t *); + if (!MPLS_BOS_ISSET(shim)) + goto drop; + + label = MPLS_SHIM2LABEL(shim); + if (label <= MPLS_LABEL_RESERVED_MAX) { + counters_inc(ifp->if_counters, ifc_noproto); /* ? */ + goto drop; + } + + label -= MPLS_LABEL_RESERVED_MAX + 1; + label ^= sc->sc_flow; + m->m_pkthdr.ph_flowid = M_FLOWID_VALID | label; + + m_adj(m, sizeof(shim)); + } else if (!MPLS_BOS_ISSET(shim)) + goto drop; + + if (sc->sc_cword) { + if (m->m_len < sizeof(shim)) { + m = m_pullup(m, sizeof(shim)); + if (m == NULL) + return; + } + shim = *mtod(m, uint32_t *); + + /* + * The first 4 bits identifies that this packet is a + * control word. If the control word is configured and + * we received an IP datagram we shall drop it. + */ + if (shim & CW_ZERO_MASK) { + counters_inc(ifp->if_counters, ifc_ierrors); + goto drop; + } + + /* We don't support fragmentation just yet. */ + if (shim & CW_FRAG_MASK) { + counters_inc(ifp->if_counters, ifc_ierrors); + goto drop; + } + + m_adj(m, sizeof(shim)); + } + + n = m; + while (n->m_len == 0) { + n = n->m_next; + if (n == NULL) + goto drop; + } + + switch (*mtod(n, uint8_t *) >> 4) { + case 4: + if (sc->sc_ttl == -1) { + m = mpls_ip_adjttl(m, ttl); + if (m == NULL) + return; + } + input = ipv4_input; + m->m_pkthdr.ph_family = AF_INET; + break; +#ifdef INET6 + case 6: + if (sc->sc_ttl == -1) { + m = mpls_ip6_adjttl(m, ttl); + if (m == NULL) + return; + } + input = ipv6_input; + m->m_pkthdr.ph_family = AF_INET6; + break; +#endif /* INET6 */ + default: + counters_inc(ifp->if_counters, ifc_noproto); + goto drop; + } + + m->m_pkthdr.ph_ifidx = ifp->if_index; + m->m_pkthdr.ph_rtableid = ifp->if_rdomain; + +#if NBPFILTER > 0 + { + caddr_t if_bpf = ifp->if_bpf; + if (if_bpf) { + bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family, + m, BPF_DIRECTION_IN); + } + } +#endif + + (*input)(ifp, m); + return; +drop: + m_freem(m); +} + +int +mpip_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, + struct rtentry *rt) +{ + struct mpip_softc *sc = ifp->if_softc; + int error; + + if (dst->sa_family == AF_LINK && + rt != NULL && ISSET(rt->rt_flags, RTF_LOCAL)) { + mpip_input(sc, m); + return (0); + } + + if (!ISSET(ifp->if_flags, IFF_RUNNING)) { + error = ENETDOWN; + goto drop; + } + + switch (dst->sa_family) { + case AF_INET: +#ifdef INET6 + case AF_INET6: +#endif + break; + default: + error = EAFNOSUPPORT; + goto drop; + } + + m->m_pkthdr.ph_family = dst->sa_family; + + error = if_enqueue(ifp, m); + if (error) + counters_inc(ifp->if_counters, ifc_oerrors); + return (error); + +drop: + m_freem(m); + return (error); +} + +void +mpip_start(struct ifnet *ifp) +{ + struct mpip_softc *sc = ifp->if_softc; + struct mpip_neighbor *n = sc->sc_neighbor; + struct rtentry *rt; + struct ifnet *ifp0; + struct mbuf *m; + uint32_t shim; + struct sockaddr_mpls smpls = { + .smpls_len = sizeof(smpls), + .smpls_family = AF_MPLS, + }; + uint32_t bos; + uint8_t ttl; + + if (!ISSET(ifp->if_flags, IFF_RUNNING) || n == NULL) { + IFQ_PURGE(&ifp->if_snd); + return; + } + + rt = rtalloc(sstosa(&n->n_nexthop), RT_RESOLVE, 0); + if (!rtisvalid(rt)) { + IFQ_PURGE(&ifp->if_snd); + goto rtfree; + } + + ifp0 = if_get(rt->rt_ifidx); + if (ifp0 == NULL) { + IFQ_PURGE(&ifp->if_snd); + goto rtfree; + } + + while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) { +#if NBPFILTER > 0 + caddr_t if_bpf = sc->sc_if.if_bpf; + if (if_bpf) { + bpf_mtap_af(if_bpf, m->m_pkthdr.ph_family, + m, BPF_DIRECTION_OUT); + } +#endif /* NBPFILTER */ + + if (sc->sc_ttl == -1) { + switch (m->m_pkthdr.ph_family) { + case AF_INET: { + struct ip *ip; + ip = mtod(m, struct ip *); + ttl = ip->ip_ttl; + break; + } +#ifdef INET6 + case AF_INET6: { + struct ip6_hdr *ip6; + ip6 = mtod(m, struct ip6_hdr *); + ttl = ip6->ip6_hlim; + break; + } +#endif + default: + unhandled_af(m->m_pkthdr.ph_family); + } + } else + ttl = mpls_defttl; + + if (sc->sc_cword) { + m = m_prepend(m, sizeof(shim), M_NOWAIT); + if (m == NULL) + continue; + + *mtod(m, uint32_t *) = 0; + } + + bos = MPLS_BOS_MASK; + + if (sc->sc_fword) { + uint32_t flow = 0; + m = m_prepend(m, sizeof(shim), M_NOWAIT); + if (m == NULL) + continue; + + if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID)) + flow = m->m_pkthdr.ph_flowid & M_FLOWID_MASK; + flow ^= sc->sc_flow; + flow += MPLS_LABEL_RESERVED_MAX + 1; + + shim = htonl(1) & MPLS_TTL_MASK; + shim |= htonl(flow << MPLS_LABEL_OFFSET) & + MPLS_LABEL_MASK; + shim |= bos; + *mtod(m, uint32_t *) = shim; + + bos = 0; + } + + m = m_prepend(m, sizeof(shim), M_NOWAIT); + if (m == NULL) + continue; + + shim = htonl(ttl) & MPLS_TTL_MASK; + shim |= n->n_rshim.shim_label; + shim |= bos; + *mtod(m, uint32_t *) = shim; + + mpls_output(ifp0, m, (struct sockaddr *)&smpls, rt); + } + + if_put(ifp0); +rtfree: + rtfree(rt); +}