Index: sys/net/if_ipe.c =================================================================== RCS file: sys/net/if_ipe.c diff -N sys/net/if_ipe.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/net/if_ipe.c 24 Jan 2018 02:51:39 -0000 @@ -0,0 +1,781 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2018 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include "bpfilter.h" +#include "pf.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#ifdef INET6 +#include +#include +#endif + +#if NBPFILTER > 0 +#include +#endif + +#if NPF > 0 +#include +#endif + +#include + +struct ipe_tunnel { + struct ip_encap_header ipe_header; + unsigned int ipe_rtableid; + uint32_t ipe_src[4]; + uint32_t ipe_dst[4]; + sa_family_t ipe_af; +}; + +struct ipe_softc { + struct ifnet sc_if; + + struct ipe_tunnel sc_tunnel; + uint8_t sc_ttl; + + RBT_ENTRY(ipe_softc) + sc_entry; +}; + +static int ipe_clone_create(struct if_clone *, int); +static int ipe_clone_destroy(struct ifnet *); + +static struct if_clone ipe_cloner = IF_CLONE_INITIALIZER("ipe", + ipe_clone_create, ipe_clone_destroy); + +RBT_HEAD(ipe_tree, ipe_softc); + +static inline int + ipe_cmp(const struct ipe_softc *, const struct ipe_softc *); + +RBT_PROTOTYPE(ipe_tree, ipe_softc, sc_entry, ipe_cmp); + +struct ipe_tree ipe_softcs = RBT_INITIALIZER(); + +#define IPEHDR (MAX(sizeof(struct ip), sizeof(struct ip6_hdr)) + \ + sizeof(struct ip_encap_header)) +#define IPEMTU (1500 - IPEHDR) + +static int ipe_ioctl(struct ifnet *, u_long, caddr_t); +static int ipe_up(struct ipe_softc *); +static int ipe_down(struct ipe_softc *); +static int ipe_set_vnetid(struct ipe_softc *, struct ifreq *); +static int ipe_get_vnetid(struct ipe_softc *, struct ifreq *); +static int ipe_set_tunnel(struct ipe_softc *, struct if_laddrreq *); +static int ipe_get_tunnel(struct ipe_softc *, struct if_laddrreq *); +static int ipe_del_tunnel(struct ipe_softc *); + +static int ipe_output(struct ifnet *, struct mbuf *, + struct sockaddr *, struct rtentry *); +static void ipe_start(struct ifnet *); +static int ipe_encap(struct ipe_softc *, struct mbuf *); +static int ipe_input(struct ipe_softc *, struct mbuf **, int); +static int ipe_encap4(struct ipe_softc *, struct mbuf *); +#ifdef INET6 
+static int ipe_encap6(struct ipe_softc *, struct mbuf *); +#endif + +/* + * let's begin + */ + +void +ipeattach(int n) +{ + if_clone_attach(&ipe_cloner); +} + +int +ipe_clone_create(struct if_clone *ifc, int unit) +{ + struct ipe_softc *sc; + + sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO); + if (!sc) + return (ENOMEM); + + sc->sc_tunnel.ipe_rtableid = 0; + sc->sc_tunnel.ipe_af = AF_UNSPEC; + sc->sc_tunnel.ipe_header.ieh_flags = htons(IPE_FLAGS); + sc->sc_tunnel.ipe_header.ieh_cksum = 0; + sc->sc_tunnel.ipe_header.ieh_flowid = 0; + sc->sc_ttl = ip_defttl; + + snprintf(sc->sc_if.if_xname, sizeof sc->sc_if.if_xname, "%s%d", + ifc->ifc_name, unit); + sc->sc_if.if_softc = sc; + sc->sc_if.if_type = IFT_TUNNEL; + sc->sc_if.if_addrlen = 0; + sc->sc_if.if_mtu = IPEMTU; + sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST; + sc->sc_if.if_output = ipe_output; + sc->sc_if.if_start = ipe_start; + sc->sc_if.if_ioctl = ipe_ioctl; + sc->sc_if.if_rtrequest = p2p_rtrequest; + + if_attach(&sc->sc_if); + if_alloc_sadl(&sc->sc_if); + +#if NBPFILTER > 0 + bpfattach(&sc->sc_if.if_bpf, &sc->sc_if, DLT_LOOP, sizeof(uint32_t)); +#endif + + return (0); +} + +int +ipe_clone_destroy(struct ifnet *ifp) +{ + struct ipe_softc *sc = ifp->if_softc; + + if_detach(ifp); + + free(sc, M_DEVBUF, sizeof(*sc)); + + return (0); +} + +/* + * do a checksum of a header. + * + * assumes len is aligned correctly, and not an odd number of bytes. 
+ */ +static inline uint16_t +ipe_cksum(const void *buf, size_t len) +{ + const uint16_t *p = buf; + uint32_t sum = 0; + + do { + sum += bemtoh16(p++); + } while (len -= 2); + + /* end-around-carry */ + sum = (sum >> 16) + (sum & 0xffff); + sum += (sum >> 16); + return (~sum); +} + +static inline int +ipe_cmp(const struct ipe_softc *a, const struct ipe_softc *b) +{ + return (memcmp(&a->sc_tunnel, &b->sc_tunnel, sizeof(a->sc_tunnel))); +} + +static int +ipe_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, + struct rtentry *rt) +{ + struct m_tag *mtag; + int error = 0; + + if (!ISSET(ifp->if_flags, IFF_RUNNING)) { + m_freem(m); + error = ENETDOWN; + goto end; + } + + switch (dst->sa_family) { + case AF_INET: + break; +#ifdef INET6 + case AF_INET6: + break; +#endif + default: + m_freem(m); + error = EAFNOSUPPORT; + goto end; + } + + /* Try to limit infinite recursion through misconfiguration. */ + for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag; + mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) { + if (memcmp(mtag + 1, &ifp->if_index, + sizeof(ifp->if_index)) == 0) { + m_freem(m); + error = EIO; + goto end; + } + } + + mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT); + if (mtag == NULL) { + m_freem(m); + error = ENOBUFS; + goto end; + } + memcpy(mtag + 1, &ifp->if_index, sizeof(ifp->if_index)); + m_tag_prepend(m, mtag); + + m->m_pkthdr.ph_family = dst->sa_family; + + error = if_enqueue(ifp, m); + end: + if (error) + ifp->if_oerrors++; + return (error); +} + +static void +ipe_start(struct ifnet *ifp) +{ + struct ipe_softc *sc = ifp->if_softc; + struct mbuf *m; + + while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) { +#if NBPFILTER > 0 + if (ifp->if_bpf) { + bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family, + m, BPF_DIRECTION_OUT); + } +#endif + + if (ipe_encap(sc, m) != 0) + ifp->if_oerrors++; + } +} + +static int +ipe_encap(struct ipe_softc *sc, struct mbuf *m) +{ + struct ipe_tunnel *tunnel = &sc->sc_tunnel; + struct ip_encap_header 
*ieh; + + m = m_prepend(m, sizeof(*ieh), M_DONTWAIT); + if (m == NULL) + return (ENOBUFS); + + ieh = mtod(m, struct ip_encap_header *); + *ieh = tunnel->ipe_header; + + m->m_flags &= ~(M_BCAST|M_MCAST); + m->m_pkthdr.ph_rtableid = tunnel->ipe_rtableid; +#if NPF > 0 + pf_pkt_addr_changed(m); +#endif + +#ifdef INET6 + if (tunnel->ipe_af == AF_INET6) + return (ipe_encap6(sc, m)); +#endif + + return (ipe_encap4(sc, m)); +} + +static int +ipe_encap4(struct ipe_softc *sc, struct mbuf *m) +{ + struct ipe_tunnel *tunnel = &sc->sc_tunnel; + struct ip *ip; + + m = m_prepend(m, sizeof(*ip), M_DONTWAIT); + if (m == NULL) + return (ENOBUFS); + + ip = mtod(m, struct ip *); + ip->ip_v = IPVERSION; + ip->ip_hl = sizeof(*ip) >> 2; + ip->ip_tos = IPTOS_LOWDELAY; + ip->ip_len = htons(m->m_pkthdr.len); + ip->ip_id = htons(ip_randomid()); + ip->ip_off = 0; + ip->ip_ttl = sc->sc_ttl; + ip->ip_p = IPPROTO_ENCAP; + ip->ip_sum = 0; + ip->ip_src.s_addr = tunnel->ipe_src[0]; + ip->ip_dst.s_addr = tunnel->ipe_dst[0]; + + ip_send(m); + + return (0); +} + +#ifdef INET6 +static int +ipe_encap6(struct ipe_softc *sc, struct mbuf *m) +{ + struct ipe_tunnel *tunnel = &sc->sc_tunnel; + struct ip6_hdr *ip6; + uint16_t len = m->m_pkthdr.len; + + m = m_prepend(m, sizeof(*ip6), M_DONTWAIT); + if (m == NULL) + return (ENOBUFS); + + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_flow = 0; + ip6->ip6_vfc &= ~IPV6_VERSION_MASK; + ip6->ip6_vfc |= IPV6_VERSION; + ip6->ip6_nxt = IPPROTO_ENCAP; + ip6->ip6_hlim = sc->sc_ttl; + ip6->ip6_plen = htons(len); + memcpy(&ip6->ip6_src, tunnel->ipe_src, sizeof(ip6->ip6_src)); + memcpy(&ip6->ip6_dst, tunnel->ipe_dst, sizeof(ip6->ip6_dst)); + + ip6_send(m); + + return (0); +} +#endif /* INET6 */ + +int +ipe_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + struct ipe_softc *sc = ifp->if_softc; + struct ifreq *ifr = (struct ifreq *)data; + int error = 0; + + switch(cmd) { + case SIOCSIFADDR: + ifp->if_flags |= IFF_UP; + /* FALLTHROUGH */ + case SIOCSIFFLAGS: + if 
(ISSET(ifp->if_flags, IFF_UP)) { + if (!ISSET(ifp->if_flags, IFF_RUNNING)) + error = ipe_up(sc); + else + error = ENETRESET; + } else { + if (ISSET(ifp->if_flags, IFF_RUNNING)) + error = ipe_down(sc); + } + break; + case SIOCSIFDSTADDR: + break; + case SIOCSIFMTU: + if (ifr->ifr_mtu < 576) { + error = EINVAL; + break; + } + ifp->if_mtu = ifr->ifr_mtu; + break; + case SIOCGIFMTU: + ifr->ifr_mtu = sc->sc_if.if_mtu; + break; + case SIOCADDMULTI: + case SIOCDELMULTI: + break; + + case SIOCSVNETID: + error = ipe_set_vnetid(sc, ifr); + break; + case SIOCGVNETID: + error = ipe_get_vnetid(sc, ifr); + break; + case SIOCSLIFPHYADDR: + error = ipe_set_tunnel(sc, (struct if_laddrreq *)data); + break; + case SIOCGLIFPHYADDR: + error = ipe_get_tunnel(sc, (struct if_laddrreq *)data); + break; + case SIOCDIFPHYADDR: + error = ipe_del_tunnel(sc); + break; + + case SIOCSLIFPHYRTABLE: + if (ISSET(ifp->if_flags, IFF_RUNNING)) { + error = EBUSY; + break; + } + + if (ifr->ifr_rdomainid < 0 || + ifr->ifr_rdomainid > RT_TABLEID_MAX || + !rtable_exists(ifr->ifr_rdomainid)) { + error = EINVAL; + break; + } + sc->sc_tunnel.ipe_rtableid = ifr->ifr_rdomainid; + break; + case SIOCGLIFPHYRTABLE: + ifr->ifr_rdomainid = sc->sc_tunnel.ipe_rtableid; + break; + + case SIOCSLIFPHYTTL: + if (ISSET(ifp->if_flags, IFF_RUNNING)) { + error = EBUSY; + break; + } + if (ifr->ifr_ttl < 0 || ifr->ifr_ttl > 0xff) { + error = EINVAL; + break; + } + + /* commit */ + sc->sc_ttl = (uint8_t)ifr->ifr_ttl; + break; + + case SIOCGLIFPHYTTL: + ifr->ifr_ttl = (int)sc->sc_ttl; + break; + + default: + error = ENOTTY; + break; + } + + return (error); +} + +static int +ipe_up(struct ipe_softc *sc) +{ + struct ipe_tunnel *tunnel = &sc->sc_tunnel; + uint16_t cksum; + + if (tunnel->ipe_af == AF_UNSPEC) + return (EDESTADDRREQ); + + tunnel->ipe_header.ieh_cksum = 0; + cksum = ipe_cksum(&tunnel->ipe_header, sizeof(&tunnel->ipe_header)); + htobem16(&tunnel->ipe_header.ieh_cksum, cksum); + + NET_ASSERT_LOCKED(); + if 
(RBT_INSERT(ipe_tree, &ipe_softcs, sc) != NULL) + return (EADDRINUSE); + + SET(sc->sc_if.if_flags, IFF_RUNNING); + + return (0); +} + +static int +ipe_down(struct ipe_softc *sc) +{ + NET_ASSERT_LOCKED(); + RBT_REMOVE(ipe_tree, &ipe_softcs, sc); + + CLR(sc->sc_if.if_flags, IFF_RUNNING); + + ifq_barrier(&sc->sc_if.if_snd); + + return (0); +} + +static int +ipe_set_vnetid(struct ipe_softc *sc, struct ifreq *ifr) +{ + struct ipe_tunnel *tunnel = &sc->sc_tunnel; + + if (ISSET(sc->sc_if.if_flags, IFF_RUNNING)) + return (EBUSY); + + if (ifr->ifr_vnetid < 0 || ifr->ifr_vnetid > 0xffffffff) + return (EINVAL); + + htobem32(&tunnel->ipe_header.ieh_flowid, ifr->ifr_vnetid); + + return (0); +} + +static int +ipe_get_vnetid(struct ipe_softc *sc, struct ifreq *ifr) +{ + struct ipe_tunnel *tunnel = &sc->sc_tunnel; + + ifr->ifr_vnetid = bemtoh32(&tunnel->ipe_header.ieh_flowid); + + return (0); +} + +static int +ipe_set_tunnel(struct ipe_softc *sc, struct if_laddrreq *req) +{ + struct ipe_tunnel *tunnel = &sc->sc_tunnel; + struct sockaddr *src = (struct sockaddr *)&req->addr; + struct sockaddr *dst = (struct sockaddr *)&req->dstaddr; + struct sockaddr_in *src4, *dst4; +#ifdef INET6 + struct sockaddr_in6 *src6, *dst6; + struct in6_addr srcin6, dstin6; + int error; +#endif + + if (ISSET(sc->sc_if.if_flags, IFF_RUNNING)) + return (EBUSY); + + /* sa_family and sa_len must be equal */ + if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len) + return (EINVAL); + + /* validate */ + switch (dst->sa_family) { + case AF_INET: + src4 = (struct sockaddr_in *)src; + if (in_nullhost(src4->sin_addr) || + IN_MULTICAST(src4->sin_addr.s_addr)) + return (EINVAL); + + dst4 = (struct sockaddr_in *)dst; + if (in_nullhost(dst4->sin_addr) || + IN_MULTICAST(dst4->sin_addr.s_addr)) + return (EINVAL); + + /* commit */ + memset(tunnel->ipe_src, 0, sizeof(tunnel->ipe_src)); + memset(tunnel->ipe_dst, 0, sizeof(tunnel->ipe_dst)); + tunnel->ipe_src[0] = src4->sin_addr.s_addr; + tunnel->ipe_dst[0] = 
dst4->sin_addr.s_addr; + tunnel->ipe_af = AF_INET; + + break; +#ifdef INET6 + case AF_INET6: + if (dst->sa_len != sizeof(*src6)) + return (EINVAL); + + src6 = (struct sockaddr_in6 *)src; + if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) || + IN6_IS_ADDR_MULTICAST(&src6->sin6_addr)) + return (EINVAL); + + error = in6_embedscope(&srcin6, src6, NULL); + if (error != 0) + return (error); + + dst6 = (struct sockaddr_in6 *)dst; + if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr) || + IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr)) + return (EINVAL); + + error = in6_embedscope(&dstin6, dst6, NULL); + if (error != 0) + return (error); + + /* commit */ + memcpy(tunnel->ipe_src, &srcin6, sizeof(tunnel->ipe_src)); + memcpy(tunnel->ipe_dst, &dstin6, sizeof(tunnel->ipe_dst)); + tunnel->ipe_af = AF_INET6; + + break; +#endif + default: + return (EAFNOSUPPORT); + } + + return (0); +} + +static int +ipe_get_tunnel(struct ipe_softc *sc, struct if_laddrreq *req) +{ + struct ipe_tunnel *tunnel = &sc->sc_tunnel; + struct sockaddr *src = (struct sockaddr *)&req->addr; + struct sockaddr *dst = (struct sockaddr *)&req->dstaddr; + struct sockaddr_in *sin; +#ifdef INET6 /* ifconfig already embeds the scopeid */ + struct sockaddr_in6 *sin6; +#endif + + switch (tunnel->ipe_af) { + case AF_UNSPEC: + return (EADDRNOTAVAIL); + case AF_INET: + sin = (struct sockaddr_in *)src; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); + sin->sin_addr.s_addr = tunnel->ipe_src[0]; + + sin = (struct sockaddr_in *)dst; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); + sin->sin_addr.s_addr = tunnel->ipe_dst[0]; + + break; + +#ifdef INET6 + case AF_INET6: + sin6 = (struct sockaddr_in6 *)src; + memset(sin6, 0, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); + in6_recoverscope(sin6, (struct in6_addr *)tunnel->ipe_src); + + sin6 = (struct sockaddr_in6 *)dst; + memset(sin6, 0, sizeof(*sin6)); + 
sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); + in6_recoverscope(sin6, (struct in6_addr *)tunnel->ipe_dst); + + break; +#endif + default: + return (EAFNOSUPPORT); + } + + return (0); +} + +static int +ipe_del_tunnel(struct ipe_softc *sc) +{ + struct ipe_tunnel *tunnel = &sc->sc_tunnel; + + if (ISSET(sc->sc_if.if_flags, IFF_RUNNING)) + return (EBUSY); + + /* commit */ + tunnel->ipe_af = AF_UNSPEC; + + return (0); +} + +int +ipe_input4(struct mbuf **mp, int *offp, int type, int af) +{ + struct ipe_softc key; + struct ip *ip; + int iphlen; + + ip = mtod(*mp, struct ip *); + iphlen = ip->ip_hl << 2; + + memset(&key, 0, sizeof(key)); + key.sc_tunnel.ipe_af = AF_INET; + key.sc_tunnel.ipe_src[0] = ip->ip_dst.s_addr; + key.sc_tunnel.ipe_dst[0] = ip->ip_src.s_addr; + + return (ipe_input(&key, mp, iphlen)); +} + +#ifdef INET6 +int +ipe_input6(struct mbuf **mp, int *offp, int type, int af) +{ + struct ipe_softc key; + struct ip6_hdr *ip6; + + ip6 = mtod(*mp, struct ip6_hdr *); + + memset(&key, 0, sizeof(key)); + key.sc_tunnel.ipe_af = AF_INET6; + memcpy(key.sc_tunnel.ipe_src, &ip6->ip6_dst, + sizeof(key.sc_tunnel.ipe_src)); + memcpy(key.sc_tunnel.ipe_dst, &ip6->ip6_src, + sizeof(key.sc_tunnel.ipe_dst)); + + return (ipe_input(&key, mp, sizeof(*ip6))); +} +#endif + +static int +ipe_input(struct ipe_softc *key, struct mbuf **mp, int iphlen) +{ + struct ipe_tunnel *tunnel = &key->sc_tunnel; + struct mbuf *m = *mp; + struct ifnet *ifp; + struct ipe_softc *sc; + struct ip_encap_header *ieh; + caddr_t hdr; + int hlen; + int af; + + hlen = iphlen + sizeof(*ieh); + + m = *mp = m_pullup(m, hlen); + if (m == NULL) + return (IPPROTO_DONE); + + hdr = mtod(m, caddr_t); + ieh = (struct ip_encap_header *)(hdr + iphlen); + + tunnel->ipe_rtableid = m->m_pkthdr.ph_rtableid; + tunnel->ipe_header = *ieh; + + /* NET_ASSERT_READ_LOCKED() */ + sc = RBT_FIND(ipe_tree, &ipe_softcs, key); + if (sc == NULL) + goto drop; + + m_adj(m, hlen); + + if (m->m_len == 0) + goto drop; + + switch 
(*mtod(m, uint8_t *) >> 4) { + case 4: + af = AF_INET; + break; +#ifdef INET6 + case 6: + af = AF_INET6; + break; +#endif + default: + goto drop; + } + + ifp = &sc->sc_if; + + CLR(m->m_flags, M_MCAST|M_BCAST); + SET(m->m_pkthdr.csum_flags, M_IPV4_CSUM_IN_OK); + m->m_pkthdr.ph_ifidx = ifp->if_index; + m->m_pkthdr.ph_rtableid = ifp->if_rdomain; + +#if NPF > 0 + pf_pkt_addr_changed(m); +#endif + + ifp->if_ipackets++; + ifp->if_ibytes += m->m_pkthdr.len; + +#if NBPFILTER > 0 + if (ifp->if_bpf) + bpf_mtap_af(ifp->if_bpf, af, m, BPF_DIRECTION_IN); +#endif + + switch (af) { + case AF_INET: + ipv4_input(ifp, m); + break; +#ifdef INET6 + case AF_INET6: + ipv6_input(ifp, m); + break; +#endif + } + + return (IPPROTO_DONE); + +drop: + m_freem(m); + return (IPPROTO_DONE); +} + +RBT_GENERATE(ipe_tree, ipe_softc, sc_entry, ipe_cmp); Index: sys/net/if_ipe.h =================================================================== RCS file: sys/net/if_ipe.h diff -N sys/net/if_ipe.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/net/if_ipe.h 24 Jan 2018 02:51:39 -0000 @@ -0,0 +1,47 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2018 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#ifndef _NET_IF_IPE_H +#define _NET_IF_IPE_H + +struct ip_encap_header { + uint16_t ieh_flags; +#define IPE_VERS_MASK 0xf000 +#define IPE_VERS_1 0x1000 +#define IPE_HL_MASK 0x0f00 +#define IPE_HL_SHIFT 8 +#define IPE_MT_MASK 0x00f0 +#define IPE_MT_DATA 0x0010 +#define IPE_MT_ERROR 0x0020 +#define IPE_RC_MASK 0x000f +#define IPE_RC_UNK_FLOWID 0x0001 +#define IPE_RC_ICMP_RETURNED 0x0002 + uint16_t ieh_cksum; + uint32_t ieh_flowid; +}__packed __aligned(4); + +#define IPE_HL (sizeof(struct ip_encap_header) << IPE_HL_SHIFT) +#define IPE_FLAGS (IPE_VERS_1 | IPE_HL | IPE_MT_DATA) + +#ifdef _KERNEL +void ipeattach(int); +int ipe_input4(struct mbuf **, int *, int, int); +int ipe_input6(struct mbuf **, int *, int, int); +#endif /* _KERNEL */ + +#endif /* _NET_IF_IPE_H */ Index: sys/netinet/in_proto.c =================================================================== RCS file: /cvs/src/sys/netinet/in_proto.c,v retrieving revision 1.88 diff -u -p -r1.88 in_proto.c --- sys/netinet/in_proto.c 23 Nov 2017 13:45:46 -0000 1.88 +++ sys/netinet/in_proto.c 24 Jan 2018 02:51:39 -0000 @@ -172,6 +172,11 @@ #include #endif +#include "ipe.h" +#if NIPE > 0 +#include +#endif + u_char ip_protox[IPPROTO_MAX]; const struct protosw inetsw[] = { @@ -417,6 +422,19 @@ const struct protosw inetsw[] = { .pr_sysctl = etherip_sysctl }, #endif /* NETHERIP */ +#if NIPE > 0 +{ + .pr_type = SOCK_RAW, + .pr_domain = &inetdomain, + .pr_protocol = IPPROTO_ENCAP, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = ipe_input4, + .pr_ctloutput = rip_ctloutput, + .pr_usrreq = rip_usrreq, + .pr_attach = rip_attach, + .pr_detach = rip_detach, +}, +#endif /* NIPE */ { /* raw wildcard */ .pr_type = SOCK_RAW, Index: sys/netinet6/in6_proto.c =================================================================== RCS file: /cvs/src/sys/netinet6/in6_proto.c,v retrieving revision 1.100 diff -u -p -r1.100 in6_proto.c --- sys/netinet6/in6_proto.c 23 Nov 2017 13:45:46 -0000 1.100 +++ sys/netinet6/in6_proto.c 24 Jan
2018 02:51:39 -0000 @@ -116,6 +116,11 @@ #include #endif +#include "ipe.h" +#if NIPE > 0 +#include +#endif + /* * TCP/IP protocol family: IP6, ICMP6, UDP, TCP. */ @@ -313,6 +318,19 @@ const struct protosw inet6sw[] = { .pr_detach = rip6_detach, }, #endif /* NETHERIP */ +#if NIPE > 0 +{ + .pr_type = SOCK_RAW, + .pr_domain = &inet6domain, + .pr_protocol = IPPROTO_ENCAP, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = ipe_input6, + .pr_ctloutput = rip6_ctloutput, + .pr_usrreq = rip6_usrreq, + .pr_attach = rip6_attach, + .pr_detach = rip6_detach, +}, +#endif /* NIPE */ { /* raw wildcard */ .pr_type = SOCK_RAW, Index: sys/conf/GENERIC =================================================================== RCS file: /cvs/src/sys/conf/GENERIC,v retrieving revision 1.250 diff -u -p -r1.250 GENERIC --- sys/conf/GENERIC 25 Oct 2017 12:38:21 -0000 1.250 +++ sys/conf/GENERIC 24 Jan 2018 02:51:39 -0000 @@ -90,6 +90,7 @@ pseudo-device carp # CARP protocol supp pseudo-device etherip # EtherIP (RFC 3378) pseudo-device gif # IPv[46] over IPv[46] tunnel (RFC1933) pseudo-device gre # GRE encapsulation interface +pseudo-device ipe # RFC 1241 IP encapsulation interface pseudo-device loop # network loopback pseudo-device mpe # MPLS PE interface pseudo-device mpw # MPLS pseudowire support Index: sys/conf/files =================================================================== RCS file: /cvs/src/sys/conf/files,v retrieving revision 1.656 diff -u -p -r1.656 files --- sys/conf/files 16 Nov 2017 18:12:27 -0000 1.656 +++ sys/conf/files 24 Jan 2018 02:51:40 -0000 @@ -551,6 +551,7 @@ pseudo-device carp: ifnet, ether pseudo-device sppp: ifnet pseudo-device gif: ifnet pseudo-device gre: ifnet +pseudo-device ipe: ifnet pseudo-device crypto: ifnet pseudo-device trunk: ifnet, ether, ifmedia pseudo-device mpe: ifnet, ether @@ -798,6 +799,7 @@ file net/rtsock.c file net/slcompress.c ppp file net/if_enc.c enc needs-count file net/if_gre.c gre needs-count +file net/if_ipe.c ipe needs-count file 
net/if_trunk.c trunk needs-count file net/trunklacp.c trunk file net/if_mpe.c mpe needs-count Index: share/man/man4/Makefile =================================================================== RCS file: /cvs/src/share/man/man4/Makefile,v retrieving revision 1.663 diff -u -p -r1.663 Makefile --- share/man/man4/Makefile 2 Jan 2018 22:56:01 -0000 1.663 +++ share/man/man4/Makefile 24 Jan 2018 02:51:40 -0000 @@ -31,7 +31,7 @@ MAN= aac.4 ac97.4 acphy.4 acrtc.4 \ hvn.4 hvs.4 hyperv.4 \ iatp.4 ichiic.4 ichwdt.4 icmp.4 icmp6.4 icsphy.4 ifmedia.4 \ iha.4 ihidev.4 iic.4 ikbd.4 ims.4 imt.4 inet.4 inet6.4 inphy.4 iophy.4 \ - ip.4 ip6.4 ipcomp.4 ipgphy.4 ipmi.4 ips.4 ipsec.4 ipw.4 \ + ip.4 ip6.4 ipcomp.4 ipe.4 ipgphy.4 ipmi.4 ips.4 ipsec.4 ipw.4 \ isa.4 isagpio.4 isapnp.4 it.4 itherm.4 iwi.4 iwn.4 iwm.4 \ ix.4 ixgb.4 jmb.4 jme.4 jmphy.4 \ kate.4 km.4 ksyms.4 kue.4 lc.4 lge.4 lii.4 lisa.4 lm.4 \ Index: share/man/man4/ipe.4 =================================================================== RCS file: share/man/man4/ipe.4 diff -N share/man/man4/ipe.4 --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ share/man/man4/ipe.4 24 Jan 2018 02:51:40 -0000 @@ -0,0 +1,172 @@ +.\" $OpenBSD$ +.\" +.\" Copyright (c) 2018 David Gwynne +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+.\" +.Dd $Mdocdate$ +.Dt IPE 4 +.Sh NAME +.Nm ipe +.Nd RFC 1241 Internet Encapsulation Protocol network device +.Sh SYNOPSIS +.Cd "pseudo-device ipe" +.Sh DESCRIPTION +The +.Nm +driver provides IP tunnel construction using +A Scheme for an Internet Encapsulation Protocol: Version 1 (RFC 1241). +.Pp +.Nm +datagrams (IP protocol number 98 as per RFC 1700) +are encapsulated into IP using a small encapsulation header. +Different tunnels between the same endpoints are distinguished by a +Flow Identifier in the header. +This protocol according to the RFC only supports encapsulating IPv4 +in IPv4, but the driver also supports IPv6. +.Pp +A +.Nm +interface can be created at runtime using the +.Ic ifconfig Nm Ns Ar N Ic create +command or by setting up a +.Xr hostname.if 5 +configuration file for +.Xr netstart 8 . +.Pp +The MTU is set to 1452 by default. +This may not be an optimal value +depending on the link between the two tunnel endpoints, +but it can be adjusted via +.Xr ifconfig 8 . +.Pp +For correct operation, the route to the tunnel destination must not +go over the interface itself. +This can be implemented by adding a distinct or a more specific +route to the tunnel destination than the hosts or networks routed +via the tunnel interface. +Alternatively, the tunnel traffic may be configured in a separate +routing table to the encapsulated traffic. +.Pp +.Nm +interfaces support the following +.Xr ioctl 2 Ns s +for configuring tunnel options: +.Bl -tag -width indent -offset 3n +.It Dv SIOCSLIFPHYADDR Fa "struct if_laddrreq *" +Set the addresses of the outer IP header. +The addresses may only be configured while the interface is down. +.It Dv SIOCGLIFPHYADDR Fa "struct if_laddrreq *" +Get the addresses of the outer IP header. +.It Dv SIOCDIFPHYADDR +Clear the outer IP header addresses. +The addresses may only be cleared while the interface is down. 
+.It Dv SIOCSVNETID Fa "struct ifreq *"
+Set a 32 bit virtual network identifier used as the Flow Identifier
+in the IP Encapsulation header.
+The virtual network identifier may only be configured while the
+interface is down.
+.It Dv SIOCGVNETID Fa "struct ifreq *"
+Get the virtual network identifier used as the Flow Identifier in
+the IP Encapsulation header.
+.It Dv SIOCSLIFPHYRTABLE Fa "struct ifreq *"
+Set the routing table the encapsulated IP packets operate within.
+The routing table may only be configured while the interface is down.
+.It Dv SIOCGLIFPHYRTABLE Fa "struct ifreq *"
+Get the routing table the encapsulated IP packets operate within.
+.It Dv SIOCSLIFPHYTTL Fa "struct ifreq *"
+Set the Time-To-Live field in IPv4 encapsulation headers, or the
+Hop Limit field in IPv6 encapsulation headers.
+The value may only be configured while the interface is down.
+.It Dv SIOCGLIFPHYTTL Fa "struct ifreq *"
+Get the value used in the Time-To-Live field in an IPv4 encapsulation
+header or the Hop Limit field in an IPv6 encapsulation header.
+.El
+.Sh EXAMPLES
+Configuration example:
+.Bd -literal
+Host X --- Host A ----------- IP Encap ------------ Host D --- Host E
+              \e                                      /
+               \e                                    /
+                +------ Host B ------ Host C ------+
+.Ed
+.Pp
+On Host A
+.Pq Ox :
+.Bd -literal -offset indent
+# route add default B
+# ifconfig ipeN create
+# ifconfig ipeN tunnel A D
+# ifconfig ipeN A D netmask 255.255.255.255
+# route add E D
+.Ed
+.Pp
+On Host D
+.Pq Ox :
+.Bd -literal -offset indent
+# route add default C
+# ifconfig ipeN create
+# ifconfig ipeN tunnel D A
+# ifconfig ipeN D A netmask 255.255.255.255
+# route add D E
+.Ed
+.Pp
+The Flow Identifier may be set using
+.Xr ifconfig 8
+and the vnetid argument:
+.Bd -literal -offset indent
+# ifconfig ipeN vnetid 128
+.Ed
+.Pp
+The route domain used for the encapsulated traffic may be set using
+.Xr ifconfig 8
+and the tunneldomain argument:
+.Bd -literal -offset indent
+# ifconfig ipeN tunneldomain 1
+.Ed
+.Sh SEE ALSO
+.Xr inet 4 ,
+.Xr ip 4 ,
+.Xr netintro 4 ,
+.Xr options 4 ,
+.Xr hostname.if 5 ,
+.Xr protocols 5 , +.Xr ifconfig 8 , +.Xr netstart 8 +.Sh STANDARDS +.Rs +.%A R. Woodburn +.%A D. Mills +.%D July 1991 +.%R RFC 1241 +.%T A Scheme for an Internet Encapsulation Protocol: Version 1 +.Re +.Pp +.Rs +.%A J. Reynolds +.%A J. Postel +.%D October 1994 +.%R RFC 1700 +.%T Assigned Numbers +.Re +.Sh HISTORY +The +.Nm +driver first appeared in +.Ox 6.3 . +.Sh AUTHORS +.An David Gwynne Aq Mt dlg@openbsd.org +.Sh CAVEATS +The +.Nm +driver only handles data packets within the protocol, it does not +implement support for error handling as described in the RFC.