Index: sbin/ifconfig/ifconfig.c =================================================================== RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v retrieving revision 1.333 diff -u -p -r1.333 ifconfig.c --- sbin/ifconfig/ifconfig.c 10 Nov 2016 14:36:03 -0000 1.333 +++ sbin/ifconfig/ifconfig.c 12 Dec 2016 05:59:20 -0000 @@ -2873,8 +2873,6 @@ phys_status(int force) (void) strlcpy(req.iflr_name, name, sizeof(req.iflr_name)); if (ioctl(s, SIOCGLIFPHYADDR, (caddr_t)&req) < 0) return; - if (req.addr.ss_family == AF_INET6) - in6_fillscopeid((struct sockaddr_in6 *)&req.addr); if (getnameinfo((struct sockaddr *)&req.addr, req.addr.ss_len, psrcaddr, sizeof(psrcaddr), 0, 0, niflag) != 0) strlcpy(psrcaddr, "", sizeof(psrcaddr)); @@ -2884,7 +2882,6 @@ phys_status(int force) if (req.dstaddr.ss_family == AF_INET) dstport = ((struct sockaddr_in *)&req.dstaddr)->sin_port; else if (req.dstaddr.ss_family == AF_INET6) { - in6_fillscopeid((struct sockaddr_in6 *)&req.dstaddr); dstport = ((struct sockaddr_in6 *)&req.dstaddr)->sin6_port; } if (getnameinfo((struct sockaddr *)&req.dstaddr, req.dstaddr.ss_len, @@ -3668,7 +3665,7 @@ getvnetid(void) return; } - printf("\tvnetid: %lld\n", ifr.ifr_vnetid); + printf("\tvnetid: %lld (0x%llx)\n", ifr.ifr_vnetid, ifr.ifr_vnetid); } void Index: share/man/man4/Makefile =================================================================== RCS file: /cvs/src/share/man/man4/Makefile,v retrieving revision 1.646 diff -u -p -r1.646 Makefile --- share/man/man4/Makefile 7 Dec 2016 15:53:05 -0000 1.646 +++ share/man/man4/Makefile 12 Dec 2016 05:59:20 -0000 @@ -37,8 +37,8 @@ MAN= aac.4 ac97.4 acphy.4 \ lmenv.4 lmn.4 lmtemp.4 lo.4 lpt.4 lxtphy.4 luphy.4 \ maestro.4 mainbus.4 malo.4 maxds.4 maxrtc.4 maxtmp.4 mbg.4 midi.4 \ mii.4 mfi.4 \ - mfii.4 mlphy.4 moscom.4 mos.4 mpe.4 mpath.4 mpi.4 mpii.4 mpu.4 msk.4 \ - mpw.4 msts.4 mtd.4 mtdphy.4 multicast.4 mtio.4 myx.4 \ + mfii.4 mlphy.4 mobileip.4 moscom.4 mos.4 mpe.4 mpath.4 mpi.4 mpii.4 \ + mpu.4 msk.4 mpw.4 msts.4 
mtd.4 mtdphy.4 multicast.4 mtio.4 myx.4 \ ne.4 neo.4 nep.4 netintro.4 nfe.4 nge.4 nmea.4 \ nsclpcsio.4 nsgphy.4 nsphy.4 nsphyter.4 null.4 nviic.4 nvme.4 nvt.4 \ oce.4 ohci.4 options.4 onewire.4 oosiop.4 osiop.4 otus.4 \ Index: share/man/man4/etherip.4 =================================================================== RCS file: /cvs/src/share/man/man4/etherip.4,v retrieving revision 1.4 diff -u -p -r1.4 etherip.4 --- share/man/man4/etherip.4 3 Dec 2015 18:44:41 -0000 1.4 +++ share/man/man4/etherip.4 12 Dec 2016 05:59:20 -0000 @@ -21,58 +21,65 @@ .Os .Sh NAME .Nm etherip -.Nd EtherIP tunnel interface +.Nd EtherIP encapsulating network device .Sh SYNOPSIS .Cd "pseudo-device etherip" .Sh DESCRIPTION The .Nm -interface is a pseudo-device for tunnelling Ethernet frames across IP[46] -networks using RFC 3378 EtherIP encapsulation. +interface is a pseudo-device for tunnelling Ethernet frames across +IPv4 and IPv6 networks using RFC 3378 EtherIP encapsulation. +.Pp +EtherIP datagrams (IP protocol number 97) consist of a 2 byte EtherIP header and an outer IP header encapsulating Ethernet datagrams. .Pp An .Nm -interface can be created using the -.Ic ifconfig etherip Ns Ar N Ic create +interface can be created at runtime using the +.Ic ifconfig Nm Ns Ar N Ic create command or by setting up a .Xr hostname.if 5 configuration file for .Xr netstart 8 . -It must be configured with the addresses used for the outer header. -This can be done using -.Xr ifconfig 8 Ns 's -.Ic tunnel -command (which uses the -.Dv SIOCSIFPHYADDR -ioctl). .Pp -The +The MTU of .Nm -interface must be made a member of a -.Xr bridge 4 . -The -.Xr sysctl 3 -variable -.Dv net.inet.etherip.allow -must be set to 1, unless -.Xr ipsec 4 -is being used to protect the traffic. -Ethernet frames are then encapsulated and sent across the network -to another -.Xr bridge 4 , -which decapsulates the datagram and processes the resulting Ethernet -frame as if it had originated on a normal Ethernet interface. 
-This effectively allows a layer 2 network to be extended from one point to -another, possibly through the Internet. -This mechanism may be used in -conjunction with IPsec by specifying the appropriate IPsec flows -between the two bridges. -To only protect the bridge traffic between -the two bridges, the transport protocol 97 (etherip) selector may be -used in -.Xr ipsec.conf 5 . -Otherwise, the Ethernet frames will be sent in the clear between the -two bridges. +interfaces is set to 1500 by default. +This may not be an optimal value depending on the link between the two tunnel endpoints, but it can be adjusted via +.Xr ifconfig 8 . +.Pp +For correct operation, the route to the tunnel destination must not +go over the interface itself. +This can be implemented by adding a distinct or a more specific +route to the tunnel destination than the hosts or networks routed +via the tunnel interface. +Alternatively, the tunnel traffic may be configured in a separate +routing table to the encapsulated traffic. +.Pp +.Nm +interfaces support the following +.Xr ioctl 2 Ns s +for configuring tunnel options: +.Bl -tag -width indent -offset 3n +.It Dv SIOCSLIFPHYADDR Fa "struct if_laddrreq *" +Set the IPv4 or IPv6 addresses of the outer IP header. +The addresses may only be configured while the interface is down. +.It Dv SIOCGLIFPHYADDR Fa "struct if_laddrreq *" +Get the addresses of the outer IP header. +.It Dv SIOCDIFPHYADDR +Clear the outer IP header addresses. +The addresses may only be cleared while the interface is down. +.It Dv SIOCSLIFPHYRTABLE Fa "struct ifreq *" +Set the routing table the tunnel traffic operates in. +The routing table may only be configured while the interface is down. +.It Dv SIOCGLIFPHYRTABLE Fa "struct ifreq *" +Get the routing table the tunnel traffic operates in. +.It Dv SIOCSLIFPHYTTL Fa "struct ifreq *" +Set the Time-To-Live field in IPv4 encapsulation headers, or the +Hop Limit field in IPv6 encapsulation headers. 
+.It Dv SIOCGLIFPHYTTL Fa "struct ifreq *" +Get the value used in the Time-To-Live field in an IPv4 encapsulation +header or the Hop Limit field in an IPv6 encapsulation header. +.El .Sh EXAMPLES Given two physically separate Ethernet networks, a bridge can be used as follows to make them appear as the same local area network. Index: share/man/man4/gif.4 =================================================================== RCS file: /cvs/src/share/man/man4/gif.4,v retrieving revision 1.29 diff -u -p -r1.29 gif.4 --- share/man/man4/gif.4 7 Jun 2016 20:25:48 -0000 1.29 +++ share/man/man4/gif.4 12 Dec 2016 05:59:20 -0000 @@ -40,37 +40,94 @@ The .Nm interface is a generic tunnelling pseudo-device for IPv4 and IPv6. -It can tunnel IPv[46] over IPv[46] with behavior mainly based on -RFC 4213 IPv6-over-IPv4, for a total of four possible combinations. +It can tunnel IPv4, IPv6, and MPLS over IPv4 or IPv6 networks. +.Pp +Encapsulated datagrams are prepended by an IP header with the +protocol type set to identify the original datagram's type. +IPv4 packets are encapsulated in IP packets using type IP (protocol +number 4), IPv6 packets are encapsulated with type IPv6 (protocol +number 41), and MPLS packets are encapsulated with type MPLS (protocol +number 137). +The tunnel is only identified by the addresses used by the pair +of endpoints the traffic is encapsulated between. .Pp A .Nm interface can be created at runtime using the -.Ic ifconfig gif Ns Ar N Ic create +.Ic ifconfig Nm Ns Ar N Ic create command or by setting up a .Xr hostname.if 5 configuration file for .Xr netstart 8 . .Pp -The -.Nm -interface must be configured with the -addresses used for the outer header. -This can be done by using -.Xr ifconfig 8 Ns 's -.Ic tunnel -command (which uses the -.Dv SIOCSIFPHYADDR -ioctl). -.Pp -The addresses of the inner header must be configured by using -.Xr ifconfig 8 -in the normal way. 
-The routing table can be used to direct packets toward the +The MTU of +.Nm gif +interfaces is set to 1280 by default. +This may not be an optimal value depending on the link between the two tunnel endpoints, but it can be adjusted via +.Xr ifconfig 8 . +.Pp +For correct operation, the route to the tunnel destination must not +go over the interface itself. +This can be implemented by adding a distinct or a more specific +route to the tunnel destination than the hosts or networks routed +via the tunnel interface. +Alternatively, the tunnel traffic may be configured in a separate +routing table to the encapsulated traffic. +.Pp .Nm -interface. +interfaces support the following +.Xr ioctl 2 Ns s +for configuring tunnel options: +.Bl -tag -width indent -offset 3n +.It Dv SIOCSLIFPHYADDR Fa "struct if_laddrreq *" +Set the IPv4 or IPv6 addresses of the outer IP header. +The addresses may only be configured while the interface is down. +.It Dv SIOCGLIFPHYADDR Fa "struct if_laddrreq *" +Get the addresses of the outer IP header. +.It Dv SIOCDIFPHYADDR +Clear the outer IP header addresses. +The addresses may only be cleared while the interface is down. +.It Dv SIOCSLIFPHYRTABLE Fa "struct ifreq *" +Set the routing table the encapsulated IP packets operate within. +The routing table may only be configured while the interface is down. +.It Dv SIOCGLIFPHYRTABLE Fa "struct ifreq *" +Get the routing table the encapsulated IP packets operate within. +.It Dv SIOCSLIFPHYTTL Fa "struct ifreq *" +Set the Time-To-Live field in IPv4 encapsulation headers, or the +Hop Limit field in IPv6 encapsulation headers. +.It Dv SIOCGLIFPHYTTL Fa "struct ifreq *" +Get the value used in the Time-To-Live field in an IPv4 encapsulation +header or the Hop Limit field in an IPv6 encapsulation header. 
+.El +.Sh EXAMPLES +.Nm gif +configuration example: +.Bd -literal +Host X ---- Host A ----------- tunnel --------- host D ---- Host E + \e / + \e / + +----- Host B ------ Host C ------+ +.Ed +.Pp +On Host A: +.Bd -literal -offset indent +# route add default B +# ifconfig gifN create +# ifconfig gifN tunnel A D +# ifconfig gifN A D netmask 0xffffffff linkX up +# route add E D +.Ed +.Pp +On Host D +.Pq Ox : +.Bd -literal -offset indent +# route add default C +# ifconfig gifN create +# ifconfig gifN tunnel D A +# ifconfig gifN D A +# route add E E +.Ed .Sh SEE ALSO -.Xr sysctl 3 , .Xr etherip 4 , .Xr inet 4 , .Xr inet6 4 , @@ -85,6 +142,15 @@ interface. .%D October 2005 .%R RFC 4213 .%T Basic Transition Mechanisms for IPv6 Hosts and Routers +.Re +.Pp +.Rs +.%A T. Worster, Motorola Inc. +.%A Y. Rekhter, Juniper Networks, Inc. +.%A E. Rosen (editor), Cisco Systems, Inc. +.%D March 2005 +.%R RFC 4023 +.%T Encapsulating MPLS in IP or Generic Routing Encapsulation (GRE) .Re .Sh HISTORY The Index: share/man/man4/gre.4 =================================================================== RCS file: /cvs/src/share/man/man4/gre.4,v retrieving revision 1.46 diff -u -p -r1.46 gre.4 --- share/man/man4/gre.4 31 Aug 2016 18:16:54 -0000 1.46 +++ share/man/man4/gre.4 12 Dec 2016 05:59:20 -0000 @@ -33,168 +33,193 @@ .Os .Sh NAME .Nm gre , -.Nm mobileip -.Nd encapsulating network device +.Nm egre , +.Nm nvgre +.Nd GRE and NVGRE encapsulating network devices .Sh SYNOPSIS .Cd "pseudo-device gre" .Sh DESCRIPTION The .Nm -driver allows tunnel construction using the Cisco GRE or -the Mobile IP (RFC 2004) encapsulation protocols. +pseudo-device provides interfaces for tunnelling protocols across +IPv4 and IPv6 networks using RFC 1701 Generic Routing Encapsulation +(GRE) tunnels or RFC 7637 Network Virtualisation GRE (NVGRE) tunnels. +.Pp +GRE datagrams (IP protocol number 47) consist of a GRE and outer IP +header encapsulating another protocol's datagram. 
+The GRE header specifies the type of the encapsulated datagram, +allowing for the tunneling of multiple protocols. +Different tunnels between the same endpoints may be distinguished +by an optional Key field in the GRE header. +.Pp +NVGRE is a Layer 2 Ethernet encapsulation protocol implemented using +the GRE protocol with different semantics for the Key parameter. +The Key field is mandatory, but is interpreted as a 24 bit Virtual +Subnet Identifier (VSID) and an 8 bit Flow Identifier. .Pp -.Tn GRE , -.Tn WCCPv1 , -and -.Tn Mobile IP -are enabled with the following -.Xr sysctl 3 -variables respectively in -.Pa /etc/sysctl.conf : -.Bl -tag -width "net.inet.mobileip.allow" -.It Va net.inet.gre.allow -Allow GRE packets in and out of the system. -.It Va net.inet.gre.wccp -Set to 1 to allow WCCPv1-style GRE packets into the system; -set to 2 to handle the packets as WCCPv2-style GRE, truncating -the redirect header. -Some magic with the packet filter configuration -and a caching proxy like squid are needed -to do anything useful with these packets. -This sysctl requires -.Va gre.allow -to be set. -.It Va net.inet.mobileip.allow -Allow Mobile IP packets in and out of the system. -.El -.Pp -This driver currently supports the following modes of operation: -.Bl -tag -width mobileipXXX +This pseudo driver provides the interfaces: +.Bl -tag -width nvgreXXX .It Nm gre -GRE datagrams (IP protocol number 47) -are prepended by an outer datagram and a GRE header. -The GRE header specifies the type of the encapsulated datagram -and thus allows for tunneling other protocols than IP, -such as AppleTalk. -GRE mode is the default tunnel mode on Cisco routers. -This is also the default mode of operation of the -.Nm -interfaces. -.It Nm mobileip -MOBILE datagrams (IP protocol number 55) -are encapsulated into IP, but with a much smaller -encapsulation header. -This protocol only supports IP in IP encapsulation, and is intended -for use with Mobile IP. 
+Layer 3 protocols, specifically IPv4, IPv6, and MPLS, are encapsulated +by GRE and IP headers as per RFC 1701 and RFC 1702. +Each interface may be configured with an optional 32 bit key as a +virtual network identifier. +.It Nm egre +Layer 2 Ethernet packets are encapsulated by GRE and IP headers. +Transparent Ethernet (0x6558) is used as the protocol identifier +in the GRE header as per RFC 1701. +Each interface may be configured with an optional 32 bit virtual network +identifier which is used as the Key. +.It Nm nvgre +Layer 2 Ethernet packets are encapsulated by the NVGRE variant of +a GRE header, and an IP header. +Each interface is configured with a 24 bit virtual network identifier +that is used as the Virtual Subnet Identifier (VSID). .El .Pp -A -.Nm gre -or -.Nm mobileip -interface can be created at runtime using the -.Ic ifconfig gre Ns Ar N Ic create +.Nm gre , +.Nm egre , +and +.Nm nvgre +interfaces can be created at runtime using the +.Ic ifconfig iface Ns Ar N Ic create command or by setting up a .Xr hostname.if 5 configuration file for .Xr netstart 8 . -The MTU is set to 1476 by default to match the value used by Cisco routers. -This may not be an optimal value, -depending on the link between the two tunnel endpoints, -but it can be adjusted via -.Xr ifconfig 8 . -.Pp -For correct operation, -there needs to be a route to the destination -that is less specific than the one over the tunnel -(there needs to be a route to the decapsulating host that -does not run over the tunnel, as this would create a loop). -.Pp -Note that the IP addresses of the tunnel endpoints may be the same as the -ones defined with -.Xr ifconfig 8 -for the interface (as if IP is encapsulated) but need not be as, -for example, when encapsulating AppleTalk. +.Pp +The MTU of +.Nm gre +interfaces is set to 1476 by default to match the value used by +Cisco routers. +The MTU of +.Nm egre +and +.Nm nvgre +interfaces are set to 1500 by default. 
+These may not be optimal values depending on the link between the +two tunnel endpoints, but they can be adjusted via ifconfig(8). +.Pp +For correct operation, the route to the tunnel destination must not +go over the interface itself. +This can be implemented by adding a distinct or a more specific +route to the tunnel destination than the hosts or networks routed +via the tunnel interface. +Alternatively, the tunnel traffic may be configured in a separate +routing table to the encapsulated traffic. +.Pp +.Nm gre , +.Nm egre , +and +.Nm nvgre +interfaces support the following +.Xr ioctl 2 Ns s +for configuring tunnel options: +.Bl -tag -width indent -offset 3n +.It Dv SIOCSLIFPHYADDR Fa "struct if_laddrreq *" +Set the IPv4 or IPv6 addresses of the outer IP header. +The addresses may only be configured while the interface is down. +.It Dv SIOCGLIFPHYADDR Fa "struct if_laddrreq *" +Get the addresses of the outer IP header. +.It Dv SIOCDIFPHYADDR +Clear the outer IP header addresses. +The addresses may only be cleared while the interface is down. +.It Dv SIOCSLIFPHYRTABLE Fa "struct ifreq *" +Set the routing table the tunnel traffic operates in. +The routing table may only be configured while the interface is down. +.It Dv SIOCGLIFPHYRTABLE Fa "struct ifreq *" +Get the routing table the tunnel traffic operates in. +.It Dv SIOCSLIFPHYTTL Fa "struct ifreq *" +Set the Time-To-Live field in IPv4 encapsulation headers, or the +Hop Limit field in IPv6 encapsulation headers. +.It Dv SIOCGLIFPHYTTL Fa "struct ifreq *" +Get the value used in the Time-To-Live field in an IPv4 encapsulation +header or the Hop Limit field in an IPv6 encapsulation header. +.El +.Pp +.Nm gre +and +.Nm egre +interfaces support the following semantics for the vnetid +.Xr ioctl 2 Ns s : +.Bl -tag -width indent -offset 3n +.It Dv SIOCSVNETID Fa "struct ifreq *" +Set a 32 bit virtual network identifier used as the Key in the GRE +header. 
+The virtual network identifier may only be configured while the +interface is down. +.It Dv SIOCGVNETID Fa "struct ifreq *" +Get the virtual network identifier used as the Key in the GRE header. +.It Dv SIOCDVNETID +Remove the virtual network identifier used as the Key in the GRE header. +The virtual network identifier may only be cleared while the interface +is down. +.El +.Pp +.Nm nvgre +interfaces support the following semantics for the vnetid +.Xr ioctl 2 Ns s : +.Bl -tag -width indent -offset 3n +.It Dv SIOCSVNETID Fa "struct ifreq *" +Set a 24 bit virtual network identifier used as the VSID in the +NVGRE header. +The virtual network identifier may only be configured while the +interface is down. +.It Dv SIOCGVNETID Fa "struct ifreq *" +Get the virtual network identifier used as the VSID in the NVGRE header. +.El .Sh EXAMPLES -Configuration example: +.Nm gre +configuration example: .Bd -literal -Host X ---- Host A ------------ tunnel ------------ Cisco D ---- Host E +Host X ---- Host A ----------- GRE tunnel --------- host D ---- Host E \e / \e / +------ Host B ------ Host C ------+ .Ed .Pp -On Host A -.Pq Ox : +On Host A: .Bd -literal -offset indent # route add default B # ifconfig greN create -# ifconfig greN A D netmask 0xffffffff linkX up # ifconfig greN tunnel A D +# ifconfig greN A D netmask 0xffffffff linkX up # route add E D .Ed .Pp -On Host D (Cisco): -.Bd -literal -offset indent -Interface TunnelX - ip unnumbered D ! e.g. address from Ethernet interface - tunnel source D ! e.g. 
address from Ethernet interface - tunnel destination A -ip route C -ip route A mask C -ip route X mask tunnelX -.Ed -.Pp -OR -.Pp On Host D .Pq Ox : .Bd -literal -offset indent # route add default C # ifconfig greN create -# ifconfig greN D A # ifconfig greN tunnel D A +# ifconfig greN D A +# route add E E .Ed .Pp -To reach Host A over the tunnel (from Host D), there has to be an -alias on Host A for the Ethernet interface: -.Pp -.Dl # ifconfig alias Y -.Pp -and on the Cisco: -.Pp -.Dl ip route Y mask tunnelX -.Pp -Keepalive packets may optionally be sent to the remote endpoint, which -decapsulates and returns them, allowing tunnel failure to be detected. -Enable them like this: +Configuring a Key on +.Nm gre +or +.Nm egre +interfaces: .Bd -literal -offset indent -# ifconfig greN keepalive period count +# ifconfig greN vnetid K .Ed .Pp -This will send a keepalive packet every -.Ar period -seconds. -If no response is received in -.Ar count -* -.Ar period -seconds, the link is considered down. -To return keepalives, the remote host must be configured to forward packets: +Removing the Key on +.Nm gre +or +.Nm egre +interfaces: .Bd -literal -offset indent -# sysctl net.inet.ip.forwarding=1 +# ifconfig egreN -vnetid .Ed .Pp -If -.Xr pf 4 -is enabled then it is necessary to add a pass rule specific for the keepalive -packets. -The rule must use -.Cm no state -because the keepalive packet is entering the network stack multiple times. -In most cases the following should work: +Configuring a VSID on +.Nm nvgre +interfaces: .Bd -literal -offset indent -pass quick on gre proto gre no state +# ifconfig nvgreN vnetid V .Ed .Sh SEE ALSO .Xr inet 4 , @@ -227,25 +252,54 @@ pass quick on gre proto gre no state .%T Generic Routing Encapsulation over IPv4 networks .Re .Pp -.Rs -.%A C. 
Perkins -.%D October 1996 -.%R RFC 2004 -.%T Minimal Encapsulation within IP -.Re -.Pp -.Rs -.%U http://www.wrec.org/Drafts/draft-ietf-wrec-web-pro-00.txt -.%T Web Cache Coordination Protocol V1.0 -.Re +.%A T. Worster, Motorola Inc. +.%A Y. Rekhter, Juniper Networks, Inc. +.%A E. Rosen (editor), Cisco Systems, Inc. +.%D March 2005 +.%R RFC 4023 +.%T Encapsulating MPLS in IP or Generic Routing Encapsulation (GRE) .Pp .Rs -.%U http://www.wrec.org/Drafts/draft-wilson-wrec-wccp-v2-00.txt -.%T Web Cache Coordination Protocol V2.0 +.%A P. Garg (editor) +.%A Y. Wang (editor) +.%D September 2015 +.%R RFC 7637 +.%T NVGRE: Network Virtualization Using Generic Routing Encapsulation .Re -.Sh AUTHORS -.An Heiko W. Rupp Aq Mt hwr@pilhuhn.de -.Sh BUGS -The GRE RFC is not yet fully implemented (no GRE options). +.Sh CAVEATS +The +.Nm gre +and +.Nm egre +interfaces do not support any of the optional fields defined +in the GRE RFCs apart from the Key. .Pp -The redirect header for WCCPv2 GRE encapsulated packets is skipped. +.Nm egre +and +.Nm nvgre +share the same GRE version, Key field, and use Transparent +Ethernet (0x6558) as the protocol type in the GRE header. +Therefore the +.Nm egre +Keys +and +.Nm nvgre +VSIDs share the same space in the protocol and can collide. +The drivers prevent configuration of overlapping Keys and VSIDs +between the same set of endpoints. +It is up to the tunnel endpoints to correctly configure the type +of interface to interpret the traffic. +.Pp +The NVGRE RFC specifies VSIDs 0 (0x0) to 4095 (0xfff) as reserved +for future use, and VSID 16777215 (0xffffff) for use for vendor-specific +endpoint communication. +The NVGRE RFC also explicitly states encapsulated Ethernet packets +must not contain IEEE 802.1Q (VLAN) tags. +The +.Nm nvgre +driver does not restrict the use of these VSIDs, and does not prevent +the configuration of child +.Xr vlan 4 +interfaces or the bridging of VLAN tagged traffic across the tunnel. 
+These non-restrictions allow non-compliant tunnels to be configured +which may not interoperate with other vendors. Index: share/man/man4/mobileip.4 =================================================================== RCS file: share/man/man4/mobileip.4 diff -N share/man/man4/mobileip.4 --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ share/man/man4/mobileip.4 12 Dec 2016 05:59:20 -0000 @@ -0,0 +1,135 @@ +.\" $OpenBSD: gre.4,v 1.46 2016/08/31 18:16:54 jmc Exp $ +.\" $NetBSD: gre.4,v 1.10 1999/12/22 14:55:49 kleink Exp $ +.\" +.\" Copyright 1998 (c) The NetBSD Foundation, Inc. +.\" All rights reserved. +.\" +.\" This code is derived from software contributed to The NetBSD Foundation +.\" by Heiko W. Rupp +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd $Mdocdate: August 31 2016 $ +.Dt MOBILEIP 4 +.Sh NAME +.Nm mobileip +.Nd MobileIP encapsulating network device +.Sh SYNOPSIS +.Cd "pseudo-device mobileip" +.Sh DESCRIPTION +The +.Nm +driver provides IP tunnel construction using +the Mobile IP (RFC 2004) encapsulation protocol. +.Pp +.Nm mobileip +datagrams (IP protocol number 55) +are encapsulated into IP using a small encapsulation header. +This protocol only supports encapsulating IPv4 in IPv4, and is intended +for use with Mobile IP. +.Pp +A +.Nm +interface can be created at runtime using the +.Ic ifconfig Nm Ns Ar N Ic create +command or by setting up a +.Xr hostname.if 5 +configuration file for +.Xr netstart 8 . +.Pp +The MTU is set to 1488 by default. +This may not be an optimal value +depending on the link between the two tunnel endpoints, +but it can be adjusted via +.Xr ifconfig 8 . +.Pp +For correct operation, the route to the tunnel destination must not +go over the interface itself. +This can be implemented by adding a distinct or a more specific +route to the tunnel destination than the hosts or networks routed +via the tunnel interface. +Alternatively, the tunnel traffic may be configured in a separate +routing table to the encapsulated traffic. 
+.Pp +.Nm +interfaces support the following +.Xr ioctl 2 Ns s +for configuring tunnel options: +.Bl -tag -width indent -offset 3n +.It Dv SIOCSLIFPHYADDR Fa "struct if_laddrreq *" +Set the addresses of the outer IP header. +The addresses may only be configured while the interface is down. +.It Dv SIOCGLIFPHYADDR Fa "struct if_laddrreq *" +Get the addresses of the outer IP header. +.It Dv SIOCDIFPHYADDR +Clear the outer IP header addresses. +The addresses may only be cleared while the interface is down. +.It Dv SIOCSLIFPHYRTABLE Fa "struct ifreq *" +Set the routing table the encapsulated IP packets operate within. +The routing table may only be configured while the interface is down. +.It Dv SIOCGLIFPHYRTABLE Fa "struct ifreq *" +Get the routing table the encapsulated IP packets operate within. +.El +.Sh EXAMPLES +Configuration example: +.Bd -literal +Host X --- Host A ----------- MobileIP ------------ Host D --- Host E + \e / + \e / + +------ Host B ------ Host C ------+ +.Ed +.Pp +On Host A +.Pq Ox : +.Bd -literal -offset indent +# route add default B +# ifconfig mobileipN create +# ifconfig mobileipN tunnel A D +# ifconfig mobileipN A D netmask 255.255.255.255 +# route add E D +.Ed +.Pp +On Host D +.Pq Ox : +.Bd -literal -offset indent +# route add default C +# ifconfig mobileipN create +# ifconfig mobileipN tunnel D A +# ifconfig mobileipN D A netmask 255.255.255.255 +# route add D E +.Ed +.Sh SEE ALSO +.Xr inet 4 , +.Xr ip 4 , +.Xr netintro 4 , +.Xr options 4 , +.Xr hostname.if 5 , +.Xr protocols 5 , +.Xr ifconfig 8 , +.Xr netstart 8 +.Sh STANDARDS +.Rs +.%A C. 
Perkins +.%D October 1996 +.%R RFC 2004 +.%T Minimal Encapsulation within IP +.Re Index: sys/conf/GENERIC =================================================================== RCS file: /cvs/src/sys/conf/GENERIC,v retrieving revision 1.236 diff -u -p -r1.236 GENERIC --- sys/conf/GENERIC 29 Nov 2016 09:08:34 -0000 1.236 +++ sys/conf/GENERIC 12 Dec 2016 05:59:20 -0000 @@ -95,6 +95,7 @@ pseudo-device carp # CARP protocol supp pseudo-device etherip # EtherIP (RFC 3378) pseudo-device gif # IPv[46] over IPv[46] tunnel (RFC1933) pseudo-device gre # GRE encapsulation interface +pseudo-device mobileip # MobileIP encapsulation interface pseudo-device loop # network loopback pseudo-device mpe # MPLS PE interface pseudo-device mpw # MPLS pseudowire support Index: sys/conf/files =================================================================== RCS file: /cvs/src/sys/conf/files,v retrieving revision 1.635 diff -u -p -r1.635 files --- sys/conf/files 30 Nov 2016 14:26:12 -0000 1.635 +++ sys/conf/files 12 Dec 2016 05:59:20 -0000 @@ -543,6 +543,7 @@ pseudo-device carp: ifnet, ether pseudo-device sppp: ifnet pseudo-device gif: ifnet pseudo-device gre: ifnet +pseudo-device mobileip: ifnet pseudo-device crypto: ifnet pseudo-device trunk: ifnet, ether, ifmedia pseudo-device mpe: ifnet, ether @@ -765,7 +766,6 @@ file net/bpf_filter.c bpfilter file net/if.c file net/ifq.c file net/if_ethersubr.c ether needs-flag -file net/if_etherip.c etherip needs-flag file net/if_spppsubr.c sppp file net/if_loop.c loop file net/if_media.c ifmedia @@ -789,8 +789,11 @@ file net/rtable.c file net/route.c file net/rtsock.c file net/slcompress.c ppp -file net/if_enc.c enc needs-count -file net/if_gre.c gre needs-count +file net/if_enc.c enc +file net/if_gre.c gre needs-flag +file net/if_mobileip.c mobileip needs-flag +file net/if_gif.c gif needs-flag +file net/if_etherip.c etherip needs-flag file net/if_trunk.c trunk needs-count file net/trunklacp.c trunk file net/if_mpe.c mpe needs-count @@ -838,11 +841,13 
@@ file netinet/tcp_subr.c file netinet/tcp_timer.c file netinet/tcp_usrreq.c file netinet/udp_usrreq.c -file netinet/ip_gre.c +file netinet/ip_gre.c gre +file netinet/ip_mobileip.c mobileip +file netinet/ip_gif.c gif +file netinet/ip_etherip.c etherip file netinet/ip_ipsp.c ipsec | tcp_signature file netinet/ip_spd.c ipsec | tcp_signature -file netinet/ip_ipip.c -file netinet/ip_ether.c gif +file netinet/ip_ipip.c ipsec file netinet/ipsec_input.c ipsec file netinet/ipsec_output.c ipsec file netinet/ip_esp.c ipsec @@ -950,7 +955,6 @@ file uvm/uvm_user.c file uvm/uvm_vnode.c # IPv6 -file net/if_gif.c gif needs-count file netinet/ip_ecn.c file netinet6/in6_pcb.c inet6 file netinet6/in6.c inet6 Index: sys/net/if_etherip.c =================================================================== RCS file: /cvs/src/sys/net/if_etherip.c,v retrieving revision 1.9 diff -u -p -r1.9 if_etherip.c --- sys/net/if_etherip.c 17 Nov 2016 13:37:20 -0000 1.9 +++ sys/net/if_etherip.c 12 Dec 2016 05:59:20 -0000 @@ -1,4 +1,5 @@ /* $OpenBSD: if_etherip.c,v 1.9 2016/11/17 13:37:20 mpi Exp $ */ + /* * Copyright (c) 2015 Kazuya GODA * @@ -17,7 +18,6 @@ #include "bpfilter.h" #include "pf.h" -#include "gif.h" #include #include @@ -36,7 +36,6 @@ #include #include #include -#include #ifdef INET6 #include @@ -53,47 +52,61 @@ #include +struct etherip_tunnel { + RBT_ENTRY(etherip_tunnel) + t_entry; + + unsigned int t_rtableid; + int t_af; + uint32_t t_src[4]; + uint32_t t_dst[4]; + uint8_t t_ttl; +}; + struct etherip_softc { + struct etherip_tunnel sc_tunnel; /* must be first */ struct arpcom sc_ac; struct ifmedia sc_media; - unsigned int sc_rdomain; - struct sockaddr_storage sc_src; - struct sockaddr_storage sc_dst; - LIST_ENTRY(etherip_softc) sc_entry; }; -LIST_HEAD(, etherip_softc) etherip_softc_list; +void etheripattach(int); -#if 0 -/* - * TODO: - * At this stage, etherip_allow and etheripstat are defined - * at netinet/ip_ether.c. 
When implementation of etherip is - * removed from gif(4), there are moved here. - */ +static int etherip_clone_create(struct if_clone *, int); +static int etherip_clone_destroy(struct ifnet *); -/* - * We can control the acceptance of EtherIP packets by altering the sysctl - * net.inet.etherip.allow value. Zero means drop them, all else is acceptance. - */ -int etherip_allow = 0; +struct if_clone etherip_cloner = IF_CLONE_INITIALIZER("etherip", + etherip_clone_create, etherip_clone_destroy); -struct etheripstat etheripstat; -#endif +RBT_HEAD(etherip_tree, etherip_tunnel); -void etheripattach(int); -int etherip_clone_create(struct if_clone *, int); -int etherip_clone_destroy(struct ifnet *); -int etherip_ioctl(struct ifnet *, u_long, caddr_t); -void etherip_start(struct ifnet *); -int etherip_media_change(struct ifnet *); -void etherip_media_status(struct ifnet *, struct ifmediareq *); -int etherip_set_tunnel_addr(struct ifnet *, struct sockaddr_storage *, - struct sockaddr_storage *); +static inline int + etherip_cmp(const struct etherip_tunnel *, + const struct etherip_tunnel *); -struct if_clone etherip_cloner = IF_CLONE_INITIALIZER("etherip", - etherip_clone_create, etherip_clone_destroy); +RBT_PROTOTYPE(etherip_tree, etherip_tunnel, t_entry, etherip_cmp); +struct etherip_tree etherip_softcs = RBT_INITIALIZER(); +struct rwlock etherip_lk = RWLOCK_INITIALIZER("etheriplk"); + +static int etherip_ioctl(struct ifnet *, u_long, caddr_t); +static void etherip_start(struct ifnet *); +static int etherip_ip4_encap(struct etherip_softc *sc, struct mbuf *); +#ifdef INET6 +static int etherip_ip6_encap(struct etherip_softc *sc, struct mbuf *); +#endif +static int etherip_media_change(struct ifnet *); +static void etherip_media_status(struct ifnet *, struct ifmediareq *); + +static struct mbuf * + etherip_ip_input(struct mbuf *, int, struct etherip_tunnel *); + +static int etherip_up(struct etherip_softc *); +static int etherip_down(struct etherip_softc *); +static int 
etherip_set_tunnel(struct etherip_softc *, + struct if_laddrreq *); +static int etherip_get_tunnel(struct etherip_softc *, + struct if_laddrreq *); +static int etherip_del_tunnel(struct etherip_softc *); void etheripattach(int count) @@ -101,7 +114,7 @@ etheripattach(int count) if_clone_attach(ðerip_cloner); } -int +static int etherip_clone_create(struct if_clone *ifc, int unit) { struct ifnet *ifp; @@ -110,6 +123,10 @@ etherip_clone_create(struct if_clone *if if ((sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL) return ENOMEM; + sc->sc_tunnel.t_af = AF_UNSPEC; + sc->sc_tunnel.t_rtableid = 0; + sc->sc_tunnel.t_ttl = ip_defttl; + ifp = &sc->sc_ac.ac_if; snprintf(ifp->if_xname, sizeof ifp->if_xname, "etherip%d", unit); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; @@ -130,91 +147,74 @@ etherip_clone_create(struct if_clone *if if_attach(ifp); ether_ifattach(ifp); - LIST_INSERT_HEAD(ðerip_softc_list, sc, sc_entry); - - return 0; + return (0); } -int +static int etherip_clone_destroy(struct ifnet *ifp) { struct etherip_softc *sc = ifp->if_softc; - LIST_REMOVE(sc, sc_entry); - ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY); ether_ifdetach(ifp); if_detach(ifp); + free(sc, M_DEVBUF, sizeof(*sc)); return 0; } -int +static int etherip_media_change(struct ifnet *ifp) { return 0; } -void +static void etherip_media_status(struct ifnet *ifp, struct ifmediareq *imr) { imr->ifm_active = IFM_ETHER | IFM_AUTO; imr->ifm_status = IFM_AVALID | IFM_ACTIVE; } -void +static void etherip_start(struct ifnet *ifp) { struct etherip_softc *sc = ifp->if_softc; struct mbuf *m; int error; - for (;;) { - IFQ_DEQUEUE(&ifp->if_snd, m); - if (m == NULL) - break; - + while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) { #if NBPFILTER > 0 if (ifp->if_bpf) bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); #endif - if (sc->sc_src.ss_family == AF_UNSPEC || - sc->sc_dst.ss_family == AF_UNSPEC) { - m_freem(m); - continue; - } ifp->if_opackets++; - switch (sc->sc_src.ss_family) { + 
switch (sc->sc_tunnel.t_af) { case AF_INET: - error = ip_etherip_output(ifp, m); + error = etherip_ip4_encap(sc, m); break; #ifdef INET6 case AF_INET6: - error = ip6_etherip_output(ifp, m); + error = etherip_ip6_encap(sc, m); break; #endif default: - unhandled_af(sc->sc_src.ss_family); + unhandled_af(sc->sc_tunnel.t_af); } if (error) ifp->if_oerrors++; } - } - -int +static int etherip_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct etherip_softc *sc = ifp->if_softc; - struct if_laddrreq *lifr = (struct if_laddrreq *)data; struct ifreq *ifr = (struct ifreq *)data; - struct sockaddr_storage *src, *dst; - struct proc *p = curproc; int error = 0; switch (cmd) { @@ -223,77 +223,52 @@ etherip_ioctl(struct ifnet *ifp, u_long /* FALLTHROUGH */ case SIOCSIFFLAGS: - if (ifp->if_flags & IFF_UP) - ifp->if_flags |= IFF_RUNNING; - else - ifp->if_flags &= ~IFF_RUNNING; - + if (ISSET(ifp->if_flags, IFF_UP)) { + if (!ISSET(ifp->if_flags, IFF_RUNNING)) + error = etherip_up(sc); + else + error = ENETRESET; + } else { + if (ISSET(ifp->if_flags, IFF_RUNNING)) + error = etherip_down(sc); + } break; case SIOCSLIFPHYRTABLE: - if ((error = suser(p, 0)) != 0) - break; - if (ifr->ifr_rdomainid < 0 || ifr->ifr_rdomainid > RT_TABLEID_MAX || !rtable_exists(ifr->ifr_rdomainid)) { error = EINVAL; break; } - sc->sc_rdomain = ifr->ifr_rdomainid; + sc->sc_tunnel.t_rtableid = ifr->ifr_rdomainid; break; - case SIOCGLIFPHYRTABLE: - ifr->ifr_rdomainid = sc->sc_rdomain; + ifr->ifr_rdomainid = sc->sc_tunnel.t_rtableid; break; case SIOCSLIFPHYADDR: - if ((error = suser(p, 0)) != 0) - break; - - src = &lifr->addr; - dst = &lifr->dstaddr; - if (src->ss_family == AF_UNSPEC || dst->ss_family == AF_UNSPEC) - return EADDRNOTAVAIL; - - switch (src->ss_family) { - case AF_INET: - if (src->ss_len != sizeof(struct sockaddr_in) || - dst->ss_len != sizeof(struct sockaddr_in)) - return EINVAL; - break; -#ifdef INET6 - case AF_INET6: - if (src->ss_len != sizeof(struct sockaddr_in6) || - dst->ss_len != 
sizeof(struct sockaddr_in6)) - return EINVAL; - break; -#endif - default: - return EAFNOSUPPORT; - } - - error = etherip_set_tunnel_addr(ifp, src, dst); + error = etherip_set_tunnel(sc, (struct if_laddrreq *)data); + break; + case SIOCGLIFPHYADDR: + error = etherip_get_tunnel(sc, (struct if_laddrreq *)data); break; - case SIOCDIFPHYADDR: - if ((error = suser(p, 0)) != 0) - break; - - ifp->if_flags &= ~IFF_RUNNING; - memset(&sc->sc_src, 0, sizeof(sc->sc_src)); - memset(&sc->sc_dst, 0, sizeof(sc->sc_dst)); + error = etherip_del_tunnel(sc); break; - case SIOCGLIFPHYADDR: - if (sc->sc_dst.ss_family == AF_UNSPEC) - return EADDRNOTAVAIL; - - memset(&lifr->addr, 0, sizeof(lifr->addr)); - memset(&lifr->dstaddr, 0, sizeof(lifr->dstaddr)); - memcpy(&lifr->addr, &sc->sc_src, sc->sc_src.ss_len); - memcpy(&lifr->dstaddr, &sc->sc_dst, sc->sc_dst.ss_len); + case SIOCSLIFPHYTTL: + if (ifr->ifr_ttl < 0 || ifr->ifr_ttl > 0xff) { + error = EINVAL; + break; + } + + /* commit */ + sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl; + break; + case SIOCGLIFPHYTTL: + ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl; break; case SIOCSIFMEDIA: @@ -306,371 +281,358 @@ etherip_ioctl(struct ifnet *ifp, u_long break; } - return error; + return (error); } -int -etherip_set_tunnel_addr(struct ifnet *ifp, struct sockaddr_storage *src, - struct sockaddr_storage *dst) +static inline int +etherip_cmp(const struct etherip_tunnel *a, const struct etherip_tunnel *b) { - struct etherip_softc *sc, *tsc; - int error = 0; + if (a->t_src > b->t_src) + return (1); + if (a->t_src < b->t_src) + return (-1); - sc = ifp->if_softc; + if (a->t_dst > b->t_dst) + return (1); + if (a->t_dst < b->t_dst) + return (-1); - LIST_FOREACH(tsc, ðerip_softc_list, sc_entry) { - if (tsc == sc) - continue; - - if (tsc->sc_src.ss_family != src->ss_family || - tsc->sc_dst.ss_family != dst->ss_family || - tsc->sc_src.ss_len != src->ss_len || - tsc->sc_dst.ss_len != dst->ss_len) - continue; - - if (tsc->sc_rdomain == sc->sc_rdomain && - 
memcmp(&tsc->sc_dst, dst, dst->ss_len) == 0 && - memcmp(&tsc->sc_src, src, src->ss_len) == 0) { - error = EADDRNOTAVAIL; - goto out; - } - } + if (a->t_rtableid > b->t_rtableid) + return (1); + if (a->t_rtableid < b->t_rtableid) + return (-1); - memcpy(&sc->sc_src, src, src->ss_len); - memcpy(&sc->sc_dst, dst, dst->ss_len); -out: - return error; + return (0); } -int -ip_etherip_output(struct ifnet *ifp, struct mbuf *m) +RBT_GENERATE(etherip_tree, etherip_tunnel, t_entry, etherip_cmp); + +static int +etherip_up(struct etherip_softc *sc) { - struct etherip_softc *sc = (struct etherip_softc *)ifp->if_softc; - struct sockaddr_in *src, *dst; - struct etherip_header *eip; - struct ip *ip; + struct etherip_tunnel *t; + int error = 0; - src = (struct sockaddr_in *)&sc->sc_src; - dst = (struct sockaddr_in *)&sc->sc_dst; + if (sc->sc_tunnel.t_af == AF_UNSPEC) + return (ENXIO); - if (src == NULL || dst == NULL || - src->sin_family != AF_INET || dst->sin_family != AF_INET) { - m_freem(m); - return EAFNOSUPPORT; - } - if (dst->sin_addr.s_addr == INADDR_ANY) { - m_freem(m); - return ENETUNREACH; - } + error = rw_enter(ðerip_lk, RW_WRITE | RW_INTR); + if (error != 0) + return (error); - m->m_flags &= ~(M_BCAST|M_MCAST); + t = RBT_INSERT(etherip_tree, ðerip_softcs, + (struct etherip_tunnel *)sc); - M_PREPEND(m, sizeof(struct etherip_header), M_DONTWAIT); - if (m == NULL) { - etheripstat.etherip_adrops++; - return ENOBUFS; - } - eip = mtod(m, struct etherip_header *); - eip->eip_ver = ETHERIP_VERSION; - eip->eip_res = 0; - eip->eip_pad = 0; - - M_PREPEND(m, sizeof(struct ip), M_DONTWAIT); - if (m == NULL) { - etheripstat.etherip_adrops++; - return ENOBUFS; - } - ip = mtod(m, struct ip *); - memset(ip, 0, sizeof(struct ip)); + rw_exit(ðerip_lk); - ip->ip_v = IPVERSION; - ip->ip_hl = sizeof(struct ip) >> 2; - ip->ip_id = htons(ip_randomid()); - ip->ip_tos = IPTOS_LOWDELAY; - ip->ip_p = IPPROTO_ETHERIP; - ip->ip_len = htons(m->m_pkthdr.len); - ip->ip_ttl = IPDEFTTL; - ip->ip_src = 
src->sin_addr; - ip->ip_dst = dst->sin_addr; + if (t == NULL) + SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING); + else + error = EBUSY; - m->m_pkthdr.ph_rtableid = sc->sc_rdomain; + return (error); +} -#if NPF > 0 - pf_pkt_addr_changed(m); -#endif - etheripstat.etherip_opackets++; - etheripstat.etherip_obytes += (m->m_pkthdr.len - - (sizeof(struct ip) + sizeof(struct etherip_header))); +static int +etherip_down(struct etherip_softc *sc) +{ + int error; + + error = rw_enter(ðerip_lk, RW_WRITE | RW_INTR); + if (error != 0) + return (error); + + RBT_REMOVE(etherip_tree, ðerip_softcs, &sc->sc_tunnel); + + rw_exit(ðerip_lk); + + CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING); - return ip_output(m, NULL, NULL, IP_RAWOUTPUT, NULL, NULL, 0); + return (error); } -void -ip_etherip_input(struct mbuf *m, ...) +static int +etherip_set_tunnel(struct etherip_softc *sc, struct if_laddrreq *req) { - struct mbuf_list ml = MBUF_LIST_INITIALIZER(); - struct etherip_softc *sc; - const struct ip *ip; - struct etherip_header *eip; - struct sockaddr_in *src, *dst; - struct ifnet *ifp = NULL; - int off; - va_list ap; - - va_start(ap, m); - off = va_arg(ap, int); - va_end(ap); + struct sockaddr *src = (struct sockaddr *)&req->addr; + struct sockaddr *dst = (struct sockaddr *)&req->dstaddr; + struct sockaddr_in *sin; +#ifdef INET6 + struct sockaddr_in6 *sin6; + int error; +#endif - ip = mtod(m, struct ip *); + if (ISSET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING)) + return (EBUSY); - if (ip->ip_p != IPPROTO_ETHERIP) { - m_freem(m); - ipstat_inc(ips_noproto); - return; - } + /* sa_family and sa_len must be equal */ + if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len) + return (EINVAL); + + /* validate */ + switch (dst->sa_family) { + case AF_INET: + if (dst->sa_len != sizeof(*sin)) + return (EINVAL); + + sin = (struct sockaddr_in *)src; + if (in_nullhost(sin->sin_addr) || + IN_MULTICAST(sin->sin_addr.s_addr)) + return (EINVAL); + + sc->sc_tunnel.t_src[0] = sin->sin_addr.s_addr; + + sin = 
(struct sockaddr_in *)dst; + if (in_nullhost(sin->sin_addr) || + IN_MULTICAST(sin->sin_addr.s_addr)) + return (EINVAL); - if (!etherip_allow) { - m_freem(m); - etheripstat.etherip_pdrops++; - return; - } + sc->sc_tunnel.t_dst[0] = sin->sin_addr.s_addr; - LIST_FOREACH(sc, ðerip_softc_list, sc_entry) { - if (sc->sc_src.ss_family != AF_INET || - sc->sc_dst.ss_family != AF_INET) - continue; - - src = (struct sockaddr_in *)&sc->sc_src; - dst = (struct sockaddr_in *)&sc->sc_dst; - - if (sc->sc_rdomain != rtable_l2(m->m_pkthdr.ph_rtableid) || - src->sin_addr.s_addr != ip->ip_dst.s_addr || - dst->sin_addr.s_addr != ip->ip_src.s_addr) - continue; + break; +#ifdef INET6 + case AF_INET6: + if (dst->sa_len != sizeof(*sin6)) + return (EINVAL); + + sin6 = (struct sockaddr_in6 *)src; + if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || + IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) + return (EINVAL); + + error = in6_embedscope((struct in6_addr *)sc->sc_tunnel.t_src, + sin6, NULL); + if (error != 0) + return (error); + + sin6 = (struct sockaddr_in6 *)dst; + if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || + IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) + return (EINVAL); + + error = in6_embedscope((struct in6_addr *)sc->sc_tunnel.t_dst, + sin6, NULL); + if (error != 0) + return (error); - ifp = &sc->sc_ac.ac_if; break; +#endif + default: + return (EAFNOSUPPORT); } - if (ifp == NULL) { -#if NGIF > 0 - /* - * This path is nessesary for gif(4) and etherip(4) coexistence. - * This is tricky but the path will be removed soon when - * implementation of etherip is removed from gif(4). 
- */ - etherip_input(m, off); -#else - etheripstat.etherip_noifdrops++; - m_freem(m); -#endif /* NGIF */ - return; - } + /* commit */ + sc->sc_tunnel.t_af = dst->sa_family; - m_adj(m, off); - m = m_pullup(m, sizeof(struct etherip_header)); - if (m == NULL) { - etheripstat.etherip_adrops++; - return; - } + return (0); +} - eip = mtod(m, struct etherip_header *); - if (eip->eip_ver != ETHERIP_VERSION || eip->eip_pad) { - etheripstat.etherip_adrops++; - m_freem(m); - return; - } +static int +etherip_get_tunnel(struct etherip_softc *sc, struct if_laddrreq *req) +{ + struct sockaddr *src = (struct sockaddr *)&req->addr; + struct sockaddr *dst = (struct sockaddr *)&req->dstaddr; + struct sockaddr_in *sin; +#ifdef INET6 + struct sockaddr_in6 *sin6; +#endif - etheripstat.etherip_ipackets++; - etheripstat.etherip_ibytes += (m->m_pkthdr.len - - sizeof(struct etherip_header)); - - m_adj(m, sizeof(struct etherip_header)); - m = m_pullup(m, sizeof(struct ether_header)); - if (m == NULL) { - etheripstat.etherip_adrops++; - return; - } - m->m_flags &= ~(M_BCAST|M_MCAST); + switch (sc->sc_tunnel.t_af) { + case AF_UNSPEC: + return (EADDRNOTAVAIL); + case AF_INET: + sin = (struct sockaddr_in *)src; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); + sin->sin_addr.s_addr = sc->sc_tunnel.t_src[0]; + + sin = (struct sockaddr_in *)dst; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); + sin->sin_addr.s_addr = sc->sc_tunnel.t_dst[0]; -#if NPF > 0 - pf_pkt_addr_changed(m); + break; + +#ifdef INET6 + case AF_INET6: + sin6 = (struct sockaddr_in6 *)src; + memset(sin6, 0, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); + in6_recoverscope(sin6, (struct in6_addr *)sc->sc_tunnel.t_src); + + sin6 = (struct sockaddr_in6 *)dst; + memset(sin6, 0, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); + in6_recoverscope(sin6, (struct in6_addr 
*)sc->sc_tunnel.t_dst); + + break; #endif + default: + return (EAFNOSUPPORT); + } - ml_enqueue(&ml, m); - if_input(ifp, &ml); + return (0); } -#ifdef INET6 -int -ip6_etherip_output(struct ifnet *ifp, struct mbuf *m) +static int +etherip_del_tunnel(struct etherip_softc *sc) { - struct etherip_softc *sc = (struct etherip_softc *)ifp->if_softc; - struct sockaddr_in6 *src, *dst; + if (ISSET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING)) + return (EBUSY); + + /* commit */ + sc->sc_tunnel.t_af = AF_UNSPEC; + + return (0); +} + +static int +etherip_ip4_encap(struct etherip_softc *sc, struct mbuf *m) +{ + struct ip *ip; struct etherip_header *eip; - struct ip6_hdr *ip6; + int hlen; - src = (struct sockaddr_in6 *)&sc->sc_src; - dst = (struct sockaddr_in6 *)&sc->sc_dst; + hlen = sizeof(*ip) + sizeof(*eip); - if (src == NULL || dst == NULL || - src->sin6_family != AF_INET6 || dst->sin6_family != AF_INET6) { - m_freem(m); - return EAFNOSUPPORT; - } - if (IN6_IS_ADDR_UNSPECIFIED(&dst->sin6_addr)) { - m_freem(m); - return ENETUNREACH; - } + m = m_prepend(m, hlen, M_DONTWAIT); + if (m == NULL) + return (ENOMEM); - m->m_flags &= ~(M_BCAST|M_MCAST); + ip = mtod(m, struct ip *); + /* + * The prepended bytes are not initialised. Clear the header first, as + * the old ip_etherip_output() did, so fields not assigned below + * (ip_off, ip_sum) do not go out holding stale mbuf contents. + */ + memset(ip, 0, sizeof(*ip)); + ip->ip_v = IPVERSION; + ip->ip_hl = sizeof(*ip) >> 2; + ip->ip_id = htons(ip_randomid()); + ip->ip_tos = IPTOS_LOWDELAY; + ip->ip_p = IPPROTO_ETHERIP; + ip->ip_len = htons(m->m_pkthdr.len); + ip->ip_ttl = sc->sc_tunnel.t_ttl; + ip->ip_src.s_addr = sc->sc_tunnel.t_src[0]; + ip->ip_dst.s_addr = sc->sc_tunnel.t_dst[0]; - M_PREPEND(m, sizeof(struct etherip_header), M_DONTWAIT); - if (m == NULL) { - etheripstat.etherip_adrops++; - return ENOBUFS; - } - eip = mtod(m, struct etherip_header *); - eip->eip_ver = ETHERIP_VERSION; - eip->eip_res = 0; - eip->eip_pad = 0; - - M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT); - if (m == NULL) { - etheripstat.etherip_adrops++; - return ENOBUFS; - } - ip6 = mtod(m, struct ip6_hdr *); - ip6->ip6_flow = 0; - ip6->ip6_vfc &= ~IPV6_VERSION_MASK; - ip6->ip6_vfc |= IPV6_VERSION; -
ip6->ip6_nxt = IPPROTO_ETHERIP; - ip6->ip6_hlim = ip6_defhlim; - ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); - ip6->ip6_src = src->sin6_addr; - ip6->ip6_dst = dst->sin6_addr; + eip = (struct etherip_header *)(ip + 1); + eip->eip_header = htons(ETHERIP_VERSION << ETHERIP_VERS_SHIFT); - m->m_pkthdr.ph_rtableid = sc->sc_rdomain; + m->m_flags &= ~(M_BCAST|M_MCAST); + m->m_pkthdr.ph_rtableid = sc->sc_tunnel.t_rtableid; #if NPF > 0 pf_pkt_addr_changed(m); #endif - etheripstat.etherip_opackets++; - etheripstat.etherip_obytes += (m->m_pkthdr.len - - (sizeof(struct ip6_hdr) + sizeof(struct etherip_header))); - return ip6_output(m, 0, NULL, IPV6_MINMTU, 0, NULL); + return (ip_output(m, NULL, NULL, IP_RAWOUTPUT, NULL, NULL, 0)); +} + +struct mbuf * +etherip_ip4_input(struct mbuf *m, int hlen) +{ + struct etherip_tunnel key; + struct ip *ip; + + ip = mtod(m, struct ip *); + + key.t_af = AF_INET; + key.t_src[0] = ip->ip_dst.s_addr; + key.t_dst[0] = ip->ip_src.s_addr; + + return (etherip_ip_input(m, hlen, &key)); } -int -ip6_etherip_input(struct mbuf **mp, int *offp, int proto) +static struct mbuf * +etherip_ip_input(struct mbuf *m, int hlen, struct etherip_tunnel *key) { - struct mbuf *m = *mp; struct mbuf_list ml = MBUF_LIST_INITIALIZER(); - int off = *offp; struct etherip_softc *sc; - const struct ip6_hdr *ip6; struct etherip_header *eip; - struct sockaddr_in6 *src6, *dst6; - struct ifnet *ifp = NULL; + caddr_t hdr; + + key->t_rtableid = m->m_pkthdr.ph_rtableid; + + /* if the packet is ok the payload must contain an ethernet header */ + m = m_pullup(m, hlen + sizeof(*eip) + sizeof(struct ether_header)); + if (m == NULL) + return (NULL); + hdr = mtod(m, caddr_t); + eip = (struct etherip_header *)(hdr + hlen); - if (!etherip_allow) { - m_freem(m); - etheripstat.etherip_pdrops++; - return IPPROTO_NONE; + if (eip->eip_header != htons(ETHERIP_VERSION << ETHERIP_VERS_SHIFT)) { + /* not a supported version */ + return (m); } - ip6 = mtod(m, const struct ip6_hdr 
*); + rw_enter_read(ðerip_lk); + sc = (struct etherip_softc *)RBT_FIND(etherip_tree, + ðerip_softcs, key); + rw_exit_read(ðerip_lk); - LIST_FOREACH(sc, ðerip_softc_list, sc_entry) { - if (sc->sc_src.ss_family != AF_INET6 || - sc->sc_dst.ss_family != AF_INET6) - continue; + if (sc == NULL) + return (m); - src6 = (struct sockaddr_in6 *)&sc->sc_src; - dst6 = (struct sockaddr_in6 *)&sc->sc_dst; + m_adj(m, hlen + sizeof(*eip)); - if (!IN6_ARE_ADDR_EQUAL(&src6->sin6_addr, &ip6->ip6_dst) || - !IN6_ARE_ADDR_EQUAL(&dst6->sin6_addr, &ip6->ip6_src)) - continue; + m->m_flags &= ~(M_BCAST|M_MCAST); - ifp = &sc->sc_ac.ac_if; - break; - } +#if NPF > 0 + pf_pkt_addr_changed(m); +#endif - if (ifp == NULL) { -#if NGIF > 0 - /* - * This path is nessesary for gif(4) and etherip(4) coexistence. - * This is tricky but the path will be removed soon when - * implementation of etherip is removed from gif(4). - */ - return etherip_input6(mp, offp, proto); -#else - etheripstat.etherip_noifdrops++; - m_freem(m); - return IPPROTO_DONE; -#endif /* NGIF */ - } + ml_enqueue(&ml, m); + if_input(&sc->sc_ac.ac_if, &ml); + return (NULL); +} - m_adj(m, off); - m = m_pullup(m, sizeof(struct etherip_header)); - if (m == NULL) { - etheripstat.etherip_adrops++; - return IPPROTO_DONE; - } +#ifdef INET6 +static int +etherip_ip6_encap(struct etherip_softc *sc, struct mbuf *m) +{ + struct ip6_hdr *ip6; + struct etherip_header *eip; + int hlen; - eip = mtod(m, struct etherip_header *); - if ((eip->eip_ver != ETHERIP_VERSION) || eip->eip_pad) { - etheripstat.etherip_adrops++; - m_freem(m); - return IPPROTO_DONE; - } - etheripstat.etherip_ipackets++; - etheripstat.etherip_ibytes += (m->m_pkthdr.len - - sizeof(struct etherip_header)); - - m_adj(m, sizeof(struct etherip_header)); - m = m_pullup(m, sizeof(struct ether_header)); - if (m == NULL) { - etheripstat.etherip_adrops++; - return IPPROTO_DONE; - } + hlen = sizeof(*ip6) + sizeof(*eip); + + m = m_prepend(m, hlen, M_DONTWAIT); + if (m == NULL) + return 
(ENOMEM); + + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_flow = 0; + ip6->ip6_vfc &= ~IPV6_VERSION_MASK; + ip6->ip6_vfc |= IPV6_VERSION; + ip6->ip6_nxt = IPPROTO_ETHERIP; + ip6->ip6_hlim = sc->sc_tunnel.t_ttl; + ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); + memcpy(&ip6->ip6_src, sc->sc_tunnel.t_src, sizeof(ip6->ip6_src)); + memcpy(&ip6->ip6_dst, sc->sc_tunnel.t_dst, sizeof(ip6->ip6_dst)); + + eip = (struct etherip_header *)(ip6 + 1); + eip->eip_header = htons(ETHERIP_VERSION << ETHERIP_VERS_SHIFT); m->m_flags &= ~(M_BCAST|M_MCAST); + m->m_pkthdr.ph_rtableid = sc->sc_tunnel.t_rtableid; #if NPF > 0 pf_pkt_addr_changed(m); #endif - ml_enqueue(&ml, m); - if_input(ifp, &ml); - - return IPPROTO_DONE; + return (ip6_output(m, 0, NULL, IPV6_MINMTU, 0, NULL)); } -#endif /* INET6 */ +struct mbuf * +etherip_ip6_input(struct mbuf *m, int hlen) +{ + struct etherip_tunnel key; + struct ip6_hdr *ip6; -int -ip_etherip_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - /* All sysctl names at this level are terminal. 
*/ - if (namelen != 1) - return ENOTDIR; - - switch (name[0]) { - case ETHERIPCTL_ALLOW: - return sysctl_int(oldp, oldlenp, newp, newlen, &etherip_allow); - case ETHERIPCTL_STATS: - if (newp != NULL) - return EPERM; - return sysctl_struct(oldp, oldlenp, newp, newlen, - &etheripstat, sizeof(etheripstat)); - default: - break; - } + /* ip6 must point at the outer header before its addresses are read */ + ip6 = mtod(m, struct ip6_hdr *); + + /* key is from the local point of view: our source is the packet's destination */ + key.t_af = AF_INET6; + memcpy(key.t_src, &ip6->ip6_dst, sizeof(key.t_src)); + memcpy(key.t_dst, &ip6->ip6_src, sizeof(key.t_dst)); - return ENOPROTOOPT; + return (etherip_ip_input(m, hlen, &key)); } +#endif /* INET6 */ Index: sys/net/if_etherip.h =================================================================== RCS file: /cvs/src/sys/net/if_etherip.h,v retrieving revision 1.2 diff -u -p -r1.2 if_etherip.h --- sys/net/if_etherip.h 5 Dec 2015 22:16:27 -0000 1.2 +++ sys/net/if_etherip.h 12 Dec 2016 05:59:20 -0000 @@ -17,18 +17,6 @@ #ifndef _NET_IF_ETHERIP_H_ #define _NET_IF_ETHERIP_H_ -#if 0 -/* - * TODO: - * At this stage, struct etheripstat and struct etherip_header, - * and EtherIP sysctl objects are present at netinet/ip_ether.h. - * When implementation of etherip is removed from gif(4), there - * are moved here.
- */ - -extern int etherip_allow; -extern struct etheripstat etheripstat; - struct etheripstat { uint32_t etherip_hdrops; /* packet shorter than header shows */ uint32_t etherip_qfull; /* bridge queue full, packet dropped */ @@ -42,43 +30,19 @@ struct etheripstat { }; struct etherip_header { -#if BYTE_ORDER == LITTLE_ENDIAN - unsigned int eip_res:4; /* reserved */ - unsigned int eip_ver:4; /* version */ -#endif -#if BYTE_ORDER == BIG_ENDIAN - unsigned int eip_ver:4; /* version */ - unsigned int eip_res:4; /* reserved */ -#endif - uint8_t eip_pad; /* required padding byte */ -} __packed; + uint16_t eip_header; +#define ETHERIP_VERS_MASK 0xf000 +#define ETHERIP_VERS_SHIFT 12 +#define ETHERIP_PAD 0x0fff +} __packed __aligned(4); #define ETHERIP_VERSION 0x03 -/* - * Names for Ether-IP sysctl objects - */ -#define ETHERIPCTL_ALLOW 1 /* accept incoming EtherIP packets */ -#define ETHERIPCTL_STATS 2 /* etherip stats */ -#define ETHERIPCTL_MAXID 3 - -#define ETHERIPCTL_NAMES { \ - { 0, 0 }, \ - { "allow", CTLTYPE_INT }, \ - { "stats", CTLTYPE_STRUCT }, \ -} - - -#endif /* 0 */ - -int ip_etherip_sysctl(int *, uint, void *, size_t *, void *, size_t); -int ip_etherip_output(struct ifnet *, struct mbuf *); -void ip_etherip_input(struct mbuf *, ...); - +#ifdef _KERNEL +struct mbuf *etherip_ip4_input(struct mbuf *, int); #ifdef INET6 -int ip6_etherip_output(struct ifnet *, struct mbuf *); -int ip6_etherip_input(struct mbuf **, int *, int); +struct mbuf *etherip_ip6_input(struct mbuf *, int); #endif /* INET6 */ - +#endif /* _KERNEL */ #endif /* _NET_IF_ETHERIP_H_ */ Index: sys/net/if_gif.c =================================================================== RCS file: /cvs/src/sys/net/if_gif.c,v retrieving revision 1.86 diff -u -p -r1.86 if_gif.c --- sys/net/if_gif.c 13 Sep 2016 07:48:45 -0000 1.86 +++ sys/net/if_gif.c 12 Dec 2016 05:59:20 -0000 @@ -45,9 +45,7 @@ #include #include #include -#include #include -#include #ifdef INET6 #include @@ -63,8 +61,8 @@ #endif #ifdef MPLS 
-#include -#endif +#include +#endif /* MPLS */ #include "pf.h" #if NPF > 0 @@ -75,238 +73,181 @@ #define GIF_MTU_MIN (1280) /* Minimum MTU */ #define GIF_MTU_MAX (8192) /* Maximum MTU */ -void gifattach(int); -int gif_clone_create(struct if_clone *, int); -int gif_clone_destroy(struct ifnet *); -int gif_checkloop(struct ifnet *, struct mbuf *); -void gif_start(struct ifnet *); -int gif_ioctl(struct ifnet *, u_long, caddr_t); -int gif_output(struct ifnet *, struct mbuf *, struct sockaddr *, - struct rtentry *); - -int in_gif_output(struct ifnet *, int, struct mbuf **); -int in6_gif_output(struct ifnet *, int, struct mbuf **); - -/* - * gif global variable definitions - */ -struct gif_softc_head gif_softc_list; -struct if_clone gif_cloner = +void gifattach(int); + +struct gif_tunnel { + RBT_ENTRY(gif_tunnel) t_entry; + + unsigned int t_rtableid; + int t_af; + uint32_t t_src[4]; + uint32_t t_dst[4]; + uint8_t t_ttl; +}; + +struct gif_softc { + struct gif_tunnel sc_tunnel; /* must be first */ + struct ifnet sc_if; +}; + +static int gif_clone_create(struct if_clone *, int); +static int gif_clone_destroy(struct ifnet *); + +static struct if_clone gif_cloner = IF_CLONE_INITIALIZER("gif", gif_clone_create, gif_clone_destroy); +RBT_HEAD(gif_tree, gif_tunnel); + +static inline int + gif_cmp(const struct gif_tunnel *, const struct gif_tunnel *); + +RBT_PROTOTYPE(gif_tree, gif_tunnel, t_entry, gif_cmp); + +struct gif_tree gif_softcs = RBT_INITIALIZER(); +struct rwlock gif_lk = RWLOCK_INITIALIZER("giflk"); + +static int gif_ioctl(struct ifnet *, u_long, caddr_t); +static int gif_output(struct ifnet *, struct mbuf *, + struct sockaddr *, struct rtentry *); +static int gif_checkloop(struct ifnet *, struct mbuf *); +static void gif_start(struct ifnet *); +static int gif_ip4_encap(struct gif_softc *sc, struct mbuf *); +#ifdef INET6 +static int gif_ip6_encap(struct gif_softc *sc, struct mbuf *); +#endif + +static struct mbuf * + gif_ip_input(struct mbuf *, int, struct gif_tunnel *, 
uint8_t); + +static int gif_up(struct gif_softc *); +static int gif_down(struct gif_softc *); +static int gif_set_tunnel(struct gif_softc *, struct if_laddrreq *); +static int gif_get_tunnel(struct gif_softc *, struct if_laddrreq *); +static int gif_del_tunnel(struct gif_softc *); + void gifattach(int count) { - LIST_INIT(&gif_softc_list); if_clone_attach(&gif_cloner); } -int +static int gif_clone_create(struct if_clone *ifc, int unit) { struct gif_softc *sc; - int s; + struct ifnet *ifp; sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO); if (!sc) return (ENOMEM); - snprintf(sc->gif_if.if_xname, sizeof sc->gif_if.if_xname, - "%s%d", ifc->ifc_name, unit); - sc->gif_if.if_mtu = GIF_MTU; - sc->gif_if.if_flags = IFF_POINTOPOINT | IFF_MULTICAST; - sc->gif_if.if_ioctl = gif_ioctl; - sc->gif_if.if_start = gif_start; - sc->gif_if.if_output = gif_output; - sc->gif_if.if_rtrequest = p2p_rtrequest; - sc->gif_if.if_type = IFT_GIF; - IFQ_SET_MAXLEN(&sc->gif_if.if_snd, IFQ_MAXLEN); - sc->gif_if.if_softc = sc; - if_attach(&sc->gif_if); - if_alloc_sadl(&sc->gif_if); + sc->sc_tunnel.t_rtableid = 0; + sc->sc_tunnel.t_af = AF_UNSPEC; + sc->sc_tunnel.t_ttl = ip_defttl; + + ifp = &sc->sc_if; + + snprintf(ifp->if_xname, sizeof(ifp->if_xname), + "%s%d", ifc->ifc_name, unit); + ifp->if_softc = sc; + ifp->if_mtu = GIF_MTU; + ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; + ifp->if_ioctl = gif_ioctl; + ifp->if_output = gif_output; + ifp->if_start = gif_start; + ifp->if_rtrequest = p2p_rtrequest; + ifp->if_type = IFT_TUNNEL; + IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); + + if_attach(ifp); + if_alloc_sadl(ifp); #if NBPFILTER > 0 - bpfattach(&sc->gif_if.if_bpf, &sc->gif_if, DLT_LOOP, sizeof(u_int32_t)); + bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t)); #endif - s = splnet(); - LIST_INSERT_HEAD(&gif_softc_list, sc, gif_list); - splx(s); return (0); } -int +static int gif_clone_destroy(struct ifnet *ifp) { struct gif_softc *sc = ifp->if_softc; - int s; - - s = splnet(); - 
LIST_REMOVE(sc, gif_list); - splx(s); if_detach(ifp); - if (sc->gif_psrc) - free((caddr_t)sc->gif_psrc, M_IFADDR, 0); - sc->gif_psrc = NULL; - if (sc->gif_pdst) - free((caddr_t)sc->gif_pdst, M_IFADDR, 0); - sc->gif_pdst = NULL; free(sc, M_DEVBUF, sizeof(*sc)); return (0); } -void +static void gif_start(struct ifnet *ifp) { struct gif_softc *sc = (struct gif_softc*)ifp; struct mbuf *m; + int error; - for (;;) { - IFQ_DEQUEUE(&ifp->if_snd, m); - if (m == NULL) - break; - - /* is interface up and usable? */ - if (!(ifp->if_flags & IFF_UP) || - sc->gif_psrc == NULL || sc->gif_pdst == NULL || - sc->gif_psrc->sa_family != sc->gif_pdst->sa_family) { - m_freem(m); - continue; - } - + while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) { #if NBPFILTER > 0 - if (ifp->if_bpf) { - int offset; - sa_family_t family; - u_int8_t proto; - - /* must decapsulate outer header for bpf */ - switch (sc->gif_psrc->sa_family) { - case AF_INET: - offset = sizeof(struct ip); - proto = mtod(m, struct ip *)->ip_p; - break; -#ifdef INET6 - case AF_INET6: - offset = sizeof(struct ip6_hdr); - proto = mtod(m, struct ip6_hdr *)->ip6_nxt; - break; -#endif - default: - proto = 0; - break; - } - switch (proto) { - case IPPROTO_IPV4: - family = AF_INET; - break; - case IPPROTO_IPV6: - family = AF_INET6; - break; - case IPPROTO_ETHERIP: - family = AF_LINK; - offset += sizeof(struct etherip_header); - break; - case IPPROTO_MPLS: - family = AF_MPLS; - break; - default: - offset = 0; - family = sc->gif_psrc->sa_family; - break; - } - m->m_data += offset; - m->m_len -= offset; - m->m_pkthdr.len -= offset; - bpf_mtap_af(ifp->if_bpf, family, m, BPF_DIRECTION_OUT); - m->m_data -= offset; - m->m_len += offset; - m->m_pkthdr.len += offset; + caddr_t if_bpf = ifp->if_bpf; + if (if_bpf) { + int af = m->m_pkthdr.ether_vtag; + bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT); } #endif + ifp->if_opackets++; /* XXX we should cache the outgoing route */ - switch (sc->gif_psrc->sa_family) { + switch (sc->sc_tunnel.t_af) { case 
AF_INET: - ip_output(m, NULL, NULL, 0, NULL, NULL, 0); + error = gif_ip4_encap(sc, m); break; #ifdef INET6 case AF_INET6: - /* - * force fragmentation to minimum MTU, to avoid path - * MTU discovery. It is too painful to ask for resend - * of inner packet, to achieve path MTU discovery for - * encapsulated packets. - */ - ip6_output(m, 0, NULL, IPV6_MINMTU, 0, NULL); + error = gif_ip6_encap(sc, m); break; #endif default: - m_freem(m); - break; + unhandled_af(sc->sc_tunnel.t_af); } - } -} -int -gif_encap(struct ifnet *ifp, struct mbuf **mp, sa_family_t af) -{ - struct gif_softc *sc = (struct gif_softc*)ifp; - int error = 0; - /* - * Remove multicast and broadcast flags or encapsulated packet - * ends up as multicast or broadcast packet. - */ - (*mp)->m_flags &= ~(M_BCAST|M_MCAST); - - /* - * Encapsulate packet. Add IP or IP6 header depending on tunnel AF. - */ - switch (sc->gif_psrc->sa_family) { - case AF_INET: - error = in_gif_output(ifp, af, mp); - break; -#ifdef INET6 - case AF_INET6: - error = in6_gif_output(ifp, af, mp); - break; -#endif - default: - m_freem(*mp); - error = EAFNOSUPPORT; - break; + if (error != 0) + ifp->if_oerrors++; } - - if (error) - return (error); - - error = gif_checkloop(ifp, *mp); - return (error); } -int +static int gif_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, struct rtentry *rt) { - struct gif_softc *sc = (struct gif_softc*)ifp; int error = 0; - if (!(ifp->if_flags & IFF_UP) || - sc->gif_psrc == NULL || sc->gif_pdst == NULL || - sc->gif_psrc->sa_family != sc->gif_pdst->sa_family) { + if (!ISSET(ifp->if_flags, IFF_RUNNING)) { m_freem(m); error = ENETDOWN; goto end; } - error = gif_encap(ifp, &m, dst->sa_family); - if (error) + error = gif_checkloop(ifp, m); + if (error != 0) goto end; + switch (dst->sa_family) { + case AF_INET: +#ifdef INET6 + case AF_INET6: +#endif +#ifdef MPLS + case AF_MPLS: +#endif + break; + default: + m_freem(m); + return (EAFNOSUPPORT); + } + error = if_enqueue(ifp, m); end: @@ -315,19 
+256,27 @@ end: return (error); } -int +static int gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct gif_softc *sc = (struct gif_softc*)ifp; - struct ifreq *ifr = (struct ifreq *)data; - int error = 0, size; - struct sockaddr *dst, *src; - struct sockaddr *sa; - int s; - struct gif_softc *sc2; + struct ifreq *ifr = (struct ifreq *)data; + int error = 0; switch (cmd) { case SIOCSIFADDR: + ifp->if_flags |= IFF_UP; + /* FALLTHROUGH */ + case SIOCSIFFLAGS: + if (ISSET(ifp->if_flags, IFF_UP)) { + if (!ISSET(ifp->if_flags, IFF_RUNNING)) + error = gif_up(sc); + else + error = ENETRESET; + } else { + if (ISSET(ifp->if_flags, IFF_RUNNING)) + error = gif_down(sc); + } break; case SIOCSIFDSTADDR: @@ -337,275 +286,264 @@ gif_ioctl(struct ifnet *ifp, u_long cmd, case SIOCDELMULTI: break; - case SIOCSIFPHYADDR: -#ifdef INET6 - case SIOCSIFPHYADDR_IN6: -#endif /* INET6 */ case SIOCSLIFPHYADDR: - switch (cmd) { - case SIOCSIFPHYADDR: - src = sintosa( - &(((struct in_aliasreq *)data)->ifra_addr)); - dst = sintosa( - &(((struct in_aliasreq *)data)->ifra_dstaddr)); - break; -#ifdef INET6 - case SIOCSIFPHYADDR_IN6: - src = sin6tosa( - &(((struct in6_aliasreq *)data)->ifra_addr)); - dst = sin6tosa( - &(((struct in6_aliasreq *)data)->ifra_dstaddr)); - break; -#endif - case SIOCSLIFPHYADDR: - src = (struct sockaddr *) - &(((struct if_laddrreq *)data)->addr); - dst = (struct sockaddr *) - &(((struct if_laddrreq *)data)->dstaddr); - break; - default: - return (EINVAL); - } + error = gif_set_tunnel(sc, (struct if_laddrreq *)data); + break; + case SIOCGLIFPHYADDR: + error = gif_get_tunnel(sc, (struct if_laddrreq *)data); + break; + case SIOCDIFPHYADDR: + error = gif_del_tunnel(sc); + break; - /* sa_family must be equal */ - if (src->sa_family != dst->sa_family) - return (EINVAL); + case SIOCSIFMTU: + if (ifr->ifr_mtu < GIF_MTU_MIN || ifr->ifr_mtu > GIF_MTU_MAX) + error = EINVAL; + else + ifp->if_mtu = ifr->ifr_mtu; + break; - /* validate sa_len */ - switch (src->sa_family) { - 
case AF_INET: - if (src->sa_len != sizeof(struct sockaddr_in)) - return (EINVAL); - break; -#ifdef INET6 - case AF_INET6: - if (src->sa_len != sizeof(struct sockaddr_in6)) - return (EINVAL); - break; -#endif - default: - return (EAFNOSUPPORT); - } - switch (dst->sa_family) { - case AF_INET: - if (dst->sa_len != sizeof(struct sockaddr_in)) - return (EINVAL); - break; -#ifdef INET6 - case AF_INET6: - if (dst->sa_len != sizeof(struct sockaddr_in6)) - return (EINVAL); + case SIOCSLIFPHYRTABLE: + if (ifr->ifr_rdomainid < 0 || + ifr->ifr_rdomainid > RT_TABLEID_MAX || + !rtable_exists(ifr->ifr_rdomainid)) { + error = EINVAL; break; -#endif - default: - return (EAFNOSUPPORT); } + sc->sc_tunnel.t_rtableid = ifr->ifr_rdomainid; + break; + case SIOCGLIFPHYRTABLE: + ifr->ifr_rdomainid = sc->sc_tunnel.t_rtableid; + break; - /* check sa_family looks sane for the cmd */ - switch (cmd) { - case SIOCSIFPHYADDR: - if (src->sa_family == AF_INET) - break; - return (EAFNOSUPPORT); -#ifdef INET6 - case SIOCSIFPHYADDR_IN6: - if (src->sa_family == AF_INET6) - break; - return (EAFNOSUPPORT); -#endif /* INET6 */ - case SIOCSLIFPHYADDR: - /* checks done in the above */ + case SIOCSLIFPHYTTL: + if (ifr->ifr_ttl < 0 || ifr->ifr_ttl > 0xff) { + error = EINVAL; break; } - LIST_FOREACH(sc2, &gif_softc_list, gif_list) { - if (sc2 == sc) - continue; - if (!sc2->gif_pdst || !sc2->gif_psrc) - continue; - if (sc2->gif_pdst->sa_family != dst->sa_family || - sc2->gif_pdst->sa_len != dst->sa_len || - sc2->gif_psrc->sa_family != src->sa_family || - sc2->gif_psrc->sa_len != src->sa_len) - continue; - /* can't configure same pair of address onto two gifs */ - if (bcmp(sc2->gif_pdst, dst, dst->sa_len) == 0 && - bcmp(sc2->gif_psrc, src, src->sa_len) == 0) { - error = EADDRNOTAVAIL; - goto bad; - } - - /* can't configure multiple multi-dest interfaces */ -#define multidest(x) \ - (satosin(x)->sin_addr.s_addr == INADDR_ANY) -#ifdef INET6 -#define multidest6(x) \ - 
(IN6_IS_ADDR_UNSPECIFIED(&satosin6(x)->sin6_addr)) -#endif - if (dst->sa_family == AF_INET && - multidest(dst) && multidest(sc2->gif_pdst)) { - error = EADDRNOTAVAIL; - goto bad; - } -#ifdef INET6 - if (dst->sa_family == AF_INET6 && - multidest6(dst) && multidest6(sc2->gif_pdst)) { - error = EADDRNOTAVAIL; - goto bad; - } -#endif - } - - if (sc->gif_psrc) - free((caddr_t)sc->gif_psrc, M_IFADDR, 0); - sa = malloc(src->sa_len, M_IFADDR, M_WAITOK); - bcopy((caddr_t)src, (caddr_t)sa, src->sa_len); - sc->gif_psrc = sa; - - if (sc->gif_pdst) - free((caddr_t)sc->gif_pdst, M_IFADDR, 0); - sa = malloc(dst->sa_len, M_IFADDR, M_WAITOK); - bcopy((caddr_t)dst, (caddr_t)sa, dst->sa_len); - sc->gif_pdst = sa; - - s = splnet(); - ifp->if_flags |= IFF_RUNNING; - if_up(ifp); /* send up RTM_IFINFO */ - splx(s); - - error = 0; + /* commit */ + sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl; + break; + case SIOCGLIFPHYTTL: + ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl; break; -#ifdef SIOCDIFPHYADDR - case SIOCDIFPHYADDR: - if (sc->gif_psrc) { - free((caddr_t)sc->gif_psrc, M_IFADDR, 0); - sc->gif_psrc = NULL; - } - if (sc->gif_pdst) { - free((caddr_t)sc->gif_pdst, M_IFADDR, 0); - sc->gif_pdst = NULL; - } - /* change the IFF_{UP, RUNNING} flag as well? 
*/ + default: + error = ENOTTY; break; -#endif + } - case SIOCGIFPSRCADDR: -#ifdef INET6 - case SIOCGIFPSRCADDR_IN6: -#endif /* INET6 */ - if (sc->gif_psrc == NULL) { - error = EADDRNOTAVAIL; - goto bad; - } - src = sc->gif_psrc; - switch (cmd) { - case SIOCGIFPSRCADDR: - dst = &ifr->ifr_addr; - size = sizeof(ifr->ifr_addr); - break; + return (error); +} + +static inline int +gif_cmp(const struct gif_tunnel *a, const struct gif_tunnel *b) +{ + if (a->t_src > b->t_src) + return (1); + if (a->t_src < b->t_src) + return (-1); + + if (a->t_dst > b->t_dst) + return (1); + if (a->t_dst < b->t_dst) + return (-1); + + if (a->t_rtableid > b->t_rtableid) + return (1); + if (a->t_rtableid < b->t_rtableid) + return (-1); + + return (0); +} + +RBT_GENERATE(gif_tree, gif_tunnel, t_entry, gif_cmp); + +static int +gif_up(struct gif_softc *sc) +{ + struct gif_tunnel *t; + int error = 0; + + if (sc->sc_tunnel.t_af == AF_UNSPEC) + return (ENXIO); + + error = rw_enter(&gif_lk, RW_WRITE | RW_INTR); + if (error != 0) + return (error); + + t = RBT_INSERT(gif_tree, &gif_softcs, (struct gif_tunnel *)sc); + + rw_exit(&gif_lk); + + if (t == NULL) + SET(sc->sc_if.if_flags, IFF_RUNNING); + else + error = EBUSY; + + return (error); +} + +static int +gif_down(struct gif_softc *sc) +{ + int error; + + error = rw_enter(&gif_lk, RW_WRITE | RW_INTR); + if (error != 0) + return (error); + + RBT_REMOVE(gif_tree, &gif_softcs, &sc->sc_tunnel); + + rw_exit(&gif_lk); + + CLR(sc->sc_if.if_flags, IFF_RUNNING); + + return (error); +} + +static int +gif_set_tunnel(struct gif_softc *sc, struct if_laddrreq *req) +{ + struct sockaddr *src = (struct sockaddr *)&req->addr; + struct sockaddr *dst = (struct sockaddr *)&req->dstaddr; + struct sockaddr_in *sin; #ifdef INET6 - case SIOCGIFPSRCADDR_IN6: - dst = sin6tosa( - &(((struct in6_ifreq *)data)->ifr_addr)); - size = sizeof(((struct in6_ifreq *)data)->ifr_addr); - break; -#endif /* INET6 */ - default: - error = EADDRNOTAVAIL; - goto bad; - } - if (src->sa_len > 
size) + struct sockaddr_in6 *sin6; + int error; +#endif + + if (ISSET(sc->sc_if.if_flags, IFF_RUNNING)) + return (EBUSY); + + /* sa_family and sa_len must be equal */ + if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len) + return (EINVAL); + + /* validate */ + switch (dst->sa_family) { + case AF_INET: + if (dst->sa_len != sizeof(*sin)) return (EINVAL); - bcopy((caddr_t)src, (caddr_t)dst, src->sa_len); - break; - case SIOCGIFPDSTADDR: -#ifdef INET6 - case SIOCGIFPDSTADDR_IN6: -#endif /* INET6 */ - if (sc->gif_pdst == NULL) { - error = EADDRNOTAVAIL; - goto bad; - } - src = sc->gif_pdst; - switch (cmd) { - case SIOCGIFPDSTADDR: - dst = &ifr->ifr_addr; - size = sizeof(ifr->ifr_addr); - break; -#ifdef INET6 - case SIOCGIFPDSTADDR_IN6: - dst = sin6tosa(&(((struct in6_ifreq *)data)->ifr_addr)); - size = sizeof(((struct in6_ifreq *)data)->ifr_addr); - break; -#endif /* INET6 */ - default: - error = EADDRNOTAVAIL; - goto bad; - } - if (src->sa_len > size) + sin = (struct sockaddr_in *)src; + if (in_nullhost(sin->sin_addr) || + IN_MULTICAST(sin->sin_addr.s_addr)) return (EINVAL); - bcopy((caddr_t)src, (caddr_t)dst, src->sa_len); - break; - case SIOCGLIFPHYADDR: - if (sc->gif_psrc == NULL || sc->gif_pdst == NULL) { - error = EADDRNOTAVAIL; - goto bad; - } + sc->sc_tunnel.t_src[0] = sin->sin_addr.s_addr; - /* copy src */ - src = sc->gif_psrc; - dst = (struct sockaddr *) - &(((struct if_laddrreq *)data)->addr); - size = sizeof(((struct if_laddrreq *)data)->addr); - if (src->sa_len > size) + sin = (struct sockaddr_in *)dst; + if (in_nullhost(sin->sin_addr) || + IN_MULTICAST(sin->sin_addr.s_addr)) return (EINVAL); - bcopy((caddr_t)src, (caddr_t)dst, src->sa_len); - /* copy dst */ - src = sc->gif_pdst; - dst = (struct sockaddr *) - &(((struct if_laddrreq *)data)->dstaddr); - size = sizeof(((struct if_laddrreq *)data)->dstaddr); - if (src->sa_len > size) - return (EINVAL); - bcopy((caddr_t)src, (caddr_t)dst, src->sa_len); - break; + sc->sc_tunnel.t_dst[0] = 
sin->sin_addr.s_addr; - case SIOCSIFFLAGS: - /* if_ioctl() takes care of it */ break; +#ifdef INET6 + case AF_INET6: + if (dst->sa_len != sizeof(*sin6)) + return (EINVAL); + + sin6 = (struct sockaddr_in6 *)src; + if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || + IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) + return (EINVAL); + + error = in6_embedscope((struct in6_addr *)sc->sc_tunnel.t_src, + sin6, NULL); + if (error != 0) + return (error); + + sin6 = (struct sockaddr_in6 *)dst; + if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || + IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) + return (EINVAL); + + error = in6_embedscope((struct in6_addr *)sc->sc_tunnel.t_dst, + sin6, NULL); + if (error != 0) + return (error); - case SIOCSIFMTU: - if (ifr->ifr_mtu < GIF_MTU_MIN || ifr->ifr_mtu > GIF_MTU_MAX) - error = EINVAL; - else - ifp->if_mtu = ifr->ifr_mtu; break; +#endif + default: + return (EAFNOSUPPORT); + } + + /* commit */ + sc->sc_tunnel.t_af = dst->sa_family; + + return (0); +} + +static int +gif_get_tunnel(struct gif_softc *sc, struct if_laddrreq *req) +{ + struct sockaddr *src = (struct sockaddr *)&req->addr; + struct sockaddr *dst = (struct sockaddr *)&req->dstaddr; + struct sockaddr_in *sin; +#ifdef INET6 + struct sockaddr_in6 *sin6; +#endif + + switch (sc->sc_tunnel.t_af) { + case AF_UNSPEC: + return (EADDRNOTAVAIL); + + case AF_INET: + sin = (struct sockaddr_in *)src; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); + sin->sin_addr.s_addr = sc->sc_tunnel.t_src[0]; + + sin = (struct sockaddr_in *)dst; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); + sin->sin_addr.s_addr = sc->sc_tunnel.t_dst[0]; - case SIOCSLIFPHYRTABLE: - if (ifr->ifr_rdomainid < 0 || - ifr->ifr_rdomainid > RT_TABLEID_MAX || - !rtable_exists(ifr->ifr_rdomainid)) { - error = EINVAL; - break; - } - sc->gif_rtableid = ifr->ifr_rdomainid; break; - case SIOCGLIFPHYRTABLE: - ifr->ifr_rdomainid = sc->gif_rtableid; + +#ifdef 
INET6 + case AF_INET6: + sin6 = (struct sockaddr_in6 *)src; + memset(sin6, 0, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); + in6_recoverscope(sin6, (struct in6_addr *)sc->sc_tunnel.t_src); + + sin6 = (struct sockaddr_in6 *)dst; + memset(sin6, 0, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); + in6_recoverscope(sin6, (struct in6_addr *)sc->sc_tunnel.t_dst); + break; +#endif default: - error = ENOTTY; - break; + return (EAFNOSUPPORT); } - bad: - return (error); + + return (0); } -int +static int +gif_del_tunnel(struct gif_softc *sc) +{ + if (ISSET(sc->sc_if.if_flags, IFF_RUNNING)) + return (EBUSY); + + /* commit */ + sc->sc_tunnel.t_af = AF_UNSPEC; + + return (0); +} + +static int gif_checkloop(struct ifnet *ifp, struct mbuf *m) { struct m_tag *mtag; @@ -631,260 +569,197 @@ gif_checkloop(struct ifnet *ifp, struct } *(struct ifnet **)(mtag + 1) = ifp; m_tag_prepend(m, mtag); - return 0; + + return (0); } -int -in_gif_output(struct ifnet *ifp, int family, struct mbuf **m0) +static inline uint8_t +gif_af2proto(int af) { - struct gif_softc *sc = (struct gif_softc*)ifp; - struct sockaddr_in *sin_src = satosin(sc->gif_psrc); - struct sockaddr_in *sin_dst = satosin(sc->gif_pdst); - struct tdb tdb; - struct xformsw xfs; - int error; - struct mbuf *m = *m0; - - if (sin_src == NULL || sin_dst == NULL || - sin_src->sin_family != AF_INET || - sin_dst->sin_family != AF_INET) { - m_freem(m); - return EAFNOSUPPORT; - } - -#ifdef DIAGNOSTIC - if (ifp->if_rdomain != rtable_l2(m->m_pkthdr.ph_rtableid)) { - printf("%s: trying to send packet on wrong domain. " - "if %d vs. mbuf %d, AF %d\n", ifp->if_xname, - ifp->if_rdomain, rtable_l2(m->m_pkthdr.ph_rtableid), - family); - } -#endif - - /* setup dummy tdb. it highly depends on ipip_output() code. 
*/ - bzero(&tdb, sizeof(tdb)); - bzero(&xfs, sizeof(xfs)); - tdb.tdb_src.sin.sin_family = AF_INET; - tdb.tdb_src.sin.sin_len = sizeof(struct sockaddr_in); - tdb.tdb_src.sin.sin_addr = sin_src->sin_addr; - tdb.tdb_dst.sin.sin_family = AF_INET; - tdb.tdb_dst.sin.sin_len = sizeof(struct sockaddr_in); - tdb.tdb_dst.sin.sin_addr = sin_dst->sin_addr; - tdb.tdb_xform = &xfs; - xfs.xf_type = -1; /* not XF_IP4 */ - - switch (family) { + switch (af) { case AF_INET: - break; + return (IPPROTO_IPV4); #ifdef INET6 case AF_INET6: - break; + return (IPPROTO_IPV6); #endif -#if MPLS +#ifdef MPLS case AF_MPLS: - break; + return (IPPROTO_MPLS); #endif default: -#ifdef DEBUG - printf("%s: warning: unknown family %d passed\n", __func__, - family); -#endif - m_freem(m); - return EAFNOSUPPORT; + unhandled_af(af); } +} - /* encapsulate into IPv4 packet */ - *m0 = NULL; -#ifdef MPLS - if (family == AF_MPLS) - error = etherip_output(m, &tdb, m0, IPPROTO_MPLS); - else -#endif - error = ipip_output(m, &tdb, m0, 0, 0); - if (error) - return error; - else if (*m0 == NULL) - return EFAULT; +static int +gif_ip4_encap(struct gif_softc *sc, struct mbuf *m) +{ + struct ip *ip; + + m = m_prepend(m, sizeof(*ip), M_DONTWAIT); + if (m == NULL) + return (ENOMEM); - m = *m0; + ip = mtod(m, struct ip *); + ip->ip_v = IPVERSION; + ip->ip_hl = sizeof(*ip) >> 2; + ip->ip_id = htons(ip_randomid()); + ip->ip_tos = IPTOS_LOWDELAY; + ip->ip_p = gif_af2proto(m->m_pkthdr.ether_vtag); + ip->ip_len = htons(m->m_pkthdr.len); + ip->ip_ttl = sc->sc_tunnel.t_ttl; + ip->ip_src.s_addr = sc->sc_tunnel.t_src[0]; + ip->ip_dst.s_addr = sc->sc_tunnel.t_dst[0]; - m->m_pkthdr.ph_rtableid = sc->gif_rtableid; + m->m_flags &= ~(M_MCAST|M_BCAST); + m->m_pkthdr.ph_rtableid = sc->sc_tunnel.t_rtableid; #if NPF > 0 pf_pkt_addr_changed(m); #endif - return 0; + + return (ip_output(m, NULL, NULL, IP_RAWOUTPUT, NULL, NULL, 0)); } -void -in_gif_input(struct mbuf *m, ...) 
+struct mbuf * +gif_ip4_input(struct mbuf *m, int hlen) { - int off; - struct gif_softc *sc; - struct ifnet *gifp = NULL; + struct gif_tunnel key; struct ip *ip; - va_list ap; - - va_start(ap, m); - off = va_arg(ap, int); - va_end(ap); - - /* IP-in-IP header is caused by tunnel mode, so skip gif lookup */ - if (m->m_flags & M_TUNNEL) { - m->m_flags &= ~M_TUNNEL; - goto inject; - } ip = mtod(m, struct ip *); - /* this code will be soon improved. */ - LIST_FOREACH(sc, &gif_softc_list, gif_list) { - if (sc->gif_psrc == NULL || sc->gif_pdst == NULL || - sc->gif_psrc->sa_family != AF_INET || - sc->gif_pdst->sa_family != AF_INET || - rtable_l2(sc->gif_rtableid) != - rtable_l2(m->m_pkthdr.ph_rtableid)) { - continue; - } - - if ((sc->gif_if.if_flags & IFF_UP) == 0) - continue; + key.t_af = AF_INET; + key.t_src[0] = ip->ip_dst.s_addr; + key.t_dst[0] = ip->ip_src.s_addr; - if (in_hosteq(satosin(sc->gif_psrc)->sin_addr, ip->ip_dst) && - in_hosteq(satosin(sc->gif_pdst)->sin_addr, ip->ip_src)) { - gifp = &sc->gif_if; - break; - } - } - - if (gifp) { - m->m_pkthdr.ph_ifidx = gifp->if_index; - m->m_pkthdr.ph_rtableid = gifp->if_rdomain; - gifp->if_ipackets++; - gifp->if_ibytes += m->m_pkthdr.len; - /* We have a configured GIF */ - ipip_input(m, off, gifp, ip->ip_p); - return; - } + return (gif_ip_input(m, hlen, &key, ip->ip_p)); +} -inject: - ip4_input(m, off); /* No GIF interface was configured */ - return; +static void +gif_input_ip(struct mbuf *m) +{ + niq_enqueue(&ipintrq, m); } #ifdef INET6 -int -in6_gif_output(struct ifnet *ifp, int family, struct mbuf **m0) +static void +gif_input_ip6(struct mbuf *m) { - struct gif_softc *sc = (struct gif_softc*)ifp; - struct sockaddr_in6 *sin6_src = satosin6(sc->gif_psrc); - struct sockaddr_in6 *sin6_dst = satosin6(sc->gif_pdst); - struct tdb tdb; - struct xformsw xfs; - int error; - struct mbuf *m = *m0; - - if (sin6_src == NULL || sin6_dst == NULL || - sin6_src->sin6_family != AF_INET6 || - sin6_dst->sin6_family != AF_INET6) { - 
m_freem(m); - return EAFNOSUPPORT; - } + niq_enqueue(&ip6intrq, m); +} +#endif - /* setup dummy tdb. it highly depends on ipip_output() code. */ - bzero(&tdb, sizeof(tdb)); - bzero(&xfs, sizeof(xfs)); - tdb.tdb_src.sin6.sin6_family = AF_INET6; - tdb.tdb_src.sin6.sin6_len = sizeof(struct sockaddr_in6); - tdb.tdb_src.sin6.sin6_addr = sin6_src->sin6_addr; - tdb.tdb_dst.sin6.sin6_family = AF_INET6; - tdb.tdb_dst.sin6.sin6_len = sizeof(struct sockaddr_in6); - tdb.tdb_dst.sin6.sin6_addr = sin6_dst->sin6_addr; - tdb.tdb_xform = &xfs; - xfs.xf_type = -1; /* not XF_IP4 */ +#ifdef MPLS +static void +gif_input_mpls(struct mbuf *m) +{ + mpls_input(m); +} +#endif - switch (family) { - case AF_INET: +static struct mbuf * +gif_ip_input(struct mbuf *m, int hlen, struct gif_tunnel *key, uint8_t proto) +{ + struct gif_softc *sc; + struct ifnet *ifp; + void (*input)(struct mbuf *); + int af __unused; /* bpf */ + + switch (proto) { + case IPPROTO_IPV4: + af = AF_INET; + input = gif_input_ip; break; #ifdef INET6 - case AF_INET6: + case IPPROTO_IPV6: + af = AF_INET6; + input = gif_input_ip6; break; #endif #ifdef MPLS - case AF_MPLS: + case IPPROTO_MPLS: + af = AF_MPLS; + input = gif_input_mpls; break; #endif default: -#ifdef DEBUG - printf("%s: warning: unknown family %d passed\n", __func__, - family); -#endif - m_freem(m); - return EAFNOSUPPORT; + return (m); } - /* encapsulate into IPv6 packet */ - *m0 = NULL; -#if MPLS - if (family == AF_MPLS) - error = etherip_output(m, &tdb, m0, IPPROTO_MPLS); - else -#endif - error = ipip_output(m, &tdb, m0, 0, 0); - if (error) - return error; - else if (*m0 == NULL) - return EFAULT; + key->t_rtableid = m->m_pkthdr.ph_rtableid; - m = *m0; + rw_enter_read(&gif_lk); + sc = (struct gif_softc *)RBT_FIND(gif_tree, &gif_softcs, key); + rw_exit_read(&gif_lk); + + if (sc == NULL) + return (m); + + m_adj(m, hlen); + + ifp = &sc->sc_if; + + m->m_flags &= ~(M_BCAST|M_MCAST); + m->m_pkthdr.ph_ifidx = ifp->if_index; + m->m_pkthdr.ph_rtableid = ifp->if_rdomain; 
#if NPF > 0 pf_pkt_addr_changed(m); #endif - return 0; + + ifp->if_ipackets++; + ifp->if_ibytes += m->m_pkthdr.len; + +#if NBPFILTER > 0 + if (ifp->if_bpf) + bpf_mtap_af(ifp->if_bpf, af, m, BPF_DIRECTION_IN); +#endif + + (*input)(m); + return (NULL); } -int in6_gif_input(struct mbuf **mp, int *offp, int proto) +#ifdef INET6 +int +gif_ip6_encap(struct gif_softc *sc, struct mbuf *m) { - struct mbuf *m = *mp; - struct gif_softc *sc; - struct ifnet *gifp = NULL; struct ip6_hdr *ip6; - /* XXX What if we run transport-mode IPsec to protect gif tunnel ? */ - if (m->m_flags & (M_AUTH | M_CONF)) - goto inject; + m = m_prepend(m, sizeof(*ip6), M_DONTWAIT); + if (m == NULL) + return (ENOMEM); ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_flow = 0; + ip6->ip6_vfc &= ~IPV6_VERSION_MASK; + ip6->ip6_vfc |= IPV6_VERSION; + ip6->ip6_nxt = gif_af2proto(m->m_pkthdr.ether_vtag); + ip6->ip6_hlim = sc->sc_tunnel.t_ttl; + ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); + memcpy(&ip6->ip6_src, sc->sc_tunnel.t_src, sizeof(ip6->ip6_src)); + memcpy(&ip6->ip6_dst, sc->sc_tunnel.t_dst, sizeof(ip6->ip6_dst)); -#define satoin6(sa) (satosin6(sa)->sin6_addr) - LIST_FOREACH(sc, &gif_softc_list, gif_list) { - if (sc->gif_psrc == NULL || sc->gif_pdst == NULL || - sc->gif_psrc->sa_family != AF_INET6 || - sc->gif_pdst->sa_family != AF_INET6) { - continue; - } + m->m_flags &= ~(M_BCAST|M_MCAST); + m->m_pkthdr.ph_rtableid = sc->sc_tunnel.t_rtableid; + +#if NPF > 0 + pf_pkt_addr_changed(m); +#endif - if ((sc->gif_if.if_flags & IFF_UP) == 0) - continue; + return (ip6_output(m, 0, NULL, IPV6_MINMTU, 0, NULL)); +} - if (IN6_ARE_ADDR_EQUAL(&satoin6(sc->gif_psrc), &ip6->ip6_dst) && - IN6_ARE_ADDR_EQUAL(&satoin6(sc->gif_pdst), &ip6->ip6_src)) { - gifp = &sc->gif_if; - break; - } - } +struct mbuf * +gif_ip6_input(struct mbuf *m, int hlen, int proto) +{ + struct gif_tunnel key; + struct ip6_hdr *ip6; - if (gifp) { - m->m_pkthdr.ph_ifidx = gifp->if_index; - gifp->if_ipackets++; - gifp->if_ibytes += 
m->m_pkthdr.len; - ipip_input(m, *offp, gifp, proto); - return IPPROTO_DONE; - } + key.t_af = AF_INET6; + memcpy(key.t_src, &ip6->ip6_dst, sizeof(key.t_src)); + memcpy(key.t_dst, &ip6->ip6_src, sizeof(key.t_dst)); -inject: - /* No GIF tunnel configured */ - ip4_input6(&m, offp, proto); - return IPPROTO_DONE; + return (gif_ip_input(m, hlen, &key, proto)); } #endif /* INET6 */ Index: sys/net/if_gif.h =================================================================== RCS file: /cvs/src/sys/net/if_gif.h,v retrieving revision 1.14 diff -u -p -r1.14 if_gif.h --- sys/net/if_gif.h 28 Sep 2015 08:32:05 -0000 1.14 +++ sys/net/if_gif.h 12 Dec 2016 05:59:20 -0000 @@ -37,19 +37,11 @@ #ifndef _NET_IF_GIF_H_ #define _NET_IF_GIF_H_ -struct gif_softc { - struct ifnet gif_if; /* common area */ - struct sockaddr *gif_psrc; /* Physical src addr */ - struct sockaddr *gif_pdst; /* Physical dst addr */ - u_int gif_rtableid; - LIST_ENTRY(gif_softc) gif_list; /* list of all gifs */ -}; - -extern LIST_HEAD(gif_softc_head, gif_softc) gif_softc_list; - -int gif_encap(struct ifnet *, struct mbuf **, sa_family_t); - -void in_gif_input(struct mbuf *, ...); -int in6_gif_input(struct mbuf **, int *, int); +#ifdef _KERNEL +struct mbuf *gif_ip4_input(struct mbuf *, int); +#ifdef INET6 +struct mbuf *gif_ip6_input(struct mbuf *, int, int proto); +#endif +#endif #endif /* _NET_IF_GIF_H_ */ Index: sys/net/if_gre.c =================================================================== RCS file: /cvs/src/sys/net/if_gre.c,v retrieving revision 1.81 diff -u -p -r1.81 if_gre.c --- sys/net/if_gre.c 16 Nov 2016 14:50:13 -0000 1.81 +++ sys/net/if_gre.c 12 Dec 2016 05:59:20 -0000 @@ -38,9 +38,6 @@ * Also supported: IP in IP encapsulation (proto 55) per RFC 2004. 
*/ -#include "gre.h" -#if NGRE > 0 - #include "bpfilter.h" #include "pf.h" @@ -50,10 +47,11 @@ #include #include #include -#include +#include #include #include +#include #include #include @@ -61,6 +59,19 @@ #include #include +#ifdef INET6 +#include +#include +#endif + +#ifdef PIPEX +#include +#endif + +#ifdef MPLS +#include +#endif /* MPLS */ + #if NBPFILTER > 0 #include #endif @@ -71,10 +82,59 @@ #include +/* constant to network short */ +#if _BYTE_ORDER == _LITTLE_ENDIAN +#define ctons(_s) ((((_s) & 0x00ff) << 8) | (((_s) & 0xff00) >> 8)) +#else +#define ctons(_a) (_a) +#endif + #ifndef GRE_RECURSION_LIMIT #define GRE_RECURSION_LIMIT 3 /* How many levels of recursion allowed */ #endif /* GRE_RECURSION_LIMIT */ +struct gre_tunnel { + RBT_ENTRY(gre_entry) t_entry; + + uint32_t t_key_mask; +#define GRE_KEY_NONE htonl(0x00000000U) +#define GRE_KEY_NVGRE htonl(0xffffff00U) +#define GRE_KEY_MASK htonl(0xffffffffU) + uint32_t t_key; + + u_int t_rtableid; + int t_af; + uint32_t t_src[4]; + uint32_t t_dst[4]; + + uint8_t t_ttl; +}; + +/* + * layer 3 GRE tunnels + */ + +struct gre_softc { + struct gre_tunnel sc_tunnel; /* must be first */ + struct ifnet sc_if; +}; + +static int gre_clone_create(struct if_clone *, int); +static int gre_clone_destroy(struct ifnet *); + +struct if_clone gre_cloner = + IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy); + +RBT_HEAD(gre_tree, gre_tunnel); + +static inline int + gre_cmp(const struct gre_tunnel *, const struct gre_tunnel *); + +RBT_PROTOTYPE(gre_tree, gre_tunnel, t_entry, gre_cmp); + +struct gre_tree gre_softcs = RBT_INITIALIZER(); +struct rwlock gre_lk = RWLOCK_INITIALIZER("grelk"); + /* * It is not easy to calculate the right value for a GRE MTU. 
* We leave this task to the admin and use the same default that @@ -82,91 +142,139 @@ */ #define GREMTU 1476 -int gre_clone_create(struct if_clone *, int); -int gre_clone_destroy(struct ifnet *); -struct gre_softc_head gre_softc_list; -struct gre_softc_head mobileip_softc_list; +static void gre_input_ip(struct mbuf *); +#ifdef INET6 +static void gre_input_ip6(struct mbuf *); +#endif +#ifdef MPLS +static void gre_input_mpls(struct mbuf *); +#endif + +static int gre_ioctl(struct ifnet *, u_long, caddr_t); +static int gre_up(struct gre_softc *); +static int gre_down(struct gre_softc *); + +static int gre_output(struct ifnet *, struct mbuf *, + struct sockaddr *, struct rtentry *); +static void gre_start(struct ifnet *); +static struct mbuf * + gre_encap(struct gre_softc *, struct mbuf *, uint8_t *); -struct if_clone gre_cloner = - IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy); -struct if_clone mobileip_cloner = - IF_CLONE_INITIALIZER("mobileip", gre_clone_create, gre_clone_destroy); +/* + * layer 2 GRE tunnels + */ + +struct gre_l2_softc { + struct gre_tunnel sc_tunnel; /* must be first */ + struct arpcom sc_ac; + struct ifmedia sc_media; +}; + +static int gre_l2_clone_create(struct if_clone *, int); +static int gre_l2_clone_destroy(struct ifnet *); + +struct if_clone egre_cloner = + IF_CLONE_INITIALIZER("egre", gre_l2_clone_create, gre_l2_clone_destroy); +struct if_clone nvgre_cloner = + IF_CLONE_INITIALIZER("nvgre", gre_l2_clone_create, gre_l2_clone_destroy); + +RBT_HEAD(gre_l2_tree, gre_tunnel); + +static inline int + gre_l2_cmp(const struct gre_tunnel *, + const struct gre_tunnel *); + +RBT_PROTOTYPE(gre_l2_tree, gre_tunnel, t_entry, gre_l2_cmp); + +struct gre_l2_tree gre_l2_softcs = RBT_INITIALIZER(); +struct rwlock gre_l2_lk = RWLOCK_INITIALIZER("grel2lk"); + +static struct mbuf * + gre_l2_input(struct mbuf *, const struct gre_tunnel *, int); + +static int gre_l2_ioctl(struct ifnet *, u_long, caddr_t); +static int gre_l2_up(struct gre_l2_softc 
*); +static int gre_l2_down(struct gre_l2_softc *); +static int gre_l2_media_change(struct ifnet *); +static void gre_l2_media_status(struct ifnet *, struct ifmediareq *); + +static int egre_ioctl(struct ifnet *, u_long, caddr_t); +static void egre_start(struct ifnet *); +static struct mbuf * + egre_encap(struct gre_l2_softc *, struct mbuf *); + +static int nvgre_ioctl(struct ifnet *, u_long, caddr_t); +static int nvgre_set_vnetid(struct gre_tunnel *, struct ifreq *); +static int nvgre_get_vnetid(struct gre_tunnel *, struct ifreq *); +static void nvgre_start(struct ifnet *); +static struct mbuf * + nvgre_encap(struct gre_l2_softc *, struct mbuf *); + +/* + * common code + */ +static int gre_tunnel_ioctl(struct ifnet *, struct gre_tunnel *, + u_long, caddr_t); + +static int gre_set_tunnel(struct gre_tunnel *, struct if_laddrreq *); +static int gre_get_tunnel(struct gre_tunnel *, struct if_laddrreq *); +static int gre_del_tunnel(struct gre_tunnel *); + +static int gre_set_vnetid(struct gre_tunnel *, struct ifreq *); +static int gre_get_vnetid(struct gre_tunnel *, struct ifreq *); +static int gre_del_vnetid(struct gre_tunnel *); + +static struct mbuf * + gre_input_key(struct mbuf *, int, struct gre_tunnel *); +static int gre_ip_output(const struct gre_tunnel *, struct mbuf *, + uint8_t); /* - * We can control the acceptance of GRE and MobileIP packets by - * altering the sysctl net.inet.gre.allow and net.inet.mobileip.allow values - * respectively. Zero means drop them, all else is acceptance. We can also - * control acceptance of WCCPv1-style GRE packets through the - * net.inet.gre.wccp value, but be aware it depends upon normal GRE being - * allowed as well. 
- * + * let's begin */ -int gre_allow = 0; -int gre_wccp = 0; -int ip_mobile_allow = 0; - -void gre_keepalive(void *); -void gre_send_keepalive(void *); -void gre_link_state(struct gre_softc *); void greattach(int n) { - LIST_INIT(&gre_softc_list); - LIST_INIT(&mobileip_softc_list); if_clone_attach(&gre_cloner); - if_clone_attach(&mobileip_cloner); + if_clone_attach(&egre_cloner); + if_clone_attach(&nvgre_cloner); } int gre_clone_create(struct if_clone *ifc, int unit) { struct gre_softc *sc; - int s; sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO); if (!sc) return (ENOMEM); + + sc->sc_tunnel.t_key_mask = GRE_KEY_NONE; + sc->sc_tunnel.t_af = AF_UNSPEC; + sc->sc_tunnel.t_rtableid = 0; + sc->sc_tunnel.t_rtableid = 0; + sc->sc_tunnel.t_ttl = ip_defttl; + snprintf(sc->sc_if.if_xname, sizeof sc->sc_if.if_xname, "%s%d", ifc->ifc_name, unit); sc->sc_if.if_softc = sc; sc->sc_if.if_type = IFT_TUNNEL; sc->sc_if.if_addrlen = 0; - sc->sc_if.if_hdrlen = 24; /* IP + GRE */ sc->sc_if.if_mtu = GREMTU; sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST; sc->sc_if.if_output = gre_output; + sc->sc_if.if_start = gre_start; sc->sc_if.if_ioctl = gre_ioctl; sc->sc_if.if_rtrequest = p2p_rtrequest; - sc->sc_if.if_collisions = 0; - sc->sc_if.if_ierrors = 0; - sc->sc_if.if_oerrors = 0; - sc->sc_if.if_ipackets = 0; - sc->sc_if.if_opackets = 0; - sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY; - sc->sc_ka_state = GRE_STATE_UKNWN; - - if (strcmp("gre", ifc->ifc_name) == 0) { - /* GRE encapsulation */ - sc->g_proto = IPPROTO_GRE; - } else { - /* Mobile IP encapsulation */ - sc->g_proto = IPPROTO_MOBILE; - } - - timeout_set(&sc->sc_ka_hold, gre_keepalive, sc); - timeout_set_proc(&sc->sc_ka_snd, gre_send_keepalive, sc); if_attach(&sc->sc_if); if_alloc_sadl(&sc->sc_if); #if NBPFILTER > 0 - bpfattach(&sc->sc_if.if_bpf, &sc->sc_if, DLT_LOOP, sizeof(u_int32_t)); + bpfattach(&sc->sc_if.if_bpf, &sc->sc_if, DLT_LOOP, sizeof(uint32_t)); #endif - s = splnet(); - LIST_INSERT_HEAD(&gre_softc_list, sc, 
sc_list); - splx(s); return (0); } @@ -175,13 +283,6 @@ int gre_clone_destroy(struct ifnet *ifp) { struct gre_softc *sc = ifp->if_softc; - int s; - - s = splnet(); - timeout_del(&sc->sc_ka_snd); - timeout_del(&sc->sc_ka_hold); - LIST_REMOVE(sc, sc_list); - splx(s); if_detach(ifp); @@ -189,271 +290,473 @@ gre_clone_destroy(struct ifnet *ifp) return (0); } -/* - * The output routine. Takes a packet and encapsulates it in the protocol - * given by sc->g_proto. See also RFC 1701 and RFC 2004. - */ +int +gre_l2_clone_create(struct if_clone *ifc, int unit) +{ + struct gre_l2_softc *sc; + struct ifnet *ifp; + + sc = malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT|M_ZERO); + if (!sc) + return (ENOMEM); + + sc->sc_tunnel.t_af = AF_UNSPEC; + sc->sc_tunnel.t_rtableid = 0; + sc->sc_tunnel.t_ttl = ip_defttl; + + ifp = &sc->sc_ac.ac_if; + + snprintf(ifp->if_xname, sizeof(ifp->if_xname), + "%s%d", ifc->ifc_name, unit); + ifp->if_softc = sc; + IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); + ether_fakeaddr(ifp); + + if (strcmp(ifc->ifc_name, "egre") == 0) { + sc->sc_tunnel.t_key_mask = GRE_KEY_NONE; + + ifp->if_ioctl = egre_ioctl; + ifp->if_start = egre_start; + } else if (strcmp(ifc->ifc_name, "nvgre") == 0) { + sc->sc_tunnel.t_key_mask = GRE_KEY_NVGRE; + sc->sc_tunnel.t_key = htonl(0x1000U << NVGRE_VSID_SHIFT); + + ifp->if_ioctl = nvgre_ioctl; + ifp->if_start = nvgre_start; + } else + panic("%s: unexpected clone %s", __func__, ifc->ifc_name); + + ifmedia_init(&sc->sc_media, 0, gre_l2_media_change, + gre_l2_media_status); + ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); + ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); + + if_attach(ifp); + ether_ifattach(ifp); + + return (0); +} int +gre_l2_clone_destroy(struct ifnet *ifp) +{ + struct gre_l2_softc *sc = ifp->if_softc; + + ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY); + ether_ifdetach(ifp); + if_detach(ifp); + + free(sc, M_DEVBUF, sizeof(*sc)); + + return (0); +} + +static inline int +gre_ip_cmp(int af, const uint32_t *a, 
const uint32_t *b) +{ + switch (af) { +#ifdef INET6 + case AF_INET6: + if (a[3] > b[3]) + return (1); + if (a[3] < b[3]) + return (-1); + + if (a[2] > b[2]) + return (1); + if (a[2] < b[2]) + return (-1); + + if (a[1] > b[1]) + return (1); + if (a[1] < b[1]) + return (-1); + + /* FALLTHROUGH */ +#endif /* INET6 */ + case AF_INET: + if (a[0] > b[0]) + return (1); + if (a[0] < b[0]) + return (-1); + break; + default: + panic("%s: unsupported af %d\n", __func__, af); + } + + return (0); +} + +static inline int +gre_common_cmp(const struct gre_tunnel *a, const struct gre_tunnel *b) +{ + int rv; + + /* sort by routing table */ + if (a->t_rtableid > b->t_rtableid) + return (1); + if (a->t_rtableid < b->t_rtableid) + return (-1); + + /* sort by address */ + if (a->t_af > b->t_af) + return (1); + if (a->t_af < b->t_af) + return (-1); + + rv = gre_ip_cmp(a->t_af, a->t_dst, b->t_dst); + if (rv != 0) + return (rv); + + rv = gre_ip_cmp(a->t_af, a->t_src, b->t_src); + if (rv != 0) + return (rv); + + return (0); +} + +static inline int +gre_cmp(const struct gre_tunnel *a, const struct gre_tunnel *b) +{ + /* sort by whether K is set */ + if (a->t_key_mask > b->t_key_mask) + return (1); + if (a->t_key_mask < b->t_key_mask) + return (-1); + + /* is K set on both? */ + if (a->t_key_mask != GRE_KEY_NONE) { + if (a->t_key > b->t_key) + return (1); + if (a->t_key < b->t_key) + return (-1); + } + + return (gre_common_cmp(a, b)); +} + +static inline int +gre_l2_cmp(const struct gre_tunnel *a, const struct gre_tunnel *b) +{ + uint32_t ka, kb; + uint32_t mask; + + /* is K set at all? */ + ka = a->t_key_mask & GRE_KEY_NVGRE; + kb = b->t_key_mask & GRE_KEY_NVGRE; + + /* sort by whether K is set */ + if (ka > kb) + return (1); + if (ka < kb) + return (-1); + + /* is K set on both? 
*/ + if (ka != GRE_KEY_NONE) { + /* get common prefix */ + mask = a->t_key_mask & b->t_key_mask; + + ka = a->t_key & mask; + kb = b->t_key & mask; + + /* sort by common prefix */ + if (ka > kb) + return (1); + if (ka < kb) + return (-1); + } + + return (gre_common_cmp(a, b)); +} + +static int gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, - struct rtentry *rt) + struct rtentry *rt) { - int error = 0; - struct gre_softc *sc = (struct gre_softc *) (ifp->if_softc); - struct greip *gh = NULL; - struct ip *inp = NULL; - u_int8_t ip_tos = 0; - u_int16_t etype = 0; - struct mobile_h mob_h; struct m_tag *mtag; + int error = 0; - if ((ifp->if_flags & IFF_UP) == 0 || - sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) { + if (!ISSET(ifp->if_flags, IFF_RUNNING)) { m_freem(m); error = ENETDOWN; goto end; } -#ifdef DIAGNOSTIC - if (ifp->if_rdomain != rtable_l2(m->m_pkthdr.ph_rtableid)) { - printf("%s: trying to send packet on wrong domain. " - "if %d vs. mbuf %d, AF %d\n", ifp->if_xname, - ifp->if_rdomain, rtable_l2(m->m_pkthdr.ph_rtableid), - dst->sa_family); - } -#endif - /* Try to limit infinite recursion through misconfiguration. 
*/ for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag; mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) { - if (!bcmp((caddr_t)(mtag + 1), &ifp, sizeof(struct ifnet *))) { + if (memcmp(mtag + 1, &ifp->if_index, + sizeof(ifp->if_index)) == 0) { m_freem(m); error = EIO; goto end; } } - mtag = m_tag_get(PACKET_TAG_GRE, sizeof(struct ifnet *), M_NOWAIT); + mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT); if (mtag == NULL) { m_freem(m); error = ENOBUFS; goto end; } - bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *)); + memcpy(mtag + 1, &ifp->if_index, sizeof(ifp->if_index)); m_tag_prepend(m, mtag); - m->m_flags &= ~(M_BCAST|M_MCAST); + switch (dst->sa_family) { + case AF_INET: +#ifdef INET6 + case AF_INET6: +#endif +#ifdef MPLS + case AF_MPLS: +#endif + break; + default: + m_freem(m); + error = EAFNOSUPPORT; + goto end; + } + + m->m_pkthdr.ether_vtag = dst->sa_family; + + error = if_enqueue(ifp, m); + end: + if (error) + ifp->if_oerrors++; + return (error); +} + +static void +gre_start(struct ifnet *ifp) +{ + struct gre_softc *sc = ifp->if_softc; + struct mbuf *m; + uint8_t tos; + while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) { #if NBPFILTER > 0 - if (ifp->if_bpf) - bpf_mtap_af(ifp->if_bpf, dst->sa_family, m, BPF_DIRECTION_OUT); + caddr_t if_bpf = ifp->if_bpf; + if (if_bpf) { + int af = m->m_pkthdr.ether_vtag; + bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT); + } #endif - if (sc->g_proto == IPPROTO_MOBILE) { - if (ip_mobile_allow == 0) { - m_freem(m); - error = EACCES; - goto end; - } + ifp->if_opackets++; - if (dst->sa_family == AF_INET) { - struct mbuf *m0; - int msiz; - - /* - * Make sure the complete IP header (with options) - * is in the first mbuf. 
- */ - if (m->m_len < sizeof(struct ip)) { - m = m_pullup(m, sizeof(struct ip)); - if (m == NULL) { - error = ENOBUFS; - goto end; - } else - inp = mtod(m, struct ip *); - - if (m->m_len < inp->ip_hl << 2) { - m = m_pullup(m, inp->ip_hl << 2); - if (m == NULL) { - error = ENOBUFS; - goto end; - } - } - } - - inp = mtod(m, struct ip *); - - bzero(&mob_h, MOB_H_SIZ_L); - mob_h.proto = (inp->ip_p) << 8; - mob_h.odst = inp->ip_dst.s_addr; - inp->ip_dst.s_addr = sc->g_dst.s_addr; - - /* - * If the packet comes from our host, we only change - * the destination address in the IP header. - * Otherwise we need to save and change the source. - */ - if (inp->ip_src.s_addr == sc->g_src.s_addr) { - msiz = MOB_H_SIZ_S; - } else { - mob_h.proto |= MOB_H_SBIT; - mob_h.osrc = inp->ip_src.s_addr; - inp->ip_src.s_addr = sc->g_src.s_addr; - msiz = MOB_H_SIZ_L; - } - - mob_h.proto = htons(mob_h.proto); - mob_h.hcrc = gre_in_cksum((u_int16_t *) &mob_h, msiz); - - /* Squeeze in the mobility header */ - if ((m->m_data - msiz) < m->m_pktdat) { - /* Need new mbuf */ - MGETHDR(m0, M_DONTWAIT, MT_HEADER); - if (m0 == NULL) { - m_freem(m); - error = ENOBUFS; - goto end; - } - M_MOVE_HDR(m0, m); - - m0->m_len = msiz + (inp->ip_hl << 2); - m0->m_data += max_linkhdr; - m0->m_pkthdr.len = m->m_pkthdr.len + msiz; - m->m_data += inp->ip_hl << 2; - m->m_len -= inp->ip_hl << 2; - - bcopy((caddr_t) inp, mtod(m0, caddr_t), - sizeof(struct ip)); - - m0->m_next = m; - m = m0; - } else { /* we have some space left in the old one */ - m->m_data -= msiz; - m->m_len += msiz; - m->m_pkthdr.len += msiz; - bcopy(inp, mtod(m, caddr_t), - inp->ip_hl << 2); - } - - /* Copy Mobility header */ - inp = mtod(m, struct ip *); - bcopy(&mob_h, (caddr_t)(inp + 1), (unsigned) msiz); - inp->ip_len = htons(ntohs(inp->ip_len) + msiz); - } else { /* AF_INET */ - m_freem(m); - error = EINVAL; - goto end; - } - } else if (sc->g_proto == IPPROTO_GRE) { - if (gre_allow == 0) { - m_freem(m); - error = EACCES; - goto end; - } + m = 
gre_encap(sc, m, &tos); + if (m == NULL || gre_ip_output(&sc->sc_tunnel, m, tos) != 0) + ifp->if_oerrors++; + } +} - switch(dst->sa_family) { - case AF_INET: - if (m->m_len < sizeof(struct ip)) { - m = m_pullup(m, sizeof(struct ip)); - if (m == NULL) { - error = ENOBUFS; - goto end; - } - } - - inp = mtod(m, struct ip *); - ip_tos = inp->ip_tos; - etype = ETHERTYPE_IP; - break; +static struct mbuf * +gre_encap(struct gre_softc *sc, struct mbuf *m, uint8_t *tos) +{ + struct gre_header *gh; + struct gre_h_key *gkh; + uint16_t etype; + int hlen; + + *tos = 0; + switch (m->m_pkthdr.ether_vtag) { + case AF_INET: { + etype = htons(ETHERTYPE_IP); + + struct ip *ip = mtod(m, struct ip *); + *tos = ip->ip_tos; + break; + } #ifdef INET6 - case AF_INET6: - etype = ETHERTYPE_IPV6; - break; + case AF_INET6: + etype = htons(ETHERTYPE_IPV6); + break; #endif #ifdef MPLS - case AF_MPLS: - if (m->m_flags & (M_BCAST | M_MCAST)) - etype = ETHERTYPE_MPLS_MCAST; - else - etype = ETHERTYPE_MPLS; - break; + case AF_MPLS: + if (m->m_flags & (M_BCAST | M_MCAST)) + etype = htons(ETHERTYPE_MPLS_MCAST); + else + etype = htons(ETHERTYPE_MPLS); + break; #endif - default: - m_freem(m); - error = EAFNOSUPPORT; - goto end; - } - - M_PREPEND(m, sizeof(struct greip), M_DONTWAIT); - } else { - m_freem(m); - error = EINVAL; - goto end; + default: + unhandled_af(m->m_pkthdr.ether_vtag); } - if (m == NULL) { - error = ENOBUFS; - goto end; + hlen = sizeof(*gh); + if (sc->sc_tunnel.t_key_mask != GRE_KEY_NONE) + hlen += sizeof(*gkh); + + m = m_prepend(m, hlen, M_DONTWAIT); + if (m == NULL) + return (NULL); + + gh = mtod(m, struct gre_header *); + gh->gre_flags = GRE_VERS_0; + gh->gre_proto = etype; + if (sc->sc_tunnel.t_key_mask != GRE_KEY_NONE) { + gh->gre_flags |= htons(GRE_KP); + + gkh = (struct gre_h_key *)(gh + 1); + gkh->gre_key = sc->sc_tunnel.t_key; } - gh = mtod(m, struct greip *); - if (sc->g_proto == IPPROTO_GRE) { - /* We don't support any GRE flags for now */ + return (m); +} + +static int 
+gre_ip_output(const struct gre_tunnel *tunnel, struct mbuf *m, uint8_t tos) +{ + int error; + + m->m_flags &= ~(M_BCAST|M_MCAST); + m->m_pkthdr.ph_rtableid = tunnel->t_rtableid; + +#if NPF > 0 + pf_pkt_addr_changed(m); +#endif + + switch (tunnel->t_af) { + case AF_INET: { + struct ip *ip; + + m = m_prepend(m, sizeof(*ip), M_DONTWAIT); + if (m == NULL) + return (ENOMEM); + + ip = mtod(m, struct ip *); + ip->ip_tos = tos; + ip->ip_len = htons(m->m_pkthdr.len); + ip->ip_ttl = tunnel->t_ttl; + ip->ip_p = IPPROTO_GRE; + ip->ip_src.s_addr = tunnel->t_src[0]; + ip->ip_dst.s_addr = tunnel->t_dst[0]; - bzero((void *) &gh->gi_g, sizeof(struct gre_h)); - gh->gi_ptype = htons(etype); + error = ip_output(m, NULL, NULL, 0, NULL, NULL, 0); + break; } +#ifdef INET6 + case AF_INET6: { + struct ip6_hdr *ip6; + int len = m->m_pkthdr.len; + + m = m_prepend(m, sizeof(*ip6), M_DONTWAIT); + if (m == NULL) + return (ENOMEM); + + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_flow = ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID) ? 
+ htonl(m->m_pkthdr.ph_flowid & M_FLOWID_MASK) : 0; + ip6->ip6_vfc |= IPV6_VERSION; + ip6->ip6_plen = htons(len); + ip6->ip6_nxt = IPPROTO_GRE; + ip6->ip6_hlim = tunnel->t_ttl; + memcpy(&ip6->ip6_src, tunnel->t_src, sizeof(ip6->ip6_src)); + memcpy(&ip6->ip6_dst, tunnel->t_dst, sizeof(ip6->ip6_dst)); - gh->gi_pr = sc->g_proto; - if (sc->g_proto != IPPROTO_MOBILE) { - gh->gi_src = sc->g_src; - gh->gi_dst = sc->g_dst; - ((struct ip *) gh)->ip_hl = (sizeof(struct ip)) >> 2; - ((struct ip *) gh)->ip_ttl = ip_defttl; - ((struct ip *) gh)->ip_tos = ip_tos; - gh->gi_len = htons(m->m_pkthdr.len); + error = ip6_output(m, 0, NULL, IPV6_MINMTU, 0, NULL); + break; + } +#endif /* INET6 */ + default: + panic("%s: unsupported af %d in %p", __func__, tunnel->t_af, + tunnel); } - ifp->if_opackets++; - ifp->if_obytes += m->m_pkthdr.len; + return (error); +} +static int +gre_tunnel_ioctl(struct ifnet *ifp, struct gre_tunnel *tunnel, + u_long cmd, caddr_t data) +{ + struct ifreq *ifr = (struct ifreq *)data; + int error; - m->m_pkthdr.ph_rtableid = sc->g_rtableid; + switch(cmd) { + case SIOCSLIFPHYADDR: + if (ISSET(ifp->if_flags, IFF_RUNNING)) { + error = EBUSY; + break; + } -#if NPF > 0 - pf_pkt_addr_changed(m); -#endif + error = gre_set_tunnel(tunnel, (struct if_laddrreq *)data); + break; + case SIOCGLIFPHYADDR: + error = gre_get_tunnel(tunnel, (struct if_laddrreq *)data); + break; + case SIOCDIFPHYADDR: + if (ISSET(ifp->if_flags, IFF_RUNNING)) { + error = EBUSY; + break; + } + + error = gre_del_tunnel(tunnel); + break; + case SIOCSLIFPHYRTABLE: + if (ISSET(ifp->if_flags, IFF_RUNNING)) { + error = EBUSY; + break; + } + + if (ifr->ifr_rdomainid < 0 || + ifr->ifr_rdomainid > RT_TABLEID_MAX || + !rtable_exists(ifr->ifr_rdomainid)) { + error = EINVAL; + break; + } + tunnel->t_rtableid = ifr->ifr_rdomainid; + break; + case SIOCGLIFPHYRTABLE: + ifr->ifr_rdomainid = tunnel->t_rtableid; + break; + + case SIOCSLIFPHYTTL: + if (ifr->ifr_ttl < 0 || ifr->ifr_ttl > 0xff) { + error = EINVAL; + 
break; + } + + /* commit */ + tunnel->t_ttl = (uint8_t)ifr->ifr_ttl; + break; + + case SIOCGLIFPHYTTL: + ifr->ifr_ttl = (int)tunnel->t_ttl; + break; + + default: + error = ENOTTY; + } - /* Send it off */ - error = ip_output(m, NULL, &sc->route, 0, NULL, NULL, 0); - end: - if (error) - ifp->if_oerrors++; return (error); } int gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { - - struct ifreq *ifr = (struct ifreq *)data; - struct if_laddrreq *lifr = (struct if_laddrreq *)data; - struct ifkalivereq *ikar = (struct ifkalivereq *)data; struct gre_softc *sc = ifp->if_softc; - int s; - struct sockaddr_in si; + struct ifreq *ifr = (struct ifreq *)data; int error = 0; - struct proc *prc = curproc; /* XXX */ - s = splnet(); switch(cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; + /* FALLTHROUGH */ + case SIOCSIFFLAGS: + if (ISSET(ifp->if_flags, IFF_UP)) { + if (!ISSET(ifp->if_flags, IFF_RUNNING)) + error = gre_up(sc); + else + error = ENETRESET; + } else { + if (ISSET(ifp->if_flags, IFF_RUNNING)) + error = gre_down(sc); + } break; case SIOCSIFDSTADDR: break; - case SIOCSIFFLAGS: - break; case SIOCSIFMTU: if (ifr->ifr_mtu < 576) { error = EINVAL; @@ -464,255 +767,796 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, case SIOCGIFMTU: ifr->ifr_mtu = sc->sc_if.if_mtu; break; - case SIOCGIFHARDMTU: - ifr->ifr_hardmtu = sc->sc_if.if_hardmtu; - break; case SIOCADDMULTI: case SIOCDELMULTI: break; - case SIOCSETKALIVE: - if ((error = suser(prc, 0)) != 0) - break; - if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 || - ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256) { - error = EINVAL; - break; - } - sc->sc_ka_timout = ikar->ikar_timeo; - sc->sc_ka_cnt = ikar->ikar_cnt; - if (sc->sc_ka_timout == 0 || sc->sc_ka_cnt == 0) { - sc->sc_ka_timout = 0; - sc->sc_ka_cnt = 0; - sc->sc_ka_state = GRE_STATE_UKNWN; - gre_link_state(sc); - break; - } - if (!timeout_pending(&sc->sc_ka_snd)) { - sc->sc_ka_holdmax = sc->sc_ka_cnt; - timeout_add(&sc->sc_ka_snd, 1); - timeout_add_sec(&sc->sc_ka_hold, 
sc->sc_ka_timout * - sc->sc_ka_cnt); - } - break; - case SIOCGETKALIVE: - ikar->ikar_timeo = sc->sc_ka_timout; - ikar->ikar_cnt = sc->sc_ka_cnt; - break; - case SIOCSLIFPHYADDR: - if ((error = suser(prc, 0)) != 0) - break; - if (lifr->addr.ss_family != AF_INET || - lifr->dstaddr.ss_family != AF_INET) { - error = EAFNOSUPPORT; - break; - } - if (lifr->addr.ss_len != sizeof(si) || - lifr->dstaddr.ss_len != sizeof(si)) { - error = EINVAL; + + case SIOCSVNETID: + if (ISSET(ifp->if_flags, IFF_RUNNING)) { + error = EBUSY; break; } - sc->g_src = ((struct sockaddr_in *)&lifr->addr)->sin_addr; - sc->g_dst = ((struct sockaddr_in *)&lifr->dstaddr)->sin_addr; - recompute: - if ((sc->g_src.s_addr != INADDR_ANY) && - (sc->g_dst.s_addr != INADDR_ANY)) { - if (sc->route.ro_rt != NULL) { - rtfree(sc->route.ro_rt); - sc->route.ro_rt = NULL; - } - /* ip_output() will do the lookup */ - bzero(&sc->route, sizeof(sc->route)); - ifp->if_flags |= IFF_UP; - } - break; - case SIOCDIFPHYADDR: - if ((error = suser(prc, 0)) != 0) - break; - sc->g_src.s_addr = INADDR_ANY; - sc->g_dst.s_addr = INADDR_ANY; + error = gre_set_vnetid(&sc->sc_tunnel, ifr); break; - case SIOCGLIFPHYADDR: - if (sc->g_src.s_addr == INADDR_ANY || - sc->g_dst.s_addr == INADDR_ANY) { - error = EADDRNOTAVAIL; - break; - } - bzero(&si, sizeof(si)); - si.sin_family = AF_INET; - si.sin_len = sizeof(struct sockaddr_in); - si.sin_addr.s_addr = sc->g_src.s_addr; - memcpy(&lifr->addr, &si, sizeof(si)); - si.sin_addr.s_addr = sc->g_dst.s_addr; - memcpy(&lifr->dstaddr, &si, sizeof(si)); + case SIOCGVNETID: + error = gre_get_vnetid(&sc->sc_tunnel, ifr); break; - case SIOCSLIFPHYRTABLE: - if ((error = suser(prc, 0)) != 0) - break; - if (ifr->ifr_rdomainid < 0 || - ifr->ifr_rdomainid > RT_TABLEID_MAX || - !rtable_exists(ifr->ifr_rdomainid)) { - error = EINVAL; + case SIOCDVNETID: + if (ISSET(ifp->if_flags, IFF_RUNNING)) { + error = EBUSY; break; } - sc->g_rtableid = ifr->ifr_rdomainid; - goto recompute; - case SIOCGLIFPHYRTABLE: - 
ifr->ifr_rdomainid = sc->g_rtableid; + error = gre_del_vnetid(&sc->sc_tunnel); break; + default: - error = ENOTTY; + error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data); + break; } - splx(s); return (error); } -/* - * do a checksum of a buffer - much like in_cksum, which operates on - * mbufs. - */ -u_int16_t -gre_in_cksum(u_int16_t *p, u_int len) +static int +gre_up(struct gre_softc *sc) { - u_int32_t sum = 0; - int nwords = len >> 1; + struct gre_tunnel *t; + int error = 0; - while (nwords-- != 0) - sum += *p++; + if (sc->sc_tunnel.t_af == AF_UNSPEC) + return (ENXIO); - if (len & 1) { - union { - u_short w; - u_char c[2]; - } u; - u.c[0] = *(u_char *) p; - u.c[1] = 0; - sum += u.w; + error = rw_enter(&gre_lk, RW_WRITE | RW_INTR); + if (error != 0) + return (error); + + t = RBT_INSERT(gre_tree, &gre_softcs, &sc->sc_tunnel); + + rw_exit(&gre_lk); + + if (t == NULL) + SET(sc->sc_if.if_flags, IFF_RUNNING); + else + error = EBUSY; + + return (error); +} + +static int +gre_down(struct gre_softc *sc) +{ + int error; + + error = rw_enter(&gre_lk, RW_WRITE | RW_INTR); + if (error != 0) + return (error); + + RBT_REMOVE(gre_tree, &gre_softcs, &sc->sc_tunnel); + + rw_exit(&gre_lk); + + CLR(sc->sc_if.if_flags, IFF_RUNNING); + + return (error); +} + +static int +gre_l2_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + struct gre_l2_softc *sc = ifp->if_softc; + int error = 0; + + switch(cmd) { + case SIOCSIFADDR: + ifp->if_flags |= IFF_UP; + /* FALLTHROUGH */ + case SIOCSIFFLAGS: + if (ISSET(ifp->if_flags, IFF_UP)) { + if (!ISSET(ifp->if_flags, IFF_RUNNING)) + error = gre_l2_up(sc); + else + error = ENETRESET; + } else { + if (ISSET(ifp->if_flags, IFF_RUNNING)) + error = gre_l2_down(sc); + } + break; + + case SIOCADDMULTI: + case SIOCDELMULTI: + break; + + case SIOCSIFMEDIA: + case SIOCGIFMEDIA: + error = ifmedia_ioctl(ifp, (struct ifreq *)data, + &sc->sc_media, cmd); + break; + + default: + error = gre_tunnel_ioctl(ifp, &sc->sc_tunnel, cmd, data); + if (error == 
ENXIO) + error = ether_ioctl(ifp, &sc->sc_ac, cmd, data); + break; } - /* end-around-carry */ - sum = (sum >> 16) + (sum & 0xffff); - sum += (sum >> 16); - return (~sum); + return (error); } -void -gre_keepalive(void *arg) +static int +gre_l2_up(struct gre_l2_softc *sc) { - struct gre_softc *sc = arg; + struct gre_tunnel *t; + int error = 0; + + if (sc->sc_tunnel.t_af == AF_UNSPEC) + return (ENXIO); - if (!sc->sc_ka_timout) - return; + error = rw_enter(&gre_l2_lk, RW_WRITE | RW_INTR); + if (error != 0) + return (error); - sc->sc_ka_state = GRE_STATE_DOWN; - gre_link_state(sc); + t = RBT_INSERT(gre_l2_tree, &gre_l2_softcs, &sc->sc_tunnel); + + rw_exit(&gre_l2_lk); + + if (t == NULL) + SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING); + else + error = EBUSY; + + return (error); } -void -gre_send_keepalive(void *arg) +static int +gre_l2_down(struct gre_l2_softc *sc) { - struct gre_softc *sc = arg; - struct mbuf *m; - struct ip *ip; - struct gre_h *gh; - struct sockaddr dst; - int s; - - if (sc->sc_ka_timout) - timeout_add_sec(&sc->sc_ka_snd, sc->sc_ka_timout); - - if (sc->g_proto != IPPROTO_GRE) - return; - if ((sc->sc_if.if_flags & IFF_UP) == 0 || - sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) - return; + int error; - MGETHDR(m, M_DONTWAIT, MT_DATA); - if (m == NULL) { - sc->sc_if.if_oerrors++; - return; + error = rw_enter(&gre_l2_lk, RW_WRITE | RW_INTR); + if (error != 0) + return (error); + + RBT_REMOVE(gre_l2_tree, &gre_l2_softcs, &sc->sc_tunnel); + + rw_exit(&gre_l2_lk); + + CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING); + + return (error); +} + +static int +gre_l2_media_change(struct ifnet *ifp) +{ + return (0); +} + +static void +gre_l2_media_status(struct ifnet *ifp, struct ifmediareq *imr) +{ + imr->ifm_active = IFM_ETHER | IFM_AUTO; + imr->ifm_status = IFM_AVALID | IFM_ACTIVE; +} + +static int +egre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + struct gre_l2_softc *sc = ifp->if_softc; + struct ifreq *ifr = (struct ifreq *)data; + int 
error = 0; + + switch(cmd) { + case SIOCSVNETID: + if (ISSET(ifp->if_flags, IFF_RUNNING)) { + error = EBUSY; + break; + } + error = gre_set_vnetid(&sc->sc_tunnel, ifr); + break; + case SIOCGVNETID: + error = gre_get_vnetid(&sc->sc_tunnel, ifr); + break; + case SIOCDVNETID: + if (ISSET(ifp->if_flags, IFF_RUNNING)) { + error = EBUSY; + break; + } + error = gre_del_vnetid(&sc->sc_tunnel); + break; + default: + error = gre_l2_ioctl(ifp, cmd, data); + } + + return (error); +} + +int +nvgre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + struct gre_l2_softc *sc = ifp->if_softc; + struct ifreq *ifr = (struct ifreq *)data; + int error = 0; + + switch(cmd) { + case SIOCSVNETID: + if (ISSET(ifp->if_flags, IFF_RUNNING)) { + error = EBUSY; + break; + } + error = nvgre_set_vnetid(&sc->sc_tunnel, ifr); + break; + case SIOCGVNETID: + error = nvgre_get_vnetid(&sc->sc_tunnel, ifr); + break; + case SIOCDVNETID: + error = EOPNOTSUPP; + default: + error = gre_l2_ioctl(ifp, cmd, data); } - m->m_len = m->m_pkthdr.len = sizeof(*ip) + sizeof(*gh); - MH_ALIGN(m, m->m_len); + return (error); +} + +static int +gre_set_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req) +{ + struct sockaddr *src = (struct sockaddr *)&req->addr; + struct sockaddr *dst = (struct sockaddr *)&req->dstaddr; + struct sockaddr_in *sin; +#ifdef INET6 + struct sockaddr_in6 *sin6; + int error; +#endif + + /* sa_family and sa_len must be equal */ + if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len) + return (EINVAL); + + /* validate */ + switch (dst->sa_family) { + case AF_INET: + if (dst->sa_len != sizeof(*sin)) + return (EINVAL); + + sin = (struct sockaddr_in *)src; + if (in_nullhost(sin->sin_addr) || + IN_MULTICAST(sin->sin_addr.s_addr)) + return (EINVAL); + + tunnel->t_src[0] = sin->sin_addr.s_addr; + + sin = (struct sockaddr_in *)dst; + if (in_nullhost(sin->sin_addr) || + IN_MULTICAST(sin->sin_addr.s_addr)) + return (EINVAL); + + tunnel->t_dst[0] = sin->sin_addr.s_addr; + + break; 
+#ifdef INET6 + case AF_INET6: + if (dst->sa_len != sizeof(*sin6)) + return (EINVAL); + + sin6 = (struct sockaddr_in6 *)src; + if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || + IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) + return (EINVAL); + + error = in6_embedscope((struct in6_addr *)tunnel->t_src, + sin6, NULL); + if (error != 0) + return (error); + + sin6 = (struct sockaddr_in6 *)dst; + if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || + IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) + return (EINVAL); + + error = in6_embedscope((struct in6_addr *)tunnel->t_dst, + sin6, NULL); + if (error != 0) + return (error); + + break; +#endif + default: + return (EAFNOSUPPORT); + } + + /* commit */ + tunnel->t_af = dst->sa_family; + + return (0); +} + +static int +gre_get_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req) +{ + struct sockaddr *src = (struct sockaddr *)&req->addr; + struct sockaddr *dst = (struct sockaddr *)&req->dstaddr; + struct sockaddr_in *sin; +#ifdef INET6 /* ifconfig already embeds the scopeid */ + struct sockaddr_in6 *sin6; +#endif + + switch (tunnel->t_af) { + case AF_UNSPEC: + return (EADDRNOTAVAIL); + case AF_INET: + sin = (struct sockaddr_in *)src; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); + sin->sin_addr.s_addr = tunnel->t_src[0]; + + sin = (struct sockaddr_in *)dst; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); + sin->sin_addr.s_addr = tunnel->t_dst[0]; + + break; + +#ifdef INET6 + case AF_INET6: + sin6 = (struct sockaddr_in6 *)src; + memset(sin6, 0, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); + in6_recoverscope(sin6, (struct in6_addr *)tunnel->t_src); + + sin6 = (struct sockaddr_in6 *)dst; + memset(sin6, 0, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); + in6_recoverscope(sin6, (struct in6_addr *)tunnel->t_dst); + + break; +#endif + default: + return (EAFNOSUPPORT); + } + + 
return (0); +} + +static int +gre_del_tunnel(struct gre_tunnel *tunnel) +{ + /* commit */ + tunnel->t_af = AF_UNSPEC; + + return (0); +} + +static int +gre_set_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr) +{ + uint32_t key; + + if (ifr->ifr_vnetid < 0 || ifr->ifr_vnetid > 0xffffffff) + return EINVAL; + + key = htonl(ifr->ifr_vnetid); + + if (tunnel->t_key_mask == GRE_KEY_MASK && tunnel->t_key == key) + return (0); + + /* commit */ + tunnel->t_key_mask = GRE_KEY_MASK; + tunnel->t_key = key; + + return (0); +} + +static int +gre_get_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr) +{ + if (tunnel->t_key_mask == GRE_KEY_NONE) + return (EADDRNOTAVAIL); - /* use the interface's rdomain when sending keepalives. */ - m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain; + ifr->ifr_vnetid = (int64_t)ntohl(tunnel->t_key); + + return (0); +} + +static int +gre_del_vnetid(struct gre_tunnel *tunnel) +{ + tunnel->t_key_mask = GRE_KEY_NONE; + + return (0); +} + +static int +nvgre_set_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr) +{ + if (ifr->ifr_vnetid < NVGRE_VSID_MIN || + ifr->ifr_vnetid > NVGRE_VSID_MAX) + return EINVAL; + + /* commit */ + tunnel->t_key = htonl(ifr->ifr_vnetid << NVGRE_VSID_SHIFT); + + return (0); +} + +static int +nvgre_get_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr) +{ + ifr->ifr_vnetid = (int64_t)ntohl(tunnel->t_key) >> NVGRE_VSID_SHIFT; + + return (0); +} + +struct mbuf * +gre_if_input(struct mbuf *m, int hlen) +{ + struct gre_tunnel key; + struct ip *ip; - /* build the ip header */ ip = mtod(m, struct ip *); - ip->ip_v = IPVERSION; - ip->ip_hl = sizeof(*ip) >> 2; - ip->ip_tos = IPTOS_LOWDELAY; - ip->ip_len = htons(m->m_pkthdr.len); - ip->ip_id = htons(ip_randomid()); - ip->ip_off = htons(IP_DF); - ip->ip_ttl = ip_defttl; - ip->ip_p = IPPROTO_GRE; - ip->ip_src.s_addr = sc->g_dst.s_addr; - ip->ip_dst.s_addr = sc->g_src.s_addr; - ip->ip_sum = 0; - ip->ip_sum = in_cksum(m, sizeof(*ip)); - - gh = (struct gre_h *)(ip + 1); - /* We don't 
support any GRE flags for now */ - bzero(gh, sizeof(*gh)); - - bzero(&dst, sizeof(dst)); - dst.sa_family = AF_INET; - - s = splsoftnet(); - /* should we care about the error? */ - gre_output(&sc->sc_if, m, &dst, NULL); - splx(s); + key.t_af = AF_INET; + key.t_src[0] = ip->ip_dst.s_addr; + key.t_dst[0] = ip->ip_src.s_addr; + + return (gre_input_key(m, hlen, &key)); } -void -gre_recv_keepalive(struct gre_softc *sc) +#ifdef INET6 +struct mbuf * +gre_if_input6(struct mbuf *m, int hlen) { - if (!sc->sc_ka_timout) - return; + struct gre_tunnel key; + struct ip6_hdr *ip6; - /* link state flap dampening */ - switch (sc->sc_ka_state) { - case GRE_STATE_UKNWN: - case GRE_STATE_DOWN: - sc->sc_ka_state = GRE_STATE_HOLD; - sc->sc_ka_holdcnt = sc->sc_ka_holdmax; - sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2, - 16 * sc->sc_ka_cnt); + ip6 = mtod(m, struct ip6_hdr *); + + key.t_af = AF_INET6; + memcpy(key.t_src, &ip6->ip6_dst, sizeof(key.t_src)); + memcpy(key.t_dst, &ip6->ip6_src, sizeof(key.t_dst)); + + return (gre_input_key(m, hlen, &key)); +} +#endif /* INET6 */ + +static struct mbuf * +gre_input_key(struct mbuf *m, int iphlen, struct gre_tunnel *key) +{ + struct gre_softc *sc; + struct ifnet *ifp; + caddr_t buf; + struct gre_header *gh; + struct gre_h_key *gkh; + int hlen; + void (*input)(struct mbuf *); + int af __unused = AF_UNSPEC; /* bpf */ + + hlen = iphlen + sizeof(*gh); + + m = m_pullup(m, hlen); + if (m == NULL) + return (NULL); + + buf = mtod(m, caddr_t); + gh = (struct gre_header *)(buf + iphlen); + + /* check the version */ + switch (gh->gre_flags & htons(GRE_VERS_MASK)) { + case ctons(GRE_VERS_0): break; - case GRE_STATE_HOLD: - if (--sc->sc_ka_holdcnt < 1) { - sc->sc_ka_state = GRE_STATE_UP; - gre_link_state(sc); + + case ctons(GRE_VERS_1): +#ifdef PIPEX + if (pipex_enable) { + struct pipex_session *session; + + session = pipex_pptp_lookup_session(m); + if (session != NULL && + pipex_pptp_input(m, session) == NULL) + return (NULL); } +#endif + /* FALLTHROUGH */ + 
default: + return (m); + } + + /* the only optional bit in the header is K flag */ + if ((gh->gre_flags & htons(~(GRE_KP|GRE_VERS_MASK))) != htons(0)) + return (m); + + if (gh->gre_flags & htons(GRE_KP)) { + hlen += sizeof(*gkh); + + m = m_pullup(m, hlen); + if (m == NULL) + return (NULL); + + buf = mtod(m, caddr_t); + gh = (struct gre_header *)(buf + iphlen); + gkh = (struct gre_h_key *)(gh + 1); + + key->t_key_mask = GRE_KEY_MASK; + key->t_key = gkh->gre_key; + } else + key->t_key_mask = GRE_KEY_NONE; + + key->t_rtableid = m->m_pkthdr.ph_rtableid; + + switch (gh->gre_proto) { + case ctons(ETHERTYPE_IP): + af = AF_INET; + input = gre_input_ip; + break; +#ifdef INET6 + case ctons(ETHERTYPE_IPV6): + af = AF_INET6; + input = gre_input_ip6; break; - case GRE_STATE_UP: - sc->sc_ka_holdmax--; - sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_cnt); +#endif +#ifdef MPLS + case ctons(ETHERTYPE_MPLS): + case ctons(ETHERTYPE_MPLS_MCAST): + af = AF_MPLS; + input = gre_input_mpls; break; +#endif + + case ctons(ETHERTYPE_TRANSETHER): + return (gre_l2_input(m, key, hlen)); + + default: + return (m); } - /* rescedule hold timer */ - timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timout * sc->sc_ka_cnt); + rw_enter_read(&gre_lk); + sc = (struct gre_softc *)RBT_FIND(gre_tree, &gre_softcs, key); + rw_exit_read(&gre_lk); + if (sc == NULL) + return (m); + + ifp = &sc->sc_if; + + m_adj(m, hlen); + + m->m_flags &= ~(M_MCAST|M_BCAST); + m->m_pkthdr.ph_ifidx = ifp->if_index; + m->m_pkthdr.ph_rtableid = ifp->if_rdomain; + +#if NPF > 0 + pf_pkt_addr_changed(m); +#endif + + ifp->if_ipackets++; + ifp->if_ibytes += m->m_pkthdr.len; + +#if NBPFILTER > 0 + if (ifp->if_bpf) + bpf_mtap_af(ifp->if_bpf, af, m, BPF_DIRECTION_IN); +#endif + + (*input)(m); + return (NULL); +} + +static void +gre_input_ip(struct mbuf *m) +{ + niq_enqueue(&ipintrq, m); +} + +#ifdef INET6 +static void +gre_input_ip6(struct mbuf *m) +{ + niq_enqueue(&ip6intrq, m); +} +#endif + +#ifdef MPLS +static void 
+gre_input_mpls(struct mbuf *m) +{ + mpls_input(m); +} +#endif + +static struct mbuf * +gre_l2_input(struct mbuf *m, const struct gre_tunnel *key, int hlen) +{ + struct mbuf_list ml = MBUF_LIST_INITIALIZER(); + struct gre_l2_softc *sc; + struct mbuf *n; + int off; + + rw_enter_read(&gre_l2_lk); + sc = (struct gre_l2_softc *)RBT_FIND(gre_l2_tree, &gre_l2_softcs, key); + rw_exit_read(&gre_l2_lk); + if (sc == NULL) + return (m); + + /* from now on we're consuming the packet */ + + m_adj(m, hlen); + + if (m->m_pkthdr.len < sizeof(struct ether_header)) { + m_freem(m); + return (NULL); + } + + m = m_pullup(m, sizeof(struct ether_header)); + if (m == NULL) + return (NULL); + + n = m_getptr(m, sizeof(struct ether_header), &off); + if (n == NULL) { + m_freem(m); + return (NULL); + } + + if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) { + n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT); + m_freem(m); + if (n == NULL) + return (NULL); + m = n; + } + + m->m_flags &= ~(M_MCAST|M_BCAST); +#if NPF > 0 + pf_pkt_addr_changed(m); +#endif + + /* fetch the flow id out of the key */ + if (sc->sc_tunnel.t_key_mask == GRE_KEY_NVGRE) { + m->m_pkthdr.ph_flowid = M_FLOWID_VALID | + (bemtoh32(&key->t_key) & NVGRE_FLOWID_MASK); + } + + ml_enqueue(&ml, m); + if_input(&sc->sc_ac.ac_if, &ml); + + return (NULL); } void -gre_link_state(struct gre_softc *sc) +egre_start(struct ifnet *ifp) { - struct ifnet *ifp = &sc->sc_if; - int link_state = LINK_STATE_UNKNOWN; + struct gre_l2_softc *sc = ifp->if_softc; + struct mbuf *m; - if (sc->sc_ka_state == GRE_STATE_UP) - link_state = LINK_STATE_UP; - else if (sc->sc_ka_state != GRE_STATE_UKNWN) - link_state = LINK_STATE_KALIVE_DOWN; - - if (ifp->if_link_state != link_state) { - ifp->if_link_state = link_state; - if_link_state_change(ifp); + while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) { +#if NBPFILTER > 0 + if (ifp->if_bpf) + bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); +#endif + + ifp->if_opackets++; + + m = egre_encap(sc, m); + if (m == NULL || 
gre_ip_output(&sc->sc_tunnel, m, 0) != 0) + ifp->if_oerrors++; } } + +static struct mbuf * +egre_encap(struct gre_l2_softc *sc, struct mbuf *m0) +{ + struct mbuf *m; + struct gre_header *gh; + struct gre_h_key *gkh; + int hlen = sizeof(*gh); + + MGETHDR(m, M_DONTWAIT, m0->m_type); + if (m == NULL) { + m_freem(m0); + return (NULL); + } + + M_MOVE_PKTHDR(m, m0); + m->m_next = m0; + + if (sc->sc_tunnel.t_key_mask == GRE_KEY_MASK) + hlen += sizeof(*gkh); + + MH_ALIGN(m, hlen); + m->m_len = hlen; + m->m_pkthdr.len += hlen; + + gh = mtod(m, struct gre_header *); + gh->gre_flags = htons(GRE_VERS_0); + gh->gre_proto = htons(ETHERTYPE_TRANSETHER); + if (sc->sc_tunnel.t_key_mask == GRE_KEY_MASK) { + gh->gre_flags |= htons(GRE_KP); + + gkh = (struct gre_h_key *)(gh + 1); + gkh->gre_key = sc->sc_tunnel.t_key; + } + + return (m); +} + +void +nvgre_start(struct ifnet *ifp) +{ + struct gre_l2_softc *sc = ifp->if_softc; + struct mbuf *m; + + while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) { +#if NBPFILTER > 0 + if (ifp->if_bpf) + bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); #endif + + ifp->if_opackets++; + + m = nvgre_encap(sc, m); + if (m == NULL || gre_ip_output(&sc->sc_tunnel, m, 0) != 0) + ifp->if_oerrors++; + } +} + +static struct mbuf * +nvgre_encap(struct gre_l2_softc *sc, struct mbuf *m0) +{ + struct mbuf *m; + struct nvgre_header *nh; + + MGETHDR(m, M_DONTWAIT, m0->m_type); + if (m == NULL) { + m_freem(m0); + return (NULL); + } + + M_MOVE_PKTHDR(m, m0); + m->m_next = m0; + + MH_ALIGN(m, sizeof(*nh)); + m->m_len = sizeof(*nh); + m->m_pkthdr.len += sizeof(*nh); + + nh = mtod(m, struct nvgre_header *); + nh->nvgre_flags_proto = NVGRE_HEADER; + nh->nvgre_key = sc->sc_tunnel.t_key; + if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID)) { + nh->nvgre_key |= htonl(NVGRE_FLOWID_MASK & + (m->m_pkthdr.ph_flowid & M_FLOWID_MASK)); + } + + return (m); +} + +/* + * do a checksum of a buffer - much like in_cksum, which operates on + * mbufs. 
+ */ +u_int16_t +gre_in_cksum(u_int16_t *p, u_int len) +{ + u_int32_t sum = 0; + int nwords = len >> 1; + + while (nwords-- != 0) + sum += *p++; + + if (len & 1) { + union { + u_short w; + u_char c[2]; + } u; + u.c[0] = *(u_char *) p; + u.c[1] = 0; + sum += u.w; + } + + /* end-around-carry */ + sum = (sum >> 16) + (sum & 0xffff); + sum += (sum >> 16); + return (~sum); +} + +RBT_GENERATE(gre_tree, gre_tunnel, t_entry, gre_cmp); +RBT_GENERATE(gre_l2_tree, gre_tunnel, t_entry, gre_l2_cmp); Index: sys/net/if_gre.h =================================================================== RCS file: /cvs/src/sys/net/if_gre.h,v retrieving revision 1.13 diff -u -p -r1.13 if_gre.h --- sys/net/if_gre.h 26 Jun 2010 19:49:54 -0000 1.13 +++ sys/net/if_gre.h 12 Dec 2016 05:59:20 -0000 @@ -33,127 +33,56 @@ #ifndef _NET_IF_GRE_H #define _NET_IF_GRE_H -struct gre_softc { - struct ifnet sc_if; - LIST_ENTRY(gre_softc) sc_list; - struct timeout sc_ka_hold; - struct timeout sc_ka_snd; - struct in_addr g_src; /* source address of gre packets */ - struct in_addr g_dst; /* destination address of gre packets */ - struct route route; /* routing entry that determines, where - an encapsulated packet should go */ - u_int g_rtableid; /* routing table used for the tunnel */ - int gre_unit; - int gre_flags; - int sc_ka_timout; - int sc_ka_holdmax; - int sc_ka_holdcnt; - int sc_ka_cnt; - u_char g_proto; /* protocol of encapsulator */ - u_char sc_ka_state; -#define GRE_STATE_UKNWN 0 -#define GRE_STATE_DOWN 1 -#define GRE_STATE_HOLD 2 -#define GRE_STATE_UP 3 -}; - - -struct gre_h { - u_int16_t flags; /* GRE flags */ - u_int16_t ptype; /* protocol type of payload typically - Ether protocol type*/ -/* - * from here on: fields are optional, presence indicated by flags - * - u_int_16 checksum checksum (one-complements of GRE header - and payload - Present if (ck_pres | rt_pres == 1). - Valid if (ck_pres == 1). - u_int_16 offset offset from start of routing filed to - first octet of active SRE (see below). 
- Present if (ck_pres | rt_pres == 1). - Valid if (rt_pres == 1). - u_int_32 key inserted by encapsulator e.g. for - authentication - Present if (key_pres ==1 ). - u_int_32 seq_num Sequence number to allow for packet order - Present if (seq_pres ==1 ). - - struct gre_sre[] routing Routing fileds (see below) - Present if (rt_pres == 1) -*/ -} __packed; - -struct greip { - struct ip gi_i; - struct gre_h gi_g; -} __packed; - -#define gi_pr gi_i.ip_p -#define gi_len gi_i.ip_len -#define gi_src gi_i.ip_src -#define gi_dst gi_i.ip_dst -#define gi_ptype gi_g.ptype -#define gi_flags gi_g.flags - -#define GRE_CP 0x8000 /* Checksum Present */ -#define GRE_RP 0x4000 /* Routing Present */ -#define GRE_KP 0x2000 /* Key Present */ -#define GRE_SP 0x1000 /* Sequence Present */ -#define GRE_SS 0x0800 /* Strict Source Route */ - -/* gre_sre defines a Source route Entry. These are needed if packets - * should be routed over more than one tunnel hop by hop - */ - -struct gre_sre { - u_int16_t sre_family; /* address family */ - u_char sre_offset; /* offset to first octet of active entry */ - u_char sre_length; /* number of octets in the SRE. - sre_lengthl==0 -> last entry. 
*/ - u_char *sre_rtinfo; /* the routing information */ -}; - -struct greioctl { - int unit; - struct in_addr addr; -}; - -/* for mobile encaps */ - -struct mobile_h { - u_int16_t proto; /* protocol and S-bit */ - u_int16_t hcrc; /* header checksum */ - u_int32_t odst; /* original destination address */ - u_int32_t osrc; /* original source addr, if S-bit set */ -} __packed; - -struct mobip_h { - struct ip mi; - struct mobile_h mh; -} __packed; - - -#define MOB_H_SIZ_S (sizeof(struct mobile_h) - sizeof(u_int32_t)) -#define MOB_H_SIZ_L (sizeof(struct mobile_h)) -#define MOB_H_SBIT 0x0080 - - -/* - * ioctls needed to manipulate the interface - */ +/* the mandatory portion of the GRE header */ + +struct gre_header { + uint16_t gre_flags; +#define GRE_CP 0x8000 /* Checksum Present */ +#define GRE_KP 0x2000 /* Key Present */ +#define GRE_SP 0x1000 /* Sequence Present */ + +#define GRE_VERS_MASK 0x0007 +#define GRE_VERS_0 0x0000 +#define GRE_VERS_1 0x0001 + + uint16_t gre_proto; +} __packed __aligned(4); + +struct gre_h_cksum { + uint16_t gre_cksum; + uint16_t gre_reserved1; +} __packed __aligned(4); + +struct gre_h_key { + uint32_t gre_key; +} __packed __aligned(4); + +struct gre_h_seq { + uint32_t gre_seq; +} __packed __aligned(4); + +/* provide a simplified layout for NVGRE headers */ +struct nvgre_header { + uint32_t nvgre_flags_proto; +#define NVGRE_HEADER htonl(((GRE_KP | GRE_VERS_0) << 16) | \ + ETHERTYPE_TRANSETHER) + uint32_t nvgre_key; +#define NVGRE_VSID_MASK 0xffffff00U +#define NVGRE_VSID_SHIFT 8 +#define NVGRE_FLOWID_MASK 0x000000ffU +#define NVGRE_FLOWID_SHIFT 0 +} __packed __aligned(4); + +#define NVGRE_VSID_MIN 0x000000U +#define NVGRE_VSID_MAX 0xffffffU #ifdef _KERNEL -extern LIST_HEAD(gre_softc_head, gre_softc) gre_softc_list; -extern int gre_allow; -extern int gre_wccp; -extern int ip_mobile_allow; - -void greattach(int); -int gre_ioctl(struct ifnet *, u_long, caddr_t); -int gre_output(struct ifnet *, struct mbuf *, struct sockaddr *, - struct rtentry 
*); -u_int16_t gre_in_cksum(u_int16_t *, u_int); -void gre_recv_keepalive(struct gre_softc *); +void greattach(int); + +struct mbuf *gre_if_input(struct mbuf *, int); +#ifdef INET6 +struct mbuf *gre_if_input6(struct mbuf *, int); +#endif + #endif /* _KERNEL */ #endif /* _NET_IF_GRE_H_ */ Index: sys/netinet/in_proto.c =================================================================== RCS file: /cvs/src/sys/netinet/in_proto.c,v retrieving revision 1.70 diff -u -p -r1.70 in_proto.c --- sys/netinet/in_proto.c 3 Dec 2015 21:57:59 -0000 1.70 +++ sys/netinet/in_proto.c 12 Dec 2016 05:59:20 -0000 @@ -134,7 +134,7 @@ #include "gif.h" #if NGIF > 0 -#include +#include #endif #ifdef INET6 @@ -145,13 +145,14 @@ #include #endif -#include -#include - #include "gre.h" #if NGRE > 0 #include -#include +#endif + +#include "mobileip.h" +#if NMOBILEIP > 0 +#include #endif #include "carp.h" @@ -172,7 +173,7 @@ #include "etherip.h" #if NETHERIP > 0 -#include +#include #endif u_char ip_protox[IPPROTO_MAX]; @@ -205,42 +206,24 @@ struct protosw inetsw[] = { }, #if NGIF > 0 { SOCK_RAW, &inetdomain, IPPROTO_IPV4, PR_ATOMIC|PR_ADDR, - in_gif_input, rip_output, 0, rip_ctloutput, - rip_usrreq, - 0, 0, 0, 0, ipip_sysctl -}, -{ SOCK_RAW, &inetdomain, IPPROTO_ETHERIP, PR_ATOMIC|PR_ADDR, - etherip_input, rip_output, 0, rip_ctloutput, + gif_input, rip_output, 0, rip_ctloutput, rip_usrreq, - 0, 0, 0, 0, etherip_sysctl + 0, 0, 0, 0, }, #ifdef INET6 { SOCK_RAW, &inetdomain, IPPROTO_IPV6, PR_ATOMIC|PR_ADDR, - in_gif_input, rip_output, 0, 0, + gif_input, rip_output, 0, 0, rip_usrreq, /*XXX*/ 0, 0, 0, 0, }, #endif #ifdef MPLS { SOCK_RAW, &inetdomain, IPPROTO_MPLS, PR_ATOMIC|PR_ADDR, - etherip_input, rip_output, 0, 0, + gif_input, rip_output, 0, 0, rip_usrreq, 0, 0, 0, 0, }, #endif -#else /* NGIF */ -{ SOCK_RAW, &inetdomain, IPPROTO_IPIP, PR_ATOMIC|PR_ADDR, - ip4_input, rip_output, 0, rip_ctloutput, - rip_usrreq, - 0, 0, 0, 0, ipip_sysctl -}, -#ifdef INET6 -{ SOCK_RAW, &inetdomain, IPPROTO_IPV6, 
PR_ATOMIC|PR_ADDR, - ip4_input, rip_output, 0, rip_ctloutput, - rip_usrreq, /*XXX*/ - 0, 0, 0, 0, -}, -#endif #endif /*NGIF*/ { SOCK_RAW, &inetdomain, IPPROTO_IGMP, PR_ATOMIC|PR_ADDR, igmp_input, rip_output, 0, rip_ctloutput, @@ -275,14 +258,16 @@ struct protosw inetsw[] = { { SOCK_RAW, &inetdomain, IPPROTO_GRE, PR_ATOMIC|PR_ADDR, gre_input, rip_output, 0, rip_ctloutput, gre_usrreq, - 0, 0, 0, 0, gre_sysctl + 0, 0, 0, 0, }, +#endif /* NGRE > 0 */ +#if NMOBILEIP > 0 { SOCK_RAW, &inetdomain, IPPROTO_MOBILE, PR_ATOMIC|PR_ADDR, - gre_mobile_input, rip_output, 0, rip_ctloutput, + mobileip_input, rip_output, 0, rip_ctloutput, rip_usrreq, - 0, 0, 0, 0, ipmobile_sysctl + 0, 0, 0, 0, }, -#endif /* NGRE > 0 */ +#endif /* NMOBILEIP > 0 */ #if NCARP > 0 { SOCK_RAW, &inetdomain, IPPROTO_CARP, PR_ATOMIC|PR_ADDR, carp_proto_input, rip_output, 0, rip_ctloutput, @@ -305,10 +290,10 @@ struct protosw inetsw[] = { }, #endif /* NPF > 0 */ #if NETHERIP > 0 -{ SOCK_RAW, &inetdomain, IPPROTO_ETHERIP, PR_ATOMIC|PR_ADDR, - ip_etherip_input, rip_output, 0, rip_ctloutput, +{ SOCK_RAW, &inetdomain, IPPROTO_ETHERIP, PR_ATOMIC|PR_ADDR, + etherip_input, rip_output, 0, rip_ctloutput, rip_usrreq, - 0, 0, 0, 0, ip_etherip_sysctl + 0, 0, 0, 0, }, #endif /* NETHERIP */ /* raw wildcard */ Index: sys/netinet/ip_ether.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_ether.c,v retrieving revision 1.81 diff -u -p -r1.81 ip_ether.c --- sys/netinet/ip_ether.c 24 Sep 2016 14:51:37 -0000 1.81 +++ sys/netinet/ip_ether.c 12 Dec 2016 05:59:20 -0000 @@ -1,530 +0,0 @@ -/* $OpenBSD: ip_ether.c,v 1.81 2016/09/24 14:51:37 naddy Exp $ */ -/* - * The author of this code is Angelos D. Keromytis (kermit@adk.gr) - * - * This code was written by Angelos D. Keromytis for OpenBSD in October 1999. - * - * Copyright (C) 1999-2001 Angelos D. Keromytis. 
- * - * Permission to use, copy, and modify this software with or without fee - * is hereby granted, provided that this entire notice is included in - * all copies of any software which is or includes a copy or - * modification of this software. - * You may use this code under the GNU public license if you so wish. Please - * contribute changes back to the authors under this freer than GPL license - * so that we may further the use of strong encryption without limitations to - * all. - * - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY - * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE - * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR - * PURPOSE. - */ - -/* - * Ethernet-inside-IP processing (RFC3378). - */ - -#include "bridge.h" -#include "pf.h" - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include - -#include - -#if NBRIDGE > 0 -#include -#endif -#ifdef MPLS -#include -#endif -#if NPF > 0 -#include -#endif - -#include "bpfilter.h" - -#ifdef ENCDEBUG -#define DPRINTF(x) if (encdebug) printf x -#else -#define DPRINTF(x) -#endif - -#if NBRIDGE > 0 -void etherip_decap(struct mbuf *, int); -#endif -#ifdef MPLS -void mplsip_decap(struct mbuf *, int); -#endif -struct gif_softc *etherip_getgif(struct mbuf *); - -/* - * We can control the acceptance of EtherIP packets by altering the sysctl - * net.inet.etherip.allow value. Zero means drop them, all else is acceptance. - */ -int etherip_allow = 0; - -struct etheripstat etheripstat; - -/* - * etherip_input gets called when we receive an encapsulated packet. - * Only a wrapper for the IPv4 case. - */ -void -etherip_input(struct mbuf *m, ...) 
-{ - struct ip *ip; - va_list ap; - int iphlen; - - ip = mtod(m, struct ip *); - - va_start(ap, m); - iphlen = va_arg(ap, int); - va_end(ap); - - switch (ip->ip_p) { -#if NBRIDGE > 0 - case IPPROTO_ETHERIP: - /* If we do not accept EtherIP explicitly, drop. */ - if (!etherip_allow && (m->m_flags & (M_AUTH|M_CONF)) == 0) { - DPRINTF(("etherip_input(): dropped due to policy\n")); - etheripstat.etherip_pdrops++; - m_freem(m); - return; - } - etherip_decap(m, iphlen); - return; -#endif -#ifdef MPLS - case IPPROTO_MPLS: - mplsip_decap(m, iphlen); - return; -#endif - default: - DPRINTF(("etherip_input(): dropped, unhandled protocol\n")); - etheripstat.etherip_pdrops++; - m_freem(m); - return; - } -} - -#ifdef INET6 -int -etherip_input6(struct mbuf **mp, int *offp, int proto) -{ - switch (proto) { -#if NBRIDGE > 0 - case IPPROTO_ETHERIP: - /* If we do not accept EtherIP explicitly, drop. */ - if (!etherip_allow && ((*mp)->m_flags & (M_AUTH|M_CONF)) == 0) { - DPRINTF(("etherip_input6(): dropped due to policy\n")); - etheripstat.etherip_pdrops++; - m_freem(*mp); - return IPPROTO_DONE; - } - etherip_decap(*mp, *offp); - return IPPROTO_DONE; -#endif -#ifdef MPLS - case IPPROTO_MPLS: - mplsip_decap(*mp, *offp); - return IPPROTO_DONE; -#endif - default: - DPRINTF(("etherip_input6(): dropped, unhandled protocol\n")); - etheripstat.etherip_pdrops++; - m_freem(*mp); - return IPPROTO_DONE; - } -} -#endif - -#if NBRIDGE > 0 -void -etherip_decap(struct mbuf *m, int iphlen) -{ - struct etherip_header eip; - struct gif_softc *sc; - struct mbuf_list ml = MBUF_LIST_INITIALIZER(); - - etheripstat.etherip_ipackets++; - - /* - * Make sure there's at least an ethernet header's and an EtherIP - * header's of worth of data after the outer IP header. 
- */ - if (m->m_pkthdr.len < iphlen + sizeof(struct ether_header) + - sizeof(struct etherip_header)) { - DPRINTF(("etherip_input(): encapsulated packet too short\n")); - etheripstat.etherip_hdrops++; - m_freem(m); - return; - } - - /* Verify EtherIP version number */ - m_copydata(m, iphlen, sizeof(struct etherip_header), (caddr_t)&eip); - if (eip.eip_ver == ETHERIP_VERSION) { - /* Correct */ - } else { - DPRINTF(("etherip_input(): received EtherIP version number " - "%d not suppoorted\n", eip.eip_ver)); - etheripstat.etherip_adrops++; - m_freem(m); - return; - } - - /* Finally, the pad value must be zero. */ - if (eip.eip_pad) { - DPRINTF(("etherip_input(): received EtherIP invalid " - "pad value\n")); - etheripstat.etherip_adrops++; - m_freem(m); - return; - } - - /* Make sure the ethernet header at least is in the first mbuf. */ - if (m->m_len < iphlen + sizeof(struct ether_header) + - sizeof(struct etherip_header)) { - if ((m = m_pullup(m, iphlen + sizeof(struct ether_header) + - sizeof(struct etherip_header))) == NULL) { - DPRINTF(("etherip_input(): m_pullup() failed\n")); - etheripstat.etherip_adrops++; - return; - } - } - - sc = etherip_getgif(m); - if (sc == NULL) - return; - if (sc->gif_if.if_bridgeport == NULL) { - DPRINTF(("etherip_input(): interface not part of bridge\n")); - etheripstat.etherip_noifdrops++; - m_freem(m); - return; - } - - /* Chop off the `outer' IP and EtherIP headers and reschedule. 
*/ - m_adj(m, iphlen + sizeof(struct etherip_header)); - - /* Statistics */ - etheripstat.etherip_ibytes += m->m_pkthdr.len; - - /* Reset the flags based on the inner packet */ - m->m_flags &= ~(M_BCAST|M_MCAST|M_AUTH|M_CONF|M_PROTO1); - -#if NPF > 0 - pf_pkt_addr_changed(m); -#endif - - ml_enqueue(&ml, m); - if_input(&sc->gif_if, &ml); -} -#endif - -#ifdef MPLS -void -mplsip_decap(struct mbuf *m, int iphlen) -{ - struct gif_softc *sc; - - etheripstat.etherip_ipackets++; - - /* - * Make sure there's at least one MPLS label worth of data after - * the outer IP header. - */ - if (m->m_pkthdr.len < iphlen + sizeof(struct shim_hdr)) { - DPRINTF(("mplsip_input(): encapsulated packet too short\n")); - etheripstat.etherip_hdrops++; - m_freem(m); - return; - } - - /* Make sure the mpls label at least is in the first mbuf. */ - if (m->m_len < iphlen + sizeof(struct shim_hdr)) { - if ((m = m_pullup(m, iphlen + sizeof(struct shim_hdr))) == - NULL) { - DPRINTF(("mplsip_input(): m_pullup() failed\n")); - etheripstat.etherip_adrops++; - return; - } - } - - sc = etherip_getgif(m); - if (sc == NULL) - return; - - /* Chop off the `outer' IP header and reschedule. */ - m_adj(m, iphlen); - - /* Statistics */ - etheripstat.etherip_ibytes += m->m_pkthdr.len; - - /* Reset the flags based */ - m->m_flags &= ~(M_BCAST|M_MCAST); - -#if NBPFILTER > 0 - if (sc->gif_if.if_bpf) - bpf_mtap_af(sc->gif_if.if_bpf, AF_MPLS, m, BPF_DIRECTION_IN); -#endif - - m->m_pkthdr.ph_ifidx = sc->gif_if.if_index; - m->m_pkthdr.ph_rtableid = sc->gif_if.if_rdomain; -#if NPF > 0 - pf_pkt_addr_changed(m); -#endif - - mpls_input(m); -} -#endif - -struct gif_softc * -etherip_getgif(struct mbuf *m) -{ - union sockaddr_union ssrc, sdst; - struct gif_softc *sc; - u_int8_t v; - - /* Copy the addresses for use later. 
*/ - memset(&ssrc, 0, sizeof(ssrc)); - memset(&sdst, 0, sizeof(sdst)); - - v = *mtod(m, u_int8_t *); - switch (v >> 4) { - case 4: - ssrc.sa.sa_len = sdst.sa.sa_len = sizeof(struct sockaddr_in); - ssrc.sa.sa_family = sdst.sa.sa_family = AF_INET; - m_copydata(m, offsetof(struct ip, ip_src), - sizeof(struct in_addr), - (caddr_t) &ssrc.sin.sin_addr); - m_copydata(m, offsetof(struct ip, ip_dst), - sizeof(struct in_addr), - (caddr_t) &sdst.sin.sin_addr); - break; -#ifdef INET6 - case 6: - ssrc.sa.sa_len = sdst.sa.sa_len = sizeof(struct sockaddr_in6); - ssrc.sa.sa_family = sdst.sa.sa_family = AF_INET6; - m_copydata(m, offsetof(struct ip6_hdr, ip6_src), - sizeof(struct in6_addr), - (caddr_t) &ssrc.sin6.sin6_addr); - m_copydata(m, offsetof(struct ip6_hdr, ip6_dst), - sizeof(struct in6_addr), - (caddr_t) &sdst.sin6.sin6_addr); - break; -#endif /* INET6 */ - default: - DPRINTF(("etherip_input(): invalid protocol %d\n", v)); - m_freem(m); - etheripstat.etherip_hdrops++; - return NULL; - } - - /* Find appropriate gif(4) interface */ - LIST_FOREACH(sc, &gif_softc_list, gif_list) { - if ((sc->gif_psrc == NULL) || - (sc->gif_pdst == NULL) || - !(sc->gif_if.if_flags & (IFF_UP|IFF_RUNNING))) - continue; - - if (!memcmp(sc->gif_psrc, &sdst, sc->gif_psrc->sa_len) && - !memcmp(sc->gif_pdst, &ssrc, sc->gif_pdst->sa_len)) - break; - } - - /* None found. */ - if (sc == NULL) { - DPRINTF(("etherip_input(): no interface found\n")); - etheripstat.etherip_noifdrops++; - m_freem(m); - return NULL; - } - - return sc; -} - -int -etherip_output(struct mbuf *m, struct tdb *tdb, struct mbuf **mp, int proto) -{ - struct ip *ipo; -#ifdef INET6 - struct ip6_hdr *ip6; -#endif /* INET6 */ - struct etherip_header eip; - ushort hlen; - - /* Some address family sanity checks. 
*/ - if ((tdb->tdb_src.sa.sa_family != 0) && - (tdb->tdb_src.sa.sa_family != AF_INET) && - (tdb->tdb_src.sa.sa_family != AF_INET6)) { - DPRINTF(("etherip_output(): IP in protocol-family <%d> " - "attempted, aborting", tdb->tdb_src.sa.sa_family)); - etheripstat.etherip_adrops++; - m_freem(m); - return EINVAL; - } - - if ((tdb->tdb_dst.sa.sa_family != AF_INET) && - (tdb->tdb_dst.sa.sa_family != AF_INET6)) { - DPRINTF(("etherip_output(): IP in protocol-family <%d> " - "attempted, aborting", tdb->tdb_dst.sa.sa_family)); - etheripstat.etherip_adrops++; - m_freem(m); - return EINVAL; - } - - if (tdb->tdb_dst.sa.sa_family != tdb->tdb_src.sa.sa_family) { - DPRINTF(("etherip_output(): mismatch in tunnel source and " - "destination address protocol families (%d/%d), aborting", - tdb->tdb_src.sa.sa_family, tdb->tdb_dst.sa.sa_family)); - etheripstat.etherip_adrops++; - m_freem(m); - return EINVAL; - } - - switch (tdb->tdb_dst.sa.sa_family) { - case AF_INET: - hlen = sizeof(struct ip); - break; -#ifdef INET6 - case AF_INET6: - hlen = sizeof(struct ip6_hdr); - break; -#endif /* INET6 */ - default: - DPRINTF(("etherip_output(): unsupported tunnel protocol " - "family <%d>, aborting", tdb->tdb_dst.sa.sa_family)); - etheripstat.etherip_adrops++; - m_freem(m); - return EINVAL; - } - - if (proto == IPPROTO_ETHERIP) - /* Don't forget the EtherIP header. */ - hlen += sizeof(struct etherip_header); - - M_PREPEND(m, hlen, M_DONTWAIT); - if (m == NULL) { - DPRINTF(("etherip_output(): M_PREPEND failed\n")); - etheripstat.etherip_adrops++; - return ENOBUFS; - } - - /* - * Normalize mbuf so that it can be reinjected into higherlevel - * output functions (alignment also required in this function). 
- */ - if ((long)mtod(m, caddr_t) & 0x03) { - int off = (long)mtod(m, caddr_t) & 0x03; - if (M_LEADINGSPACE(m) < off) - panic("etherip_output: no space for align fixup"); - m->m_data -= off; - memmove(mtod(m, caddr_t), mtod(m, caddr_t) + off, m->m_len); - } - - /* Statistics */ - etheripstat.etherip_opackets++; - etheripstat.etherip_obytes += m->m_pkthdr.len - hlen; - - switch (tdb->tdb_dst.sa.sa_family) { - case AF_INET: - ipo = mtod(m, struct ip *); - - ipo->ip_v = IPVERSION; - ipo->ip_hl = 5; - ipo->ip_len = htons(m->m_pkthdr.len); - ipo->ip_ttl = ip_defttl; - ipo->ip_p = proto; - ipo->ip_tos = 0; - ipo->ip_off = 0; - ipo->ip_sum = 0; - ipo->ip_id = htons(ip_randomid()); - - /* - * We should be keeping tunnel soft-state and send back - * ICMPs as needed. - */ - - ipo->ip_src = tdb->tdb_src.sin.sin_addr; - ipo->ip_dst = tdb->tdb_dst.sin.sin_addr; - break; -#ifdef INET6 - case AF_INET6: - ip6 = mtod(m, struct ip6_hdr *); - - ip6->ip6_flow = 0; - ip6->ip6_nxt = proto; - ip6->ip6_vfc &= ~IPV6_VERSION_MASK; - ip6->ip6_vfc |= IPV6_VERSION; - ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6)); - ip6->ip6_hlim = ip_defttl; - ip6->ip6_dst = tdb->tdb_dst.sin6.sin6_addr; - ip6->ip6_src = tdb->tdb_src.sin6.sin6_addr; - break; -#endif /* INET6 */ - } - - if (proto == IPPROTO_ETHERIP) { - /* - * OpenBSD developers convinced IETF folk to create a - * "version 3" protocol which would solve a byte order - * problem -- our discussion placed "3" into the first byte. - * They knew we were starting to deploy this. When IETF - * published the standard this had changed to a nibble... - * but they failed to inform us. Awesome. - * - * We will transition step by step to the new model. 
- */ - eip.eip_ver = ETHERIP_VERSION; - eip.eip_res = 0; - eip.eip_pad = 0; - m_copyback(m, hlen - sizeof(struct etherip_header), - sizeof(struct etherip_header), &eip, M_NOWAIT); - } - - *mp = m; - - return 0; -} - -int -etherip_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - /* All sysctl names at this level are terminal. */ - if (namelen != 1) - return (ENOTDIR); - - switch (name[0]) { - case ETHERIPCTL_ALLOW: - return (sysctl_int(oldp, oldlenp, newp, newlen, - ðerip_allow)); - case ETHERIPCTL_STATS: - if (newp != NULL) - return (EPERM); - return (sysctl_struct(oldp, oldlenp, newp, newlen, - ðeripstat, sizeof(etheripstat))); - default: - return (ENOPROTOOPT); - } - /* NOTREACHED */ -} Index: sys/netinet/ip_ether.h =================================================================== RCS file: /cvs/src/sys/netinet/ip_ether.h,v retrieving revision 1.18 diff -u -p -r1.18 ip_ether.h --- sys/netinet/ip_ether.h 14 Jul 2014 12:18:30 -0000 1.18 +++ sys/netinet/ip_ether.h 12 Dec 2016 05:59:20 -0000 @@ -1,84 +0,0 @@ -/* $OpenBSD: ip_ether.h,v 1.18 2014/07/14 12:18:30 deraadt Exp $ */ -/* - * The author of this code is Angelos D. Keromytis (angelos@adk.gr) - * - * This code was written by Angelos D. Keromytis in October 1999. - * - * Copyright (C) 1999-2001 Angelos D. Keromytis. - * - * Permission to use, copy, and modify this software with or without fee - * is hereby granted, provided that this entire notice is included in - * all copies of any software which is or includes a copy or - * modification of this software. - * You may use this code under the GNU public license if you so wish. Please - * contribute changes back to the authors under this freer than GPL license - * so that we may further the use of strong encryption without limitations to - * all. - * - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR - * IMPLIED WARRANTY. 
IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY - * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE - * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR - * PURPOSE. - */ - -#ifndef _NETINET_IP_ETHER_H_ -#define _NETINET_IP_ETHER_H_ - -/* - * Ethernet-inside-IP processing. - */ - -struct etheripstat { - u_int32_t etherip_hdrops; /* packet shorter than header shows */ - u_int32_t etherip_qfull; /* bridge queue full, packet dropped */ - u_int32_t etherip_noifdrops; /* no interface/bridge information */ - u_int32_t etherip_pdrops; /* packet dropped due to policy */ - u_int32_t etherip_adrops; /* all other drops */ - u_int32_t etherip_ipackets; /* total input packets */ - u_int32_t etherip_opackets; /* total output packets */ - u_int64_t etherip_ibytes; /* input bytes */ - u_int64_t etherip_obytes; /* output bytes */ -}; - -struct etherip_header { -#if BYTE_ORDER == LITTLE_ENDIAN - u_int eip_res:4; /* reserved */ - u_int eip_ver:4; /* version */ -#endif -#if BYTE_ORDER == BIG_ENDIAN - u_int eip_ver:4; /* version */ - u_int eip_res:4; /* reserved */ -#endif - u_int8_t eip_pad; /* required padding byte */ -} __packed; - -#define ETHERIP_VERSION 0x03 - -/* - * Names for Ether-IP sysctl objects - */ -#define ETHERIPCTL_ALLOW 1 /* accept incoming EtherIP packets */ -#define ETHERIPCTL_STATS 2 /* etherip stats */ -#define ETHERIPCTL_MAXID 3 - -#define ETHERIPCTL_NAMES { \ - { 0, 0 }, \ - { "allow", CTLTYPE_INT }, \ - { "stats", CTLTYPE_STRUCT }, \ -} - -#ifdef _KERNEL -struct tdb; - -int etherip_output(struct mbuf *, struct tdb *, struct mbuf **, int); -void etherip_input(struct mbuf *, ...); -#ifdef INET6 -int etherip_input6(struct mbuf **, int *, int); -#endif -int etherip_sysctl(int *, u_int, void *, size_t *, void *, size_t); - -extern int etherip_allow; -extern struct etheripstat etheripstat; -#endif /* _KERNEL */ -#endif /* _NETINET_IP_ETHER_H_ */ Index: sys/netinet/ip_etherip.c =================================================================== 
RCS file: sys/netinet/ip_etherip.c diff -N sys/netinet/ip_etherip.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/netinet/ip_etherip.c 12 Dec 2016 05:59:20 -0000 @@ -0,0 +1,62 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2016 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +void +etherip_input(struct mbuf *m, ...) 
+{ + int hlen; + va_list ap; + + va_start(ap, m); + hlen = va_arg(ap, int); + va_end(ap); + + m = etherip_ip4_input(m, hlen); + if (m != NULL) + rip_input(m, hlen, IPPROTO_ETHERIP); +} + +#ifdef INET6 +int +etherip_input6(struct mbuf **mp, int *offp, int proto) +{ + *mp = etherip_ip6_input(*mp, *offp); + if (*mp != NULL) + return (rip6_input(mp, offp, proto)); + + return (IPPROTO_DONE); +} +#endif Index: sys/netinet/ip_etherip.h =================================================================== RCS file: sys/netinet/ip_etherip.h diff -N sys/netinet/ip_etherip.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/netinet/ip_etherip.h 12 Dec 2016 05:59:20 -0000 @@ -0,0 +1,30 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2016 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#ifndef _NETINET_IP_ETHERIP_H_ +#define _NETINET_IP_ETHERIP_H_ + + +#ifdef _KERNEL +void etherip_input(struct mbuf *, ...); +#ifdef INET6 +int etherip_input6(struct mbuf **, int *, int); +#endif +#endif /* _KERNEL */ + +#endif /* _NETINET_IP_ETHERIP_H_ */ Index: sys/netinet/ip_gif.c =================================================================== RCS file: sys/netinet/ip_gif.c diff -N sys/netinet/ip_gif.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/netinet/ip_gif.c 12 Dec 2016 05:59:20 -0000 @@ -0,0 +1,62 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2016 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +void +gif_input(struct mbuf *m, ...) 
+{ + int hlen; + va_list ap; + + va_start(ap, m); + hlen = va_arg(ap, int); + va_end(ap); + + m = gif_ip4_input(m, hlen); + if (m != NULL) + rip_input(m, hlen); +} + +#ifdef INET6 +int +gif_input6(struct mbuf **mp, int *offp, int proto) +{ + *mp = gif_ip6_input(*mp, *offp, proto); + if (*mp != NULL) + return (rip6_input(mp, offp, proto)); + + return (IPPROTO_DONE); +} +#endif Index: sys/netinet/ip_gif.h =================================================================== RCS file: sys/netinet/ip_gif.h diff -N sys/netinet/ip_gif.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/netinet/ip_gif.h 12 Dec 2016 05:59:20 -0000 @@ -0,0 +1,29 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2016 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#ifndef _NETINET_IP_GIF_H_ +#define _NETINET_IP_GIF_H_ + +#ifdef _KERNEL +void gif_input(struct mbuf *, ...); +#ifdef INET6 +int gif_input6(struct mbuf **, int *, int); +#endif +#endif /* _KERNEL */ + +#endif /* _NETINET_IP_GIF_H_ */ Index: sys/netinet/ip_gre.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_gre.c,v retrieving revision 1.59 diff -u -p -r1.59 ip_gre.c --- sys/netinet/ip_gre.c 4 Mar 2016 22:38:23 -0000 1.59 +++ sys/netinet/ip_gre.c 12 Dec 2016 05:59:20 -0000 @@ -30,16 +30,6 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/* - * decapsulate tunneled packets and send them on - * output half is in net/if_gre.[ch] - * This currently handles IPPROTO_GRE, IPPROTO_MOBILE - */ - - -#include "gre.h" -#if NGRE > 0 - #include #include #include @@ -48,366 +38,40 @@ #include #include #include -#include #include -#include #include #include #include #include -#include #include -#ifdef MPLS -#include -#endif - -#include "bpfilter.h" -#include "pf.h" - -#if NPF > 0 -#include -#endif +#include #ifdef PIPEX #include #endif -/* Needs IP headers. */ -#include - -struct gre_softc *gre_lookup(struct mbuf *, u_int8_t); -int gre_input2(struct mbuf *, int, u_char); - -/* - * Decapsulate. - * Does the real work and is called from gre_input() (above) - * returns 0 if packet is not yet processed - * and 1 if it needs no further processing - * proto is the protocol number of the "calling" foo_input() - * routine. - */ - -int -gre_input2(struct mbuf *m, int hlen, u_char proto) -{ - struct greip *gip; - struct niqueue *ifq; - struct gre_softc *sc; - u_short flags; - u_int af; - - if ((sc = gre_lookup(m, proto)) == NULL) { - /* No matching tunnel or tunnel is down. 
*/ - return (0); - } - - if (m->m_len < sizeof(*gip)) { - m = m_pullup(m, sizeof(*gip)); - if (m == NULL) - return (ENOBUFS); - } - gip = mtod(m, struct greip *); - - m->m_pkthdr.ph_ifidx = sc->sc_if.if_index; - m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain; - - sc->sc_if.if_ipackets++; - sc->sc_if.if_ibytes += m->m_pkthdr.len; - - switch (proto) { - case IPPROTO_GRE: - hlen += sizeof (struct gre_h); - - /* process GRE flags as packet can be of variable len */ - flags = ntohs(gip->gi_flags); - - /* Checksum & Offset are present */ - if ((flags & GRE_CP) | (flags & GRE_RP)) - hlen += 4; - - /* We don't support routing fields (variable length) */ - if (flags & GRE_RP) - return (0); - - if (flags & GRE_KP) - hlen += 4; - - if (flags & GRE_SP) - hlen += 4; - - switch (ntohs(gip->gi_ptype)) { /* ethertypes */ - case GREPROTO_WCCP: - /* WCCP/GRE: - * So far as I can see (and test) it seems that Cisco's WCCP - * GRE tunnel is precisely a IP-in-GRE tunnel that differs - * only in its protocol number. At least, it works for me. - * - * The Internet Drafts can be found if you look for - * the following: - * draft-forster-wrec-wccp-v1-00.txt - * draft-wilson-wrec-wccp-v2-01.txt - * - * So yes, we're doing a fall-through (unless, of course, - * net.inet.gre.wccp is 0). - */ - if (!gre_wccp) - return (0); - /* - * For WCCPv2, additionally skip the 4 byte - * redirect header. 
- */ - if (gre_wccp == 2) - hlen += 4; - case ETHERTYPE_IP: /* shouldn't need a schednetisr(), as */ - ifq = &ipintrq; /* we are in ip_input */ - af = AF_INET; - break; -#ifdef INET6 - case ETHERTYPE_IPV6: - ifq = &ip6intrq; - af = AF_INET6; - break; -#endif - case 0: - /* keepalive reply, retrigger hold timer */ - gre_recv_keepalive(sc); - m_freem(m); - return (1); -#ifdef MPLS - case ETHERTYPE_MPLS: - case ETHERTYPE_MPLS_MCAST: - mpls_input(m); - return (1); -#endif - default: /* others not yet supported */ - return (0); - } - break; - default: - /* others not yet supported */ - return (0); - } - - if (hlen > m->m_pkthdr.len) { - m_freem(m); - return (EINVAL); - } - m_adj(m, hlen); - -#if NBPFILTER > 0 - if (sc->sc_if.if_bpf) - bpf_mtap_af(sc->sc_if.if_bpf, af, m, BPF_DIRECTION_IN); -#endif - -#if NPF > 0 - pf_pkt_addr_changed(m); -#endif - - niq_enqueue(ifq, m); - - return (1); /* packet is done, no further processing needed */ -} - -/* - * Decapsulate a packet and feed it back through ip_input (this - * routine is called whenever IP gets a packet with proto type - * IPPROTO_GRE and a local destination address). - */ void gre_input(struct mbuf *m, ...) { - int hlen, ret; - va_list ap; - - va_start(ap, m); - hlen = va_arg(ap, int); - va_end(ap); - - if (!gre_allow) { - m_freem(m); - return; - } - -#ifdef PIPEX - if (pipex_enable) { - struct pipex_session *session; - - if ((session = pipex_pptp_lookup_session(m)) != NULL) { - if (pipex_pptp_input(m, session) == NULL) - return; - } - } -#endif - - ret = gre_input2(m, hlen, IPPROTO_GRE); - /* - * ret == 0: packet not processed, but input from here - * means no matching tunnel that is up is found. - * we inject it to raw ip socket to see if anyone picks it up. - * possible that we received a WCCPv1-style GRE packet - * but we're not set to accept them. - */ - if (!ret) - rip_input(m, hlen, IPPROTO_GRE); -} - -/* - * Input routine for IPPROTO_MOBILE. 
- * This is a little bit different from the other modes, as the - * encapsulating header was not prepended, but instead inserted - * between IP header and payload. - */ - -void -gre_mobile_input(struct mbuf *m, ...) -{ - struct ip *ip; - struct mobip_h *mip; - struct gre_softc *sc; int hlen; va_list ap; - u_char osrc = 0; - int msiz; va_start(ap, m); hlen = va_arg(ap, int); va_end(ap); - if (!ip_mobile_allow) { - m_freem(m); - return; - } - - if ((sc = gre_lookup(m, IPPROTO_MOBILE)) == NULL) { - /* No matching tunnel or tunnel is down. */ - m_freem(m); - return; - } - - if (m->m_len < sizeof(*mip)) { - m = m_pullup(m, sizeof(*mip)); - if (m == NULL) - return; - } - ip = mtod(m, struct ip *); - mip = mtod(m, struct mobip_h *); - - m->m_pkthdr.ph_ifidx = sc->sc_if.if_index; - - sc->sc_if.if_ipackets++; - sc->sc_if.if_ibytes += m->m_pkthdr.len; - - if (ntohs(mip->mh.proto) & MOB_H_SBIT) { - osrc = 1; - msiz = MOB_H_SIZ_L; - mip->mi.ip_src.s_addr = mip->mh.osrc; - } else - msiz = MOB_H_SIZ_S; - - if (m->m_len < (ip->ip_hl << 2) + msiz) { - m = m_pullup(m, (ip->ip_hl << 2) + msiz); - if (m == NULL) - return; - ip = mtod(m, struct ip *); - mip = mtod(m, struct mobip_h *); - } - - mip->mi.ip_dst.s_addr = mip->mh.odst; - mip->mi.ip_p = (ntohs(mip->mh.proto) >> 8); - - if (gre_in_cksum((u_short *) &mip->mh, msiz) != 0) { - m_freem(m); - return; - } - - memmove(ip + (ip->ip_hl << 2), ip + (ip->ip_hl << 2) + msiz, - m->m_len - msiz - (ip->ip_hl << 2)); - - m->m_len -= msiz; - ip->ip_len = htons(ntohs(ip->ip_len) - msiz); - m->m_pkthdr.len -= msiz; - - ip->ip_sum = 0; - ip->ip_sum = in_cksum(m,(ip->ip_hl << 2)); - -#if NBPFILTER > 0 - if (sc->sc_if.if_bpf) - bpf_mtap_af(sc->sc_if.if_bpf, AF_INET, m, BPF_DIRECTION_IN); -#endif - -#if NPF > 0 - pf_pkt_addr_changed(m); -#endif - - niq_enqueue(&ipintrq, m); -} - -/* - * Find the gre interface associated with our src/dst/proto set. 
- */ -struct gre_softc * -gre_lookup(struct mbuf *m, u_int8_t proto) -{ - struct ip *ip = mtod(m, struct ip *); - struct gre_softc *sc; - - LIST_FOREACH(sc, &gre_softc_list, sc_list) { - if ((sc->g_dst.s_addr == ip->ip_src.s_addr) && - (sc->g_src.s_addr == ip->ip_dst.s_addr) && - (sc->g_proto == proto) && - (rtable_l2(sc->g_rtableid) == - rtable_l2(m->m_pkthdr.ph_rtableid)) && - ((sc->sc_if.if_flags & IFF_UP) != 0)) - return (sc); - } - - return (NULL); -} - -int -gre_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) -{ - /* All sysctl names at this level are terminal. */ - if (namelen != 1) - return (ENOTDIR); - - switch (name[0]) { - case GRECTL_ALLOW: - return (sysctl_int(oldp, oldlenp, newp, newlen, &gre_allow)); - case GRECTL_WCCP: - return (sysctl_int(oldp, oldlenp, newp, newlen, &gre_wccp)); - default: - return (ENOPROTOOPT); - } - /* NOTREACHED */ -} - -int -ipmobile_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - /* All sysctl names at this level are terminal. 
*/ - if (namelen != 1) - return (ENOTDIR); - - switch (name[0]) { - case MOBILEIPCTL_ALLOW: - return (sysctl_int(oldp, oldlenp, newp, newlen, - &ip_mobile_allow)); - default: - return (ENOPROTOOPT); - } - /* NOTREACHED */ + m = gre_if_input(m, hlen); + if (m != NULL) + rip_input(m, hlen, IPPROTO_GRE); } int gre_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, struct mbuf *control, struct proc *p) { -#ifdef PIPEX +#ifdef PIPEX struct inpcb *inp = sotoinpcb(so); if (inp != NULL && inp->inp_pipex && req == PRU_SEND) { @@ -440,4 +104,15 @@ gre_usrreq(struct socket *so, int req, s #endif return rip_usrreq(so, req, m, nam, control, p); } -#endif /* if NGRE > 0 */ + +#ifdef INET6 +int +gre_input6(struct mbuf **mp, int *offp, int proto) +{ + *mp = gre_if_input6(*mp, *offp); + if (*mp != NULL) + return (rip6_input(mp, offp, proto)); + + return (IPPROTO_DONE); +} +#endif Index: sys/netinet/ip_gre.h =================================================================== RCS file: /cvs/src/sys/netinet/ip_gre.h,v retrieving revision 1.9 diff -u -p -r1.9 ip_gre.h --- sys/netinet/ip_gre.h 12 Jan 2010 23:33:24 -0000 1.9 +++ sys/netinet/ip_gre.h 12 Dec 2016 05:59:20 -0000 @@ -52,23 +52,12 @@ { "wccp", CTLTYPE_INT }, \ } -/* - * Names for MobileIP sysctl objects - */ -#define MOBILEIPCTL_ALLOW 1 /* accept incoming MobileIP packets */ -#define MOBILEIPCTL_MAXID 2 - -#define MOBILEIPCTL_NAMES { \ - { 0, 0 }, \ - { "allow", CTLTYPE_INT }, \ -} - #ifdef _KERNEL -void gre_input(struct mbuf *, ...); -void gre_mobile_input(struct mbuf *, ...); +void gre_input(struct mbuf *, ...); +#ifdef INET6 +int gre_input6(struct mbuf **, int *, int); +#endif -int ipmobile_sysctl(int *, u_int, void *, size_t *, void *, size_t); -int gre_sysctl(int *, u_int, void *, size_t *, void *, size_t); int gre_usrreq(struct socket *, int, struct mbuf *, struct mbuf *, struct mbuf *, struct proc *); #endif /* _KERNEL */ Index: sys/netinet/ip_mobileip.c 
=================================================================== RCS file: sys/netinet/ip_mobileip.c diff -N sys/netinet/ip_mobileip.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/netinet/ip_mobileip.c 12 Dec 2016 05:59:20 -0000 @@ -0,0 +1,64 @@ +/* $OpenBSD: ip_gre.c,v 1.59 2016/03/04 22:38:23 sashan Exp $ */ +/* $NetBSD: ip_gre.c,v 1.9 1999/10/25 19:18:11 drochner Exp $ */ + +/* + * Copyright (c) 1998 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Heiko W.Rupp + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +void +mobileip_input(struct mbuf *m, ...) +{ + int hlen; + va_list ap; + + va_start(ap, m); + hlen = va_arg(ap, int); + va_end(ap); + + m = mobileip_if_input(m, hlen); + if (m != NULL) + rip_input(m, hlen, IPPROTO_MOBILE); +} Index: sys/netinet/ip_mobileip.h =================================================================== RCS file: sys/netinet/ip_mobileip.h diff -N sys/netinet/ip_mobileip.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/netinet/ip_mobileip.h 12 Dec 2016 05:59:20 -0000 @@ -0,0 +1,26 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2016 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#ifndef _NETINET_IP_MOBILEIP_H_ +#define _NETINET_IP_MOBILEIP_H_ + +#ifdef _KERNEL +void mobileip_input(struct mbuf *, ...); +#endif /* _KERNEL */ + +#endif /* _NETINET_IP_MOBILEIP_H_ */ Index: sys/netinet6/in6_proto.c =================================================================== RCS file: /cvs/src/sys/netinet6/in6_proto.c,v retrieving revision 1.86 diff -u -p -r1.86 in6_proto.c --- sys/netinet6/in6_proto.c 1 Jun 2016 11:11:44 -0000 1.86 +++ sys/netinet6/in6_proto.c 12 Dec 2016 05:59:20 -0000 @@ -89,7 +89,6 @@ #include #include #include -#include #ifdef PIM #include @@ -101,8 +100,12 @@ #include "gif.h" #if NGIF > 0 -#include -#include +#include +#endif + +#include "gre.h" +#if NGRE > 0 +#include #endif #include "carp.h" @@ -117,7 +120,7 @@ #include "etherip.h" #if NETHERIP > 0 -#include +#include #endif /* @@ -191,33 +194,31 @@ struct ip6protosw inet6sw[] = { }, #endif /* IPSEC */ #if NGIF > 0 -{ SOCK_RAW, &inet6domain, IPPROTO_ETHERIP,PR_ATOMIC|PR_ADDR, - etherip_input6, rip6_output, 0, rip6_ctloutput, - rip6_usrreq, - 0, 0, 0, 0, etherip_sysctl -}, { SOCK_RAW, &inet6domain, IPPROTO_IPV6, PR_ATOMIC|PR_ADDR, - in6_gif_input, rip6_output, 0, rip6_ctloutput, + gif_input6, rip6_output, 0, rip6_ctloutput, rip6_usrreq, /* XXX */ 0, 0, 0, 0, }, { SOCK_RAW, &inet6domain, IPPROTO_IPV4, PR_ATOMIC|PR_ADDR, - in6_gif_input, rip6_output, 0, rip6_ctloutput, + gif_input6, rip6_output, 0, rip6_ctloutput, rip6_usrreq, /* XXX */ 0, 0, 0, 0, }, -#else /* NGIF */ -{ SOCK_RAW, &inet6domain, IPPROTO_IPV6, PR_ATOMIC|PR_ADDR, - ip4_input6, rip6_output, 0, rip6_ctloutput, - rip6_usrreq, /* XXX */ - 0, 0, 0, 0, ipip_sysctl -}, +#ifdef MPLS { SOCK_RAW, &inet6domain, IPPROTO_IPV4, PR_ATOMIC|PR_ADDR, - ip4_input6, rip6_output, 0, rip6_ctloutput, + gif_input6, rip6_output, 0, rip6_ctloutput, rip6_usrreq, /* XXX */ 0, 0, 0, 0, }, -#endif /* GIF */ +#endif +#endif /* NGIF */ +#if NGRE > 0 +{ SOCK_RAW, &inet6domain, IPPROTO_GRE, PR_ATOMIC|PR_ADDR, + gre_input6, rip6_output, 0, 
rip6_ctloutput, + rip6_usrreq, + 0, 0, 0, 0, +}, +#endif /* NGRE */ #ifdef PIM { SOCK_RAW, &inet6domain, IPPROTO_PIM, PR_ATOMIC|PR_ADDR, pim6_input, rip6_output, 0, rip6_ctloutput, @@ -241,9 +242,9 @@ struct ip6protosw inet6sw[] = { #endif /* NPF > 0 */ #if NETHERIP > 0 { SOCK_RAW, &inet6domain, IPPROTO_ETHERIP,PR_ATOMIC|PR_ADDR, - ip6_etherip_input, rip6_output, 0, rip6_ctloutput, + etherip_input6, rip6_output, 0, rip6_ctloutput, rip6_usrreq, - 0, 0, 0, 0, ip_etherip_sysctl + 0, 0, 0, 0, }, #endif /* NETHERIP */ /* raw wildcard */ Index: usr.sbin/tcpdump/print-etherip.c =================================================================== RCS file: /cvs/src/usr.sbin/tcpdump/print-etherip.c,v retrieving revision 1.8 diff -u -p -r1.8 print-etherip.c --- usr.sbin/tcpdump/print-etherip.c 16 Jan 2015 06:40:21 -0000 1.8 +++ usr.sbin/tcpdump/print-etherip.c 12 Dec 2016 05:59:20 -0000 @@ -41,7 +41,8 @@ #include #include #include -#include + +#include #include #include Index: usr.sbin/tcpdump/print-gre.c =================================================================== RCS file: /cvs/src/usr.sbin/tcpdump/print-gre.c,v retrieving revision 1.11 diff -u -p -r1.11 print-gre.c --- usr.sbin/tcpdump/print-gre.c 5 Nov 2015 11:55:21 -0000 1.11 +++ usr.sbin/tcpdump/print-gre.c 12 Dec 2016 05:59:20 -0000 @@ -39,6 +39,8 @@ #include #include +#include + #include #include @@ -55,13 +57,15 @@ #define GRE_AP 0x0080 /* acknowledgment# present */ #define GRE_VERS 0x0007 /* protocol version */ -#define GREPROTO_IP 0x0800 /* IP */ -#define GREPROTO_PPP 0x880b /* PPTP */ - /* source route entry types */ #define GRESRE_IP 0x0800 /* IP */ #define GRESRE_ASN 0xfffe /* ASN */ +#define NVGRE_VSID_MASK 0xffffff00U +#define NVGRE_VSID_SHIFT 8 +#define NVGRE_FLOWID_MASK 0x000000ffU +#define NVGRE_FLOWID_SHIFT 0 + void gre_print_0(const u_char *, u_int); void gre_print_1(const u_char *, u_int); void gre_sre_print(u_int16_t, u_int8_t, u_int8_t, const u_char *, u_int); @@ -82,14 +86,17 @@ gre_print(const 
u_char *bp, u_int length } vers = EXTRACT_16BITS(bp) & GRE_VERS; - if (vers == 0) + switch (vers) { + case 0: gre_print_0(bp, len); - else if (vers == 1) + break; + case 1: gre_print_1(bp, len); - else + break; + default: printf("gre-unknown-version=%u", vers); - return; - + break; + } } void @@ -114,6 +121,8 @@ gre_print_0(const u_char *bp, u_int leng if (len < 2) goto trunc; prot = EXTRACT_16BITS(bp); + printf("%s", etherproto_string(prot)); + len -= 2; bp += 2; @@ -121,21 +130,32 @@ gre_print_0(const u_char *bp, u_int leng if (len < 2) goto trunc; if (vflag) - printf("sum 0x%x ", EXTRACT_16BITS(bp)); + printf(" sum 0x%x", EXTRACT_16BITS(bp)); bp += 2; len -= 2; if (len < 2) goto trunc; - printf("off 0x%x ", EXTRACT_16BITS(bp)); + printf(" off 0x%x", EXTRACT_16BITS(bp)); bp += 2; len -= 2; } if (flags & GRE_KP) { + uint32_t key, vsid; + if (len < 4) goto trunc; - printf("key=0x%x ", EXTRACT_32BITS(bp)); + key = EXTRACT_32BITS(bp); + + /* maybe NVGRE? */ + if (flags == (GRE_KP | 0) && prot == ETHERTYPE_TRANSETHER) { + vsid = (key & NVGRE_VSID_MASK) >> NVGRE_VSID_SHIFT; + printf(" NVGRE vsid=%u (0x%x)+flowid=0x%02x /", + vsid, vsid, + (key & NVGRE_FLOWID_MASK) >> NVGRE_FLOWID_SHIFT); + } + printf(" key=%u (0x%x)", key, key); bp += 4; len -= 4; } @@ -143,7 +163,7 @@ gre_print_0(const u_char *bp, u_int leng if (flags & GRE_SP) { if (len < 4) goto trunc; - printf("seq %u ", EXTRACT_32BITS(bp)); + printf(" seq %u", EXTRACT_32BITS(bp)); bp += 4; len -= 4; } @@ -174,10 +194,21 @@ gre_print_0(const u_char *bp, u_int leng } } + printf(": "); + switch (prot) { - case GREPROTO_IP: + case ETHERTYPE_IP: ip_print(bp, len); break; + case ETHERTYPE_IPV6: + ip6_print(bp, len); + break; + case ETHERTYPE_MPLS: + mpls_print(bp, len); + break; + case ETHERTYPE_TRANSETHER: + ether_print(bp, len); + break; default: printf("gre-proto-0x%x", prot); } @@ -198,7 +229,7 @@ gre_print_1(const u_char *bp, u_int leng bp += 2; if (vflag) { - printf("[%s%s%s%s%s%s] ", + printf("[%s%s%s%s%s%s]", 
(flags & GRE_CP) ? "C" : "", (flags & GRE_RP) ? "R" : "", (flags & GRE_KP) ? "K" : "", @@ -214,19 +245,19 @@ gre_print_1(const u_char *bp, u_int leng bp += 2; if (flags & GRE_CP) { - printf("cpset!"); + printf(" cpset!"); return; } if (flags & GRE_RP) { - printf("rpset!"); + printf(" rpset!"); return; } if ((flags & GRE_KP) == 0) { - printf("kpunset!"); + printf(" kpunset!"); return; } if (flags & GRE_sP) { - printf("spset!"); + printf(" spset!"); return; } @@ -236,7 +267,7 @@ gre_print_1(const u_char *bp, u_int leng if (len < 4) goto trunc; k = EXTRACT_32BITS(bp); - printf("call %d ", k & 0xffff); + printf(" call %d", k & 0xffff); len -= 4; bp += 4; } @@ -244,7 +275,7 @@ gre_print_1(const u_char *bp, u_int leng if (flags & GRE_SP) { if (len < 4) goto trunc; - printf("seq %u ", EXTRACT_32BITS(bp)); + printf(" seq %u", EXTRACT_32BITS(bp)); bp += 4; len -= 4; } @@ -252,18 +283,20 @@ gre_print_1(const u_char *bp, u_int leng if (flags & GRE_AP) { if (len < 4) goto trunc; - printf("ack %u ", EXTRACT_32BITS(bp)); + printf(" ack %u", EXTRACT_32BITS(bp)); bp += 4; len -= 4; } if ((flags & GRE_SP) == 0) { - printf("no-payload"); + printf(" no-payload"); return; } + printf(": "); + switch (prot) { - case GREPROTO_PPP: + case ETHERTYPE_PPP: printf("gre-ppp-payload"); break; default: @@ -282,17 +315,17 @@ gre_sre_print(u_int16_t af, u_int8_t sre { switch (af) { case GRESRE_IP: - printf("(rtaf=ip"); + printf(" (rtaf=ip"); gre_sre_ip_print(sreoff, srelen, bp, len); - printf(") "); + printf(")"); break; case GRESRE_ASN: - printf("(rtaf=asn"); + printf(" (rtaf=asn"); gre_sre_asn_print(sreoff, srelen, bp, len); - printf(") "); + printf(")"); break; default: - printf("(rtaf=0x%x) ", af); + printf(" (rtaf=0x%x)", af); } } void Index: usr.sbin/tcpdump/print-ip6.c =================================================================== RCS file: /cvs/src/usr.sbin/tcpdump/print-ip6.c,v retrieving revision 1.23 diff -u -p -r1.23 print-ip6.c --- usr.sbin/tcpdump/print-ip6.c 16 Nov 2015 
00:16:39 -0000 1.23 +++ usr.sbin/tcpdump/print-ip6.c 12 Dec 2016 05:59:20 -0000 @@ -184,6 +184,18 @@ ip6_print(const u_char *bp, u_int length if (! vflag) printf(" (encap)"); goto end; + +#ifndef IPPROTO_GRE +#define IPPROTO_GRE 47 +#endif + case IPPROTO_GRE: + gre_print(cp, len); + if (! vflag) { + printf(" (gre encap)"); + goto out; + } + goto end; + case IPPROTO_NONE: (void)printf("no next header"); goto end;