Index: share/man/man4/frame.4 =================================================================== RCS file: share/man/man4/frame.4 diff -N share/man/man4/frame.4 --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ share/man/man4/frame.4 12 Dec 2024 12:07:48 -0000 @@ -0,0 +1,244 @@ +.\" $OpenBSD$ +.\" +.\" Copyright (c) 2024 David Gwynne +.\" +.\" Permission to use, copy, modify, and distribute this software for any +.\" purpose with or without fee is hereby granted, provided that the above +.\" copyright notice and this permission notice appear in all copies. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +.\" +.Dd $Mdocdate$ +.Dt FRAME 4 +.Os +.Sh NAME +.Nm frame +.Nd frame protocol family +.Sh SYNOPSIS +.Cd "pseudo-device af_frame" +.Pp +.In sys/types.h +.In net/frame.h +.Sh DESCRIPTION +The +.Nm +protocol family provides an interface for sending and receiving low +level network interface frames through the normal +.Xr socket 2 +mechanisms. +.Pp +The +.Nm +protocol family supports the +.Dv SOCK_DGRAM +socket type. +Only root may create +.Nm +protocol family sockets. +.Pp +.Nm +protocol family sockets are designed as an alternative to +.Xr bpf 4 +for handling low data and packet rate communication protocols. +Rather than filtering every frame entering the system before the +network stack like +.Xr bpf 4 , +the +.Nm +protocol family processing avoids this overhead by running after +the built in protocol handlers in the kernel. 
+For example, it is not possible to handle IPv4 or IPv6 packets +with +.Nm +protocol sockets because the kernel network stack consumes them +before the receive handling for +.Nm +sockets is run. +.Pp +Sockets can be created in the +.Nm +protocol family by using +.Dv AF_FRAME +as the +.Fa domain +argument to +.Xr socket 2 . +The type of interface, as per +.In net/if_types.h , +is specified as the socket +.Fa protocol . +Currently only Ethernet interfaces are supported. +.Pp +Sockets bound to the +.Nm +family use the following address structure: +.Bd -literal -offset indent +#define FRAME_ADDRLEN 8 +#define FRAME_DATALEN 32 + +struct sockaddr_frame { + uint8_t sfrm_len; + uint8_t sfrm_family; + uint16_t sfrm_proto; + unsigned int sfrm_ifindex; + uint8_t sfrm_addr[FRAME_ADDRLEN]; + char sfrm_ifname[IFNAMSIZ]; + uint8_t sfrm_data[FRAME_DATALEN]; +}; +.Ed +.Pp +The interface used for transmitting or receiving frames with a +.Nm +domain socket may be specified by using an interface index with +.Fa sfrm_ifindex , +or by name with +.Fa sfrm_ifname . +The use of other +.Vt struct sockaddr_frame +fields depends on the type of interface. +.Ss Ethernet frame sockets +A +.Nm +socket for use with Ethernet interfaces can be created using +.Dv IFT_ETHER +as the +.Fa protocol +argument to +.Xr socket 2 : +.Bd -literal -offset indent +int sock = socket(AF_FRAME, SOCK_DGRAM, IFT_ETHER); +.Ed +.Pp +The Ethernet protocol is specified with +.Fa sfrm_proto +in network byte order. +Ethernet addresses are specified using the first 6 bytes of +.Fa sfrm_addr . +.Pp +Ethernet +.Nm +sockets may receive frames on all interfaces by specifying 0 for +.Va sfrm_ifindex +when using +.Xr bind 2 . +Similarly, a +.Dq wildcard +local address of all zeros can be specified in +.Fa sfrm_addr . +.Pp +An interface and address must be specified when sending Ethernet frames. 
+.Pp
+Ethernet sockets support the following
+.Nm
+socket options
+using
+.Dv IFT_ETHER
+as the
+.Fa level
+argument with
+.Xr setsockopt 2
+and
+.Xr getsockopt 2 :
+.Bl -tag -width Ds
+.It Dv FRAME_RECVDSTADDR Ft int
+Enable or disable the reception of the Ethernet destination address as a
+.Vt struct ether_addr
+control message for frames received with
+.Xr recvmsg 2 .
+.It Dv FRAME_RECVPRIO Ft int
+Enable or disable the reception of the mbuf packet priority field as a
+.Vt int
+sized control message for frames received with
+.Xr recvmsg 2 .
+.It Dv FRAME_ADD_MEMBERSHIP Ft struct frame_mreq
+Configure an Ethernet interface to enable reception of
+frames destined to the specified multicast Ethernet address.
+.Bd -literal -offset indent
+struct frame_mreq {
+	unsigned int fmr_ifindex;
+	uint8_t fmr_addr[FRAME_ADDRLEN];
+	char fmr_ifname[IFNAMSIZ];
+};
+.Ed
+.Pp
+An interface must be specified using either
+.Fa fmr_ifindex
+or
+.Fa fmr_ifname .
+The multicast Ethernet address is specified in the first 6 bytes of
+.Fa fmr_addr .
+.It Dv FRAME_DEL_MEMBERSHIP Ft struct frame_mreq
+Configure an Ethernet interface to disable reception of frames destined
+to the specified multicast Ethernet address.
+.It Dv FRAME_SENDPRIO Ft int
+Specify an mbuf priority value between
+.Dv IF_HDRPRIO_MIN
+.Pq 0
+and
+.Dv IF_HDRPRIO_MAX
+.Pq 7
+for frames sent with the Ethernet
+.Nm
+socket, or
+.Dv IF_HDRPRIO_PACKET
+to use the existing mbuf priority value.
+The default is
+.Dv IF_HDRPRIO_PACKET .
+.El
+.Sh EXAMPLES
+To receive LLDP frames on the em0 Ethernet interface:
+.Bd -literal -offset indent
+struct sockaddr_frame sfrm = {
+	.sfrm_family = AF_FRAME,
+	.sfrm_ifname = "em0",
+	.sfrm_proto = htons(ETHERTYPE_LLDP),
+};
+struct frame_mreq fmr = {
+	.fmr_ifname = "em0",
+	.fmr_addr = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x0e },
+};
+int sock;
+
+sock = socket(AF_FRAME, SOCK_DGRAM, IFT_ETHER);
+if (sock == -1)
+	err(1, "ethernet frame socket");
+if (bind(sock, (struct sockaddr *)&sfrm, sizeof(sfrm)) == -1)
+	err(1, "bind");
+if (setsockopt(sock, IFT_ETHER, FRAME_ADD_MEMBERSHIP,
+    &fmr, sizeof(fmr)) == -1)
+	err(1, "join lldp multicast group");
+
+for (;;) {
+	socklen_t sfrmlen = sizeof(sfrm);
+	uint8_t frame[2048];
+	ssize_t rv;
+
+	rv = recvfrom(sock, frame, sizeof(frame), 0,
+	    (struct sockaddr *)&sfrm, &sfrmlen);
+	if (rv == -1)
+		err(1, "lldp recv");
+	printf("received %zd bytes from %s\en", rv,
+	    ether_ntoa((struct ether_addr *)sfrm.sfrm_addr));
+}
+.Ed
+.Sh SEE ALSO
+.Xr socket 2 ,
+.Xr netintro 4
+.Sh HISTORY
+.Nm
+domain sockets appeared in
+.Ox 7.7 .
+.\" The
+.\" .Ox
+.\" implementation was influenced by the Linux
+.\" .Dv AF_PACKET
+.\" .Dq packet interface on device level
+.\" socket interface, but is not compatible with it.
+.Sh AUTHORS
+.An David Gwynne Aq Mt dlg@openbsd.org .
Index: sys/conf/GENERIC =================================================================== RCS file: /cvs/src/sys/conf/GENERIC,v diff -u -p -r1.299 GENERIC --- sys/conf/GENERIC 3 Oct 2024 04:39:09 -0000 1.299 +++ sys/conf/GENERIC 12 Dec 2024 12:07:48 -0000 @@ -114,5 +115,7 @@ pseudo-device wg # WireGuard pseudo-device bio 1 # ioctl multiplexing device pseudo-device fuse # fuse device + +pseudo-device af_frame # (Ethernet) frame sockets option BOOT_CONFIG # add support for boot -c Index: sys/conf/files =================================================================== RCS file: /cvs/src/sys/conf/files,v diff -u -p -r1.741 files --- sys/conf/files 31 Oct 2024 13:55:21 -0000 1.741 +++ sys/conf/files 12 Dec 2024 12:07:48 -0000 @@ -601,6 +601,9 @@ pseudo-device pppx: ifnet pseudo-device vxlan: ifnet, ether, etherbridge pseudo-device wg: ifnet +pseudo-device af_frame +file net/af_frame.c af_frame needs-flag + pseudo-device ksyms file dev/ksyms.c ksyms needs-flag Index: sys/kern/uipc_domain.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_domain.c,v diff -u -p -r1.68 uipc_domain.c --- sys/kern/uipc_domain.c 16 Aug 2024 09:20:34 -0000 1.68 +++ sys/kern/uipc_domain.c 12 Dec 2024 12:07:48 -0000 @@ -41,9 +41,14 @@ #include #include +#include "af_frame.h" #include "bpfilter.h" #include "pflow.h" +#if NAF_FRAME > 0 +extern const struct domain framedomain; +#endif + const struct domain *const domains[] = { #ifdef MPLS &mplsdomain, @@ -57,6 +62,9 @@ const struct domain *const domains[] = { &inetdomain, &unixdomain, &routedomain, +#if NAF_FRAME > 0 + &framedomain, +#endif NULL }; Index: sys/kern/uipc_socket.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_socket.c,v diff -u -p -r1.345 uipc_socket.c --- sys/kern/uipc_socket.c 8 Nov 2024 21:47:03 -0000 1.345 +++ sys/kern/uipc_socket.c 12 Dec 2024 12:07:48 -0000 @@ -167,6 +167,7 @@ soalloc(const struct protosw *prp, int w 
case AF_KEY: case AF_ROUTE: case AF_UNIX: + case AF_FRAME: so->so_snd.sb_flags |= SB_MTXLOCK; so->so_rcv.sb_flags |= SB_MTXLOCK; break; Index: sys/net/af_frame.c =================================================================== RCS file: sys/net/af_frame.c diff -N sys/net/af_frame.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/net/af_frame.c 12 Dec 2024 12:07:48 -0000 @@ -0,0 +1,62 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2024 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +const struct domain framedomain; + +/* reach over to if_ethersubr.c */ +int ether_frm_ctloutput(int, struct socket *, int, int, struct mbuf *); +extern const struct pr_usrreqs ether_frm_usrreqs; + +static const struct protosw framesw[] = { + { + .pr_type = SOCK_DGRAM, + .pr_domain = &framedomain, + .pr_protocol = IFT_ETHER, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_MPINPUT|PR_MPSOCKET, + + .pr_ctloutput = ether_frm_ctloutput, + .pr_usrreqs = ðer_frm_usrreqs, + .pr_sysctl = NULL /* ether_frm_sysctl */, + }, +}; + +const struct domain framedomain = { + .dom_family = AF_FRAME, + .dom_name = "frame", + .dom_protosw = framesw, + .dom_protoswNPROTOSW = &framesw[nitems(framesw)], +}; + +void +af_frameattach(int n) +{ + /* nop */ +} Index: sys/net/if_ethersubr.c =================================================================== RCS file: /cvs/src/sys/net/if_ethersubr.c,v diff -u -p -r1.293 if_ethersubr.c --- sys/net/if_ethersubr.c 14 Feb 2024 22:41:48 -0000 1.293 +++ sys/net/if_ethersubr.c 12 Dec 2024 12:07:48 -0000 @@ -140,6 +140,14 @@ didn't get a copy, you may request one f #include #endif /* MPLS */ +#include "af_frame.h" +#if NAF_FRAME > 0 +#include + +static struct mbuf * + ether_frm_input(struct ifnet *, struct mbuf *, uint64_t, uint16_t); +#endif + /* #define ETHERDEBUG 1 */ #ifdef ETHERDEBUG int etherdebug = ETHERDEBUG; @@ -578,6 +586,9 @@ ether_input(struct ifnet *ifp, struct mb return; #endif default: +#if NAF_FRAME > 0 + m = ether_frm_input(ifp, m, dst, etype); +#endif goto dropanyway; } @@ -1247,3 +1258,927 @@ ether_extract_headers(struct mbuf *m0, s ext->tcp ? "tcp," : "", ext->udp ? 
"udp," : "", ext->iplen, ext->iphlen, ext->tcphlen, ext->paylen); } + +#if NAF_FRAME > 0 + +#include +#include +#include + +/* + * lock order is: + * + * - socket lock + * - ether_pcb_lock + * - socket buffer mtx + */ + +struct ether_pcb; + +struct ether_pcb_group { + TAILQ_ENTRY(ether_pcb_group) + epg_entry; + struct ether_pcb * + epg_pcb; + unsigned int epg_ifindex; + uint8_t epg_addr[ETHER_ADDR_LEN]; + struct task epg_hook; +}; + +TAILQ_HEAD(ether_pcb_groups, ether_pcb_group); + +struct ether_pcb { + TAILQ_ENTRY(ether_pcb) + ep_entry; + struct rwlock ep_lock; + + struct socket *ep_socket; + + uint64_t ep_laddr; + uint64_t ep_faddr; + unsigned int ep_ifindex; + uint16_t ep_etype; + + uint64_t ep_options; + int ep_txprio; + + struct ether_pcb_groups + ep_groups; +}; + +TAILQ_HEAD(ether_pcb_list, ether_pcb); + +static int ether_frm_attach(struct socket *, int, int); +static int ether_frm_detach(struct socket *); +static int ether_frm_bind(struct socket *, struct mbuf *, struct proc *); +static int ether_frm_connect(struct socket *, struct mbuf *); +static int ether_frm_disconnect(struct socket *); +static int ether_frm_shutdown(struct socket *); +static int ether_frm_send(struct socket *, struct mbuf *, struct mbuf *, + struct mbuf *); +static int ether_frm_control(struct socket *, u_long, caddr_t, + struct ifnet *); +static int ether_frm_sockaddr(struct socket *, struct mbuf *); +static int ether_frm_peeraddr(struct socket *, struct mbuf *); + +const struct pr_usrreqs ether_frm_usrreqs = { + .pru_attach = ether_frm_attach, + .pru_detach = ether_frm_detach, + .pru_bind = ether_frm_bind, + .pru_connect = ether_frm_connect, + .pru_disconnect = ether_frm_disconnect, + .pru_shutdown = ether_frm_shutdown, + .pru_send = ether_frm_send, + .pru_control = ether_frm_control, + .pru_sockaddr = ether_frm_sockaddr, + .pru_peeraddr = ether_frm_peeraddr, +}; + +static struct rwlock ether_pcb_lock = RWLOCK_INITIALIZER("ethsocks"); +static struct ether_pcb_list ether_pcbs = 
TAILQ_HEAD_INITIALIZER(ether_pcbs); + +static int +ether_frm_valid_etype(uint16_t etype) +{ + switch (etype) { + case ETHERTYPE_LLDP: + case ETHERTYPE_EAPOL: + return (1); + } + + return (0); +} + +static int +ether_frm_nam2sfrm(struct sockaddr_frame **sfrmp, const struct mbuf *nam) +{ + struct sockaddr_frame *sfrm; + + if (nam->m_len != sizeof(*sfrm)) + return (EINVAL); + + sfrm = mtod(nam, struct sockaddr_frame *); + if (sfrm->sfrm_family != AF_FRAME) + return (EAFNOSUPPORT); + *sfrmp = sfrm; + return (0); +} + +static int +ether_frm_ifp(struct ifnet **ifpp, const struct sockaddr_frame *sfrm) +{ + struct ifnet *ifp; + + if (sfrm->sfrm_ifindex != 0) + ifp = if_get(sfrm->sfrm_ifindex); + else if (sfrm->sfrm_ifname[0] != '\0') { + KERNEL_LOCK(); + ifp = if_unit(sfrm->sfrm_ifname); + KERNEL_UNLOCK(); + } else { + *ifpp = NULL; + return (0); + } + + if (ifp == NULL) + return (ENXIO); + + if (ifp->if_type != IFT_ETHER) { + if_put(ifp); + return (EAFNOSUPPORT); + } + + *ifpp = ifp; + return (0); +} + +static int +ether_frm_attach(struct socket *so, int proto, int wait) +{ + struct ether_pcb *ep; + int error; + + if (so->so_pcb != NULL) + return (EINVAL); + + error = suser(curproc); + if (error != 0) + return (error); + + error = soreserve(so, MCLBYTES, MCLBYTES); + if (error != 0) + return (error); + + ep = malloc(sizeof(*ep), M_PCB, (wait ? 
M_WAITOK : M_NOWAIT) | M_ZERO); + if (ep == NULL) + return (ENOMEM); + + rw_init(&ep->ep_lock, "ethsock"); + + so->so_pcb = ep; + ep->ep_socket = so; /* shares a ref with the list */ + + ep->ep_txprio = IF_HDRPRIO_PACKET; + TAILQ_INIT(&ep->ep_groups); + + /* give the ref to the list */ + rw_enter_write(ðer_pcb_lock); + TAILQ_INSERT_TAIL(ðer_pcbs, ep, ep_entry); + rw_exit_write(ðer_pcb_lock); + + return (0); +} + +static int +ether_frm_detach(struct socket *so) +{ + struct ether_pcb *ep; + struct ether_pcb_group *epg, *nepg; + struct ifnet *ifp; + + soassertlocked(so); + + ep = so->so_pcb; + + /* take the ref from the list */ + rw_enter_write(ðer_pcb_lock); + TAILQ_REMOVE(ðer_pcbs, ep, ep_entry); + rw_exit_write(ðer_pcb_lock); + + so->so_pcb = NULL; /* shares a ref with the list */ + + /* XXX locking */ + TAILQ_FOREACH_SAFE(epg, &ep->ep_groups, epg_entry, nepg) { + ifp = if_get(epg->epg_ifindex); + if (ifp != NULL) { + struct ifreq ifr; + struct sockaddr *sa; + + if_detachhook_del(ifp, &epg->epg_hook); + + memset(&ifr, 0, sizeof(ifr)); + strlcpy(ifr.ifr_name, ifp->if_xname, + sizeof(ifr.ifr_name)); + sa = &ifr.ifr_addr; + sa->sa_family = AF_UNSPEC; + memcpy(sa->sa_data, &epg->epg_addr, ETHER_ADDR_LEN); + + (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); + } + if_put(ifp); + + TAILQ_REMOVE(&ep->ep_groups, epg, epg_entry); + free(epg, M_PCB, sizeof(*epg)); + } + + free(ep, M_PCB, sizeof(*ep)); + + return (0); +} + +static int +ether_frm_bind(struct socket *so, struct mbuf *nam, struct proc *p) +{ + struct sockaddr_frame *sfrm; + struct ether_pcb *ep; + struct ether_pcb *epe; + struct ifnet *ifp = NULL; + unsigned int ifindex = 0; + uint16_t etype; + uint64_t laddr; + int error; + + soassertlocked(so); + + error = ether_frm_nam2sfrm(&sfrm, nam); + if (error != 0) + return (error); + + etype = ntohs(sfrm->sfrm_proto); + if (!ether_frm_valid_etype(etype)) + return (EADDRNOTAVAIL); + + ep = so->so_pcb; + if (ep->ep_etype != 0) + return (EINVAL); + + error = 
ether_frm_ifp(&ifp, sfrm); + if (error != 0) + return (error); + if (ifp != NULL) + ifindex = ifp->if_index; + + laddr = ether_addr_to_e64((struct ether_addr *)sfrm->sfrm_addr); + + rw_enter_write(ðer_pcb_lock); + TAILQ_FOREACH(epe, ðer_pcbs, ep_entry) { + if (ep == epe) + continue; + + /* XXX check stuff */ + } + + if (error == 0) { + /* serialised by the socket lock */ + ep->ep_etype = etype; + ep->ep_ifindex = ifindex; + ep->ep_laddr = laddr; + } + rw_exit_write(ðer_pcb_lock); + + if_put(ifp); + return (error); +} + +static int +ether_frm_connect(struct socket *so, struct mbuf *nam) +{ + struct sockaddr_frame *sfrm; + struct ether_pcb *ep; + struct ether_pcb *epe; + struct ifnet *ifp = NULL; + uint64_t faddr; + uint16_t etype; + int error; + + soassertlocked(so); + + error = ether_frm_nam2sfrm(&sfrm, nam); + if (error != 0) + return (error); + + etype = ntohs(sfrm->sfrm_proto); + if (!ether_frm_valid_etype(etype)) + return (EADDRNOTAVAIL); + + faddr = ether_addr_to_e64((struct ether_addr *)sfrm->sfrm_addr); + if (faddr == 0) + return (EADDRNOTAVAIL); + + error = ether_frm_ifp(&ifp, sfrm); + if (error != 0) + return (error); + if (ifp == NULL) + return (EADDRNOTAVAIL); + + ep = so->so_pcb; + if (ep->ep_etype != 0) { + if (ep->ep_faddr != 0 || + ep->ep_etype != etype) { + error = EISCONN; + goto put; + } + } + if (ep->ep_ifindex != 0) { + if (ep->ep_ifindex != ifp->if_index) { + error = EADDRNOTAVAIL; + goto put; + } + } + + rw_enter_write(ðer_pcb_lock); + TAILQ_FOREACH(epe, ðer_pcbs, ep_entry) { + if (ep == epe) + continue; + /* XXX check stuff */ + } + + if (error == 0) { + /* serialised by the socket lock */ + ep->ep_etype = etype; + ep->ep_ifindex = ifp->if_index; + ep->ep_faddr = faddr; + } + rw_exit_write(ðer_pcb_lock); + +put: + if_put(ifp); + return (error); +} + +static int +ether_frm_disconnect(struct socket *so) +{ + struct ether_pcb *ep; + + soassertlocked(so); + + ep = so->so_pcb; + if (ep->ep_faddr == 0) + return (ENOTCONN); + + 
+	rw_enter_write(&ether_pcb_lock);
+	ep->ep_ifindex = 0;
+	ep->ep_etype = 0;
+	ep->ep_laddr = 0;
+	ep->ep_faddr = 0;
+	rw_exit_write(&ether_pcb_lock);
+
+	return (0);
+}
+
+static int
+ether_frm_shutdown(struct socket *so)
+{
+	soassertlocked(so);
+	socantsendmore(so);
+	return (0);
+}
+
+/* pru_send: transmit m to nam, or to the connected peer if nam is NULL */
+static int
+ether_frm_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
+    struct mbuf *control)
+{
+	struct ether_pcb *ep;
+	int error;
+	uint16_t etype;
+	uint64_t laddr;
+	uint64_t faddr;
+	struct ifnet *ifp = NULL;
+	struct arpcom *ac;
+	struct ether_header *eh;
+	int txprio;
+
+	soassertlocked_readonly(so);
+
+	ep = so->so_pcb;
+	KASSERTMSG(ep != NULL, "%s: NULL pcb on socket %p", __func__, so);
+	txprio = ep->ep_txprio;
+
+	/* XXX get prio out of a cmsg */
+	m_freem(control);
+
+	if (nam != NULL) {
+		struct sockaddr_frame *sfrm;
+
+		error = ether_frm_nam2sfrm(&sfrm, nam);
+		if (error != 0)
+			goto drop;
+
+		etype = ntohs(sfrm->sfrm_proto);
+		if (!ether_frm_valid_etype(etype)) {
+			error = EADDRNOTAVAIL;
+			goto drop;
+		}
+
+		if (ep->ep_faddr != 0) {
+			error = EISCONN;
+			goto drop;
+		}
+		faddr = ether_addr_to_e64((struct ether_addr *)sfrm->sfrm_addr);
+		if (faddr == 0) {
+			error = EADDRNOTAVAIL;
+			goto drop;
+		}
+
+		error = ether_frm_ifp(&ifp, sfrm);
+		if (error != 0)
+			goto drop;
+		if (ifp == NULL) {
+			ifp = if_get(ep->ep_ifindex);
+			if (ifp == NULL) {
+				error = EADDRNOTAVAIL;
+				goto drop;
+			}
+		} else if (ep->ep_ifindex != 0 &&
+		    ep->ep_ifindex != ifp->if_index) {
+			error = EADDRNOTAVAIL;
+			goto drop;
+		}
+
+		if (ep->ep_etype != etype) {
+			if (ep->ep_etype == 0) {
+				/* this is cheeky */
+				rw_enter_write(&ether_pcb_lock);
+				ep->ep_etype = etype;
+				rw_exit_write(&ether_pcb_lock);
+			} else {
+				error = EADDRNOTAVAIL;
+				goto drop;
+			}
+		}
+	} else {
+		faddr = ep->ep_faddr;
+		if (faddr == 0) {
+			error = ENOTCONN;
+			goto drop;
+		}
+
+		ifp = if_get(ep->ep_ifindex);
+		if (ifp == NULL) {
+			error = ENXIO;
+			goto drop;
+		}
+
+		etype = ep->ep_etype;
+	}
+
+	if (ifp->if_type != IFT_ETHER) {
+		error = EAFNOSUPPORT;
+		goto drop;
+	}
+
+	ac = (struct arpcom *)ifp;
+
+	laddr = ether_addr_to_e64((struct ether_addr *)ac->ac_enaddr);
+	if (ep->ep_laddr != 0 && ep->ep_laddr != laddr) {
+		error = EADDRNOTAVAIL;
+		goto drop;
+	}
+
+	m = m_prepend(m, ETHER_ALIGN + sizeof(*eh), M_NOWAIT);
+	if (m == NULL) {
+		/* error is not otherwise set for this failure */
+		error = ENOBUFS;
+		goto drop;
+	}
+	m_adj(m, ETHER_ALIGN);
+
+	if (txprio != IF_HDRPRIO_PACKET)
+		m->m_pkthdr.pf.prio = txprio;
+
+	eh = mtod(m, struct ether_header *);
+	ether_e64_to_addr((struct ether_addr *)eh->ether_dhost, faddr);
+	ether_e64_to_addr((struct ether_addr *)eh->ether_shost, laddr);
+	eh->ether_type = htons(etype);
+
+	error = if_enqueue(ifp, m);
+	m = NULL;	/* passed on; nothing left to free at drop */
+
+drop:
+	if_put(ifp);
+	m_freem(m);
+	return (error);
+}
+
+static int
+ether_frm_control(struct socket *so, u_long cmd, caddr_t data,
+    struct ifnet *ifp)
+{
+	return (EOPNOTSUPP);
+}
+
+static int
+ether_frm_sockaddr_frame(struct ether_pcb *ep, struct mbuf *nam, uint64_t addr)
+{
+	struct sockaddr_frame *sfrm;
+	struct ifnet *ifp;
+
+	nam->m_len = sizeof(*sfrm);
+	sfrm = mtod(nam, struct sockaddr_frame *);
+	memset(sfrm, 0, sizeof(*sfrm));
+	sfrm->sfrm_len = sizeof(*sfrm);
+	sfrm->sfrm_family = AF_FRAME;
+
+	ether_e64_to_addr((struct ether_addr *)sfrm->sfrm_addr, addr);
+
+	if (ep->ep_etype) {
+		sfrm->sfrm_proto = htons(ep->ep_etype);
+		sfrm->sfrm_ifindex = ep->ep_ifindex;
+
+		ifp = if_get(ep->ep_ifindex);
+		if (ifp != NULL) {
+			strlcpy(sfrm->sfrm_ifname, ifp->if_xname,
+			    sizeof(sfrm->sfrm_ifname));
+		}
+		if_put(ifp);
+	}
+
+	return (0);
+}
+
+static int
+ether_frm_sockaddr(struct socket *so, struct mbuf *nam)
+{
+	struct ether_pcb *ep = so->so_pcb;
+
+	return (ether_frm_sockaddr_frame(ep, nam, ep->ep_laddr));
+}
+
+static int
+ether_frm_peeraddr(struct socket *so, struct mbuf *nam)
+{
+	struct ether_pcb *ep = so->so_pcb;
+
+	return (ether_frm_sockaddr_frame(ep, nam, ep->ep_faddr));
+}
+
+static void
+ether_frm_group_detach(void *arg)
+{
+	struct ether_pcb_group *epg = arg;
+	struct ether_pcb *ep
= epg->epg_pcb; + struct socket *so = ep->ep_socket; + struct ifnet *ifp; + + ifp = if_get(epg->epg_ifindex); + + /* XXX locking^Wreference counts */ + solock(so); + if (ifp != NULL) + if_detachhook_del(ifp, &epg->epg_hook); + TAILQ_REMOVE(&ep->ep_groups, epg, epg_entry); + sounlock(so); + + if_put(ifp); + free(epg, M_PCB, sizeof(*epg)); +} + +static int +ether_frm_group(struct socket *so, int optname, struct mbuf *m) +{ + struct frame_mreq *fmr; + struct ifreq ifr; + struct sockaddr *sa; + struct ifnet *ifp; + struct ether_pcb *ep; + struct ether_pcb_group *epg; + u_long cmd; + int error; + + soassertlocked(so); + + if (m->m_len != sizeof(*fmr)) + return (EINVAL); + + fmr = mtod(m, struct frame_mreq *); + if (!ETHER_IS_MULTICAST(fmr->fmr_addr)) + return (EADDRNOTAVAIL); + + if (fmr->fmr_ifindex == 0) { + KERNEL_LOCK(); + ifp = if_unit(fmr->fmr_ifname); + KERNEL_UNLOCK(); + } else + ifp = if_get(fmr->fmr_ifindex); + if (ifp == NULL) + return (ENXIO); + + if (ifp->if_type != IFT_ETHER) { + error = EADDRNOTAVAIL; + goto put; + } + + if (ETHER_IS_BROADCAST(fmr->fmr_addr)) { + error = 0; + goto put; + } + + ep = so->so_pcb; + TAILQ_FOREACH(epg, &ep->ep_groups, epg_entry) { + if (epg->epg_ifindex != ifp->if_index) + continue; + if (!ETHER_IS_EQ(epg->epg_addr, fmr->fmr_addr)) + continue; + + break; + } + + switch (optname) { + case FRAME_ADD_MEMBERSHIP: + if (epg != NULL) { + error = EISCONN; + goto put; + } + epg = malloc(sizeof(*epg), M_PCB, M_DONTWAIT); + if (epg == NULL) { + error = ENOMEM; + goto put; + } + + epg->epg_pcb = ep; + epg->epg_ifindex = ifp->if_index; + memcpy(&epg->epg_addr, fmr->fmr_addr, sizeof(epg->epg_addr)); + task_set(&epg->epg_hook, ether_frm_group_detach, epg); + + cmd = SIOCADDMULTI; + break; + case FRAME_DEL_MEMBERSHIP: + if (epg == NULL) { + error = ENOTCONN; + goto put; + } + cmd = SIOCDELMULTI; + break; + default: + panic("%s: unexpected optname %d", __func__, optname); + /* NOTREACHED */ + } + + memset(&ifr, 0, sizeof(ifr)); + 
strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name)); + sa = &ifr.ifr_addr; + sa->sa_family = AF_UNSPEC; + memcpy(sa->sa_data, fmr->fmr_addr, ETHER_ADDR_LEN); + + /* XXX soref? */ + /* this could lead to multiple epgs for the same if/group */ + sounlock(so); + KERNEL_LOCK(); + NET_LOCK(); + error = (*ifp->if_ioctl)(ifp, cmd, (caddr_t)&ifr); + NET_UNLOCK(); + KERNEL_UNLOCK(); + solock(so); + + switch (optname) { + case FRAME_ADD_MEMBERSHIP: + if (error != 0) { + free(epg, M_PCB, sizeof(*epg)); + break; + } + + TAILQ_INSERT_TAIL(&ep->ep_groups, epg, epg_entry); + if_detachhook_add(ifp, &epg->epg_hook); + break; + case FRAME_DEL_MEMBERSHIP: + if (error != 0) + break; + + if_detachhook_del(ifp, &epg->epg_hook); + TAILQ_REMOVE(&ep->ep_groups, epg, epg_entry); + free(epg, M_PCB, sizeof(*epg)); + break; + } +put: + if_put(ifp); + + return (error); +} + +#define ETHER_PCB_OPTM(_v) (1ULL << (_v)) + +#define ETHER_PCB_OPTS \ + ETHER_PCB_OPTM(FRAME_RECVDSTADDR) | \ + ETHER_PCB_OPTM(FRAME_RECVPRIO) + +static int +ether_frm_setopt(struct ether_pcb *ep, int optname, struct mbuf *m) +{ + uint64_t optm = ETHER_PCB_OPTM(optname); + int opt; + + if (!ISSET(ETHER_PCB_OPTS, optm)) + return (ENOPROTOOPT); + + if (m->m_len != sizeof(opt)) + return (EINVAL); + + opt = *mtod(m, int *); + if (opt) + SET(ep->ep_options, optm); + else + CLR(ep->ep_options, optm); + + return (0); +} + +static int +ether_frm_setsockopt(struct socket *so, int optname, struct mbuf *m) +{ + struct ether_pcb *ep = so->so_pcb; + int error = ENOPROTOOPT; + int v; + + if (optname >= 0 && optname < 64) + return (ether_frm_setopt(ep, optname, m)); + + switch (optname) { + case FRAME_ADD_MEMBERSHIP: + case FRAME_DEL_MEMBERSHIP: + error = ether_frm_group(so, optname, m); + break; + case FRAME_SENDPRIO: + if (m->m_len != sizeof(v)) { + error = EINVAL; + break; + } + v = *mtod(m, int *); + error = if_txhprio_l2_check(v); + if (error != 0) + break; + ep->ep_txprio = v; + break; + + default: + break; + } + + return 
(error); +} + +static int +ether_frm_getopt(struct ether_pcb *ep, int optname, struct mbuf *m) +{ + uint64_t optm = ETHER_PCB_OPTM(optname); + int opt; + + if (!ISSET(ETHER_PCB_OPTS, optm)) + return (ENOPROTOOPT); + + opt = !!ISSET(ep->ep_options, optm); + + m->m_len = sizeof(opt); + *mtod(m, int *) = opt; + + return (0); +} + +static int +ether_frm_getsockopt(struct socket *so, int optname, struct mbuf *m) +{ + struct ether_pcb *ep = so->so_pcb; + int error = ENOPROTOOPT; + + if (optname >= 0 && optname < 64) + return (ether_frm_getopt(ep, optname, m)); + + switch (optname) { + default: + break; + } + + return (error); +} + +int +ether_frm_ctloutput(int op, struct socket *so, int level, int optname, + struct mbuf *m) +{ + int error = 0; + + if (level != IFT_ETHER) + return (EINVAL); + + switch (op) { + case PRCO_SETOPT: + error = ether_frm_setsockopt(so, optname, m); + break; + case PRCO_GETOPT: + error = ether_frm_getsockopt(so, optname, m); + break; + } + + return (error); +} + +static struct mbuf * +ether_frm_cmsg(struct mbuf *cmsgs, const void *data, size_t datalen, + int type, int level) +{ + struct mbuf *cm; + + cm = sbcreatecontrol(data, datalen, type, level); + if (cm != NULL) { + cm->m_next = cmsgs; + cmsgs = cm; + } + + return (cmsgs); +} + +static void +ether_frm_recv(struct socket *so, struct mbuf *m0, + const struct sockaddr_frame *sfrm) +{ + struct ether_pcb *ep = so->so_pcb; + struct mbuf *m; + struct mbuf *cmsgs = NULL; + int ok; + + /* offset 0 and m_adj cos sbappendaddr needs m_pkthdr.len */ + m = m_copym(m0, 0, M_COPYALL, M_DONTWAIT); + if (m == NULL) + return; + m_adj(m, sizeof(struct ether_header)); + + if (ISSET(ep->ep_options, ETHER_PCB_OPTM(FRAME_RECVPRIO))) { + int rxprio = m0->m_pkthdr.pf.prio; + cmsgs = ether_frm_cmsg(cmsgs, &rxprio, sizeof(rxprio), + FRAME_RECVPRIO, IFT_ETHER); + } + + if (ISSET(ep->ep_options, ETHER_PCB_OPTM(FRAME_RECVDSTADDR))) { + struct ether_header *eh = mtod(m0, struct ether_header *); + cmsgs = 
ether_frm_cmsg(cmsgs, eh->ether_dhost, ETHER_ADDR_LEN, + FRAME_RECVDSTADDR, IFT_ETHER); + } + + if (ISSET(so->so_options, SO_TIMESTAMP)) { + struct timeval tv; + m_microtime(m0, &tv); + cmsgs = ether_frm_cmsg(cmsgs, &tv, sizeof(tv), + SCM_TIMESTAMP, SOL_SOCKET); + } + + mtx_enter(&so->so_rcv.sb_mtx); + ok = sbappendaddr(so, &so->so_rcv, (struct sockaddr *)sfrm, m, cmsgs); + mtx_leave(&so->so_rcv.sb_mtx); + + if (!ok) { + m_freem(m); + m_freem(cmsgs); + return; + } + + sorwakeup(so); +} + +static struct mbuf * +ether_frm_input(struct ifnet *ifp, struct mbuf *m, uint64_t dst, uint16_t etype) +{ + struct sockaddr_frame sfrm = { .sfrm_family = AF_UNSPEC }; + struct ether_pcb *ep; + struct ether_header *eh; + uint64_t src; + + if (TAILQ_EMPTY(ðer_pcbs)) + return (m); + + eh = mtod(m, struct ether_header *); + src = ether_addr_to_e64((struct ether_addr *)eh->ether_shost); + if (src == 0) + return (m); + + rw_enter_read(ðer_pcb_lock); + TAILQ_FOREACH(ep, ðer_pcbs, ep_entry) { + if (ep->ep_etype == 0) /* bound? */ + continue; + if (ep->ep_etype != etype) + continue; + if (ep->ep_ifindex != 0) { + if (ep->ep_ifindex != ifp->if_index) + continue; + } + + if (ep->ep_laddr != 0) { + if (ep->ep_laddr != dst) + continue; + } + /* ether_input says dst is valid for local delivery */ + + if (ep->ep_faddr != 0) { /* connected? 
*/ + if (ep->ep_faddr != src) + continue; + } + + if (sfrm.sfrm_family == AF_UNSPEC) { + sfrm.sfrm_len = sizeof(sfrm); + sfrm.sfrm_family = AF_FRAME; + sfrm.sfrm_proto = htons(etype); + sfrm.sfrm_ifindex = ifp->if_index; + ether_e64_to_addr((struct ether_addr *)sfrm.sfrm_addr, + src); + strlcpy(sfrm.sfrm_ifname, ifp->if_xname, + sizeof(sfrm.sfrm_ifname)); + } + + ether_frm_recv(ep->ep_socket, m, &sfrm); + } + rw_exit_read(ðer_pcb_lock); + + return (m); +} + +#endif /* NAF_FRAME */ Index: sys/net/frame.h =================================================================== RCS file: sys/net/frame.h diff -N sys/net/frame.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/net/frame.h 12 Dec 2024 12:07:48 -0000 @@ -0,0 +1,47 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2024 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */
+
+#ifndef _NET_FRAME_H_
+#define _NET_FRAME_H_
+
+#define FRAME_ADDRLEN 8 /* big enough for Ethernet */
+#define FRAME_DATALEN 32
+
+struct sockaddr_frame {
+	uint8_t sfrm_len; /* length of this structure */
+	uint8_t sfrm_family; /* AF_FRAME */
+	uint16_t sfrm_proto; /* Ethernet: Ethertype, network order */
+	unsigned int sfrm_ifindex; /* interface index; 0 = see sfrm_ifname */
+	uint8_t sfrm_addr[FRAME_ADDRLEN]; /* Ethernet: first 6 bytes */
+	char sfrm_ifname[IFNAMSIZ]; /* name, used if sfrm_ifindex is 0 */
+	uint8_t sfrm_data[FRAME_DATALEN];
+};
+
+#define FRAME_RECVDSTADDR 0 /* int; options below 64 are on/off flags */
+#define FRAME_RECVPRIO 1 /* int */
+#define FRAME_ADD_MEMBERSHIP 64 /* struct frame_mreq */
+#define FRAME_DEL_MEMBERSHIP 65 /* struct frame_mreq */
+#define FRAME_SENDPRIO 66 /* int: IF_HDRPRIO_{MIN-MAX,PACKET} */
+
+struct frame_mreq {
+	unsigned int fmr_ifindex; /* interface index; 0 = see fmr_ifname */
+	uint8_t fmr_addr[FRAME_ADDRLEN]; /* Ethernet: first 6 bytes */
+	char fmr_ifname[IFNAMSIZ]; /* name, used if fmr_ifindex is 0 */
+};
+
+#endif /* _NET_FRAME_H_ */
Index: sys/sys/socket.h
===================================================================
RCS file: /cvs/src/sys/sys/socket.h,v
diff -u -p -r1.105 socket.h
--- sys/sys/socket.h 3 Sep 2022 21:13:48 -0000 1.105
+++ sys/sys/socket.h 12 Dec 2024 12:07:48 -0000
@@ -200,7 +200,8 @@ struct splice {
 #define AF_MPLS 33 /* MPLS */
 #define pseudo_AF_PFLOW 34 /* pflow */
 #define pseudo_AF_PIPEX 35 /* PIPEX */
-#define AF_MAX 36
+#define AF_FRAME 36 /* frame (Ethernet) sockets */
+#define AF_MAX 37
 
 /*
  * Structure used by kernel to store most
@@ -284,6 +285,7 @@ struct sockproto {
 #define PF_MPLS AF_MPLS
 #define PF_PFLOW pseudo_AF_PFLOW
 #define PF_PIPEX pseudo_AF_PIPEX
+#define PF_FRAME AF_FRAME
 #define PF_MAX AF_MAX
 
 /*