Index: conf/GENERIC =================================================================== RCS file: /cvs/src/sys/conf/GENERIC,v diff -u -p -r1.299 GENERIC --- conf/GENERIC 3 Oct 2024 04:39:09 -0000 1.299 +++ conf/GENERIC 26 Nov 2024 09:28:18 -0000 @@ -114,5 +115,7 @@ pseudo-device wg # WireGuard pseudo-device bio 1 # ioctl multiplexing device pseudo-device fuse # fuse device + +pseudo-device af_packet # packet (Ethernet) sockets option BOOT_CONFIG # add support for boot -c Index: conf/files =================================================================== RCS file: /cvs/src/sys/conf/files,v diff -u -p -r1.741 files --- conf/files 31 Oct 2024 13:55:21 -0000 1.741 +++ conf/files 26 Nov 2024 09:28:18 -0000 @@ -601,6 +601,9 @@ pseudo-device pppx: ifnet pseudo-device vxlan: ifnet, ether, etherbridge pseudo-device wg: ifnet +pseudo-device af_packet +file net/af_packet.c af_packet needs-flag + pseudo-device ksyms file dev/ksyms.c ksyms needs-flag Index: kern/uipc_domain.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_domain.c,v diff -u -p -r1.68 uipc_domain.c --- kern/uipc_domain.c 16 Aug 2024 09:20:34 -0000 1.68 +++ kern/uipc_domain.c 26 Nov 2024 09:28:18 -0000 @@ -41,9 +41,14 @@ #include #include +#include "af_packet.h" #include "bpfilter.h" #include "pflow.h" +#if NAF_PACKET > 0 +extern const struct domain packetdomain; +#endif + const struct domain *const domains[] = { #ifdef MPLS &mplsdomain, @@ -57,6 +62,9 @@ const struct domain *const domains[] = { &inetdomain, &unixdomain, &routedomain, +#if NAF_PACKET > 0 + &packetdomain, +#endif NULL }; @@ -202,9 +210,13 @@ net_sysctl(int *name, u_int namelen, voi if (family == PF_UNSPEC) return (0); - if (family == PF_LINK) - return (net_link_sysctl(name + 1, namelen - 1, oldp, oldlenp, - newp, newlen)); + if (family == PF_LINK) { + int error = net_link_sysctl(name + 1, namelen - 1, + oldp, oldlenp, newp, newlen); + /* let link sockets have a go */ + if (error != 
ENOPROTOOPT) + return (error); + } if (family == PF_UNIX) return (uipc_sysctl(name + 1, namelen - 1, oldp, oldlenp, newp, newlen)); Index: kern/uipc_socket.c =================================================================== RCS file: /cvs/src/sys/kern/uipc_socket.c,v diff -u -p -r1.345 uipc_socket.c --- kern/uipc_socket.c 8 Nov 2024 21:47:03 -0000 1.345 +++ kern/uipc_socket.c 26 Nov 2024 09:28:18 -0000 @@ -167,6 +167,7 @@ soalloc(const struct protosw *prp, int w case AF_KEY: case AF_ROUTE: case AF_UNIX: + case AF_PACKET: so->so_snd.sb_flags |= SB_MTXLOCK; so->so_rcv.sb_flags |= SB_MTXLOCK; break; @@ -787,48 +788,39 @@ m_getuio(struct mbuf **mp, int atomic, l { struct mbuf *m, *top = NULL; struct mbuf **nextp = &top; - u_long len, mlen; - size_t resid = uio->uio_resid; + u_long len, mlen, alen; + int align = atomic ? roundup(max_hdr, sizeof(long)) : 0; int error; - do { - if (top == NULL) { - MGETHDR(m, M_WAIT, MT_DATA); - mlen = MHLEN; - } else { - MGET(m, M_WAIT, MT_DATA); - mlen = MLEN; - } + m = m_gethdr(M_WAIT, MT_DATA); + mlen = MHLEN; + + for (;;) { /* chain mbuf together */ *nextp = m; nextp = &m->m_next; - resid = ulmin(resid, space); - if (resid >= MINCLSIZE) { - MCLGETL(m, M_NOWAIT, ulmin(resid, MAXMCLBYTES)); - if ((m->m_flags & M_EXT) == 0) + /* How much data we want to put in this mbuf? */ + len = ulmin(uio->uio_resid, space); + /* How much space are we allocating for that data? */ + alen = align + len; + if (alen > mlen) { + MCLGETL(m, M_NOWAIT, ulmin(alen, MAXMCLBYTES)); + if (!ISSET(m->m_flags, M_EXT) && alen > MCLBYTES) MCLGETL(m, M_NOWAIT, MCLBYTES); - if ((m->m_flags & M_EXT) == 0) - goto nopages; - mlen = m->m_ext.ext_size; - len = ulmin(mlen, resid); - /* - * For datagram protocols, leave room - * for protocol headers in first mbuf. - */ - if (atomic && m == top && len < mlen - max_hdr) - m->m_data += max_hdr; - } else { -nopages: - len = ulmin(mlen, resid); - /* - * For datagram protocols, leave room - * for protocol headers in first mbuf. 
- */ - if (atomic && m == top && len < mlen - max_hdr) - m_align(m, len); + if (ISSET(m->m_flags, M_EXT)) + mlen = m->m_ext.ext_size; } + /* Avoid pain from a stupid max_hdr value */ + if (align < mlen) { + m->m_data += align; + /* the room skipped for headers cannot hold data */ + mlen -= align; + } + + /* How much data can we put in this mbuf? */ + len = ulmin(mlen, len); error = uiomove(mtod(m, caddr_t), len, uio); if (error) { m_freem(top); @@ -836,13 +828,19 @@ nopages: } /* adjust counters */ - resid = uio->uio_resid; space -= len; m->m_len = len; top->m_pkthdr.len += len; /* Is there more space and more data? */ - } while (space > 0 && resid > 0); + if (space == 0 || uio->uio_resid == 0) + break; + + align = 0; + + m = m_get(M_WAIT, MT_DATA); + mlen = MLEN; + } *mp = top; return 0; Index: net/af_packet.c =================================================================== RCS file: net/af_packet.c diff -N net/af_packet.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ net/af_packet.c 26 Nov 2024 09:28:18 -0000 @@ -0,0 +1,62 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2024 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +const struct domain packetdomain; + +/* reach over to if_ethersubr.c */ +int ether_pkt_ctloutput(int, struct socket *, int, int, struct mbuf *); +extern const struct pr_usrreqs ether_pkt_usrreqs; + +static const struct protosw packetsw[] = { + { + .pr_type = SOCK_DGRAM, + .pr_domain = &packetdomain, + .pr_protocol = IFT_ETHER, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_MPINPUT|PR_MPSOCKET, + + .pr_ctloutput = ether_pkt_ctloutput, + .pr_usrreqs = &ether_pkt_usrreqs, + .pr_sysctl = NULL /* ether_sysctl */, + }, +}; + +const struct domain packetdomain = { + .dom_family = AF_PACKET, + .dom_name = "packet", + .dom_protosw = packetsw, + .dom_protoswNPROTOSW = &packetsw[nitems(packetsw)], +}; + +void +af_packetattach(int n) +{ + /* nop */ +} Index: net/if_aggr.c =================================================================== RCS file: /cvs/src/sys/net/if_aggr.c,v diff -u -p -r1.46 if_aggr.c --- net/if_aggr.c 4 Sep 2024 07:54:52 -0000 1.46 +++ net/if_aggr.c 26 Nov 2024 09:28:18 -0000 @@ -745,15 +745,9 @@ aggr_start(struct ifqueue *ifq) } static inline int -aggr_eh_is_slow(const struct ether_header *eh) +aggr_eh_is_slow(uint64_t etype, uint64_t dst) { - uint64_t dst; - - if (eh->ether_type != htons(ETHERTYPE_SLOW)) - return (0); - - dst = ether_addr_to_e64((struct ether_addr *)eh->ether_dhost); - return (dst == LACP_ADDR_SLOW_E64); + return (etype == htons(ETHERTYPE_SLOW) && dst == LACP_ADDR_SLOW_E64); } static void @@ -765,13 +759,16 @@ aggr_input(struct ifnet *ifp0, struct mb struct ifnet *ifp = &sc->sc_if; struct ether_header *eh; int hlen = sizeof(*eh); + uint64_t dst; if (!ISSET(ifp->if_flags, IFF_RUNNING)) goto drop; eh = mtod(m, struct ether_header *); + dst = ether_addr_to_e64((struct ether_addr *)eh->ether_dhost); + if (!ISSET(m->m_flags, M_VLANTAG) && - __predict_false(aggr_eh_is_slow(eh))) { + __predict_false(aggr_eh_is_slow(eh->ether_type, dst))) { unsigned 
int rx_proto = AGGR_PROTO_RX_LACP; struct ether_slowproto_hdr *sph; int drop = 0; @@ -812,6 +809,13 @@ aggr_input(struct ifnet *ifp0, struct mb default: break; } + } + + /* 01:80:c2:00:00:0e (LLDP nearest bridge): give it to the port input */ + if (!ISSET(m->m_flags, M_VLANTAG) && + __predict_false(dst == 0x0180c200000e)) { + p->p_input(ifp0, m); + return; } if (__predict_false(!p->p_collecting)) Index: net/if_ethersubr.c =================================================================== RCS file: /cvs/src/sys/net/if_ethersubr.c,v diff -u -p -r1.293 if_ethersubr.c --- net/if_ethersubr.c 14 Feb 2024 22:41:48 -0000 1.293 +++ net/if_ethersubr.c 26 Nov 2024 09:28:18 -0000 @@ -140,6 +140,14 @@ didn't get a copy, you may request one f #include #endif /* MPLS */ +#include "af_packet.h" +#if NAF_PACKET > 0 +#include + +static struct mbuf * + ether_pkt_input(struct ifnet *, struct mbuf *, uint64_t, uint16_t); +#endif + /* #define ETHERDEBUG 1 */ #ifdef ETHERDEBUG int etherdebug = ETHERDEBUG; @@ -578,6 +586,9 @@ ether_input(struct ifnet *ifp, struct mb return; #endif default: +#if NAF_PACKET > 0 + m = ether_pkt_input(ifp, m, dst, etype); +#endif goto dropanyway; } @@ -1247,3 +1258,919 @@ ether_extract_headers(struct mbuf *m0, s ext->tcp ? "tcp," : "", ext->udp ? 
"udp," : "", ext->iplen, ext->iphlen, ext->tcphlen, ext->paylen); } + +#if NAF_PACKET > 0 + +#include +#include +#include + +/* + * lock order is: + * + * - socket lock + * - ether_pcb_lock + * - socket buffer mtx + */ + +struct ether_pcb; + +struct ether_pcb_group { + TAILQ_ENTRY(ether_pcb_group) + epg_entry; + struct ether_pcb * + epg_pcb; + unsigned int epg_ifindex; + uint8_t epg_addr[ETHER_ADDR_LEN]; + struct task epg_hook; +}; + +TAILQ_HEAD(ether_pcb_groups, ether_pcb_group); + +struct ether_pcb { + TAILQ_ENTRY(ether_pcb) + ep_entry; + struct rwlock ep_lock; + + struct socket *ep_socket; + + uint64_t ep_laddr; + uint64_t ep_faddr; + unsigned int ep_ifindex; + uint16_t ep_etype; + + uint64_t ep_options; + int ep_txprio; + + struct ether_pcb_groups + ep_groups; +}; + +TAILQ_HEAD(ether_pcb_list, ether_pcb); + +static int ether_pkt_attach(struct socket *, int, int); +static int ether_pkt_detach(struct socket *); +static int ether_pkt_bind(struct socket *, struct mbuf *, struct proc *); +static int ether_pkt_connect(struct socket *, struct mbuf *); +static int ether_pkt_disconnect(struct socket *); +static int ether_pkt_shutdown(struct socket *); +static int ether_pkt_send(struct socket *, struct mbuf *, struct mbuf *, + struct mbuf *); +static int ether_pkt_control(struct socket *, u_long, caddr_t, + struct ifnet *); +static int ether_pkt_sockaddr(struct socket *, struct mbuf *); +static int ether_pkt_peeraddr(struct socket *, struct mbuf *); + +const struct pr_usrreqs ether_pkt_usrreqs = { + .pru_attach = ether_pkt_attach, + .pru_detach = ether_pkt_detach, + .pru_bind = ether_pkt_bind, + .pru_connect = ether_pkt_connect, + .pru_disconnect = ether_pkt_disconnect, + .pru_shutdown = ether_pkt_shutdown, + .pru_send = ether_pkt_send, + .pru_control = ether_pkt_control, + .pru_sockaddr = ether_pkt_sockaddr, + .pru_peeraddr = ether_pkt_peeraddr, +}; + +static struct rwlock ether_pcb_lock = RWLOCK_INITIALIZER("ethsocks"); +static struct ether_pcb_list ether_pcbs = 
TAILQ_HEAD_INITIALIZER(ether_pcbs); + +static int +ether_pkt_nam2spkt(struct sockaddr_pkt **spktp, const struct mbuf *nam) +{ + struct sockaddr_pkt *spkt; + + if (nam->m_len != sizeof(*spkt)) + return (EINVAL); + + spkt = mtod(nam, struct sockaddr_pkt *); + if (spkt->spkt_family != AF_PACKET) + return (EAFNOSUPPORT); + *spktp = spkt; + return (0); +} + + +static int +ether_pkt_ifp(struct ifnet **ifpp, const struct sockaddr_pkt *spkt) +{ + struct ifnet *ifp; + + if (spkt->spkt_ifindex != 0) + ifp = if_get(spkt->spkt_ifindex); + else if (spkt->spkt_ifname[0] != '\0') { + KERNEL_LOCK(); + ifp = if_unit(spkt->spkt_ifname); + KERNEL_UNLOCK(); + } else { + *ifpp = NULL; + return (0); + } + + if (ifp == NULL) + return (ENXIO); + + if (ifp->if_type != IFT_ETHER) { + if_put(ifp); + return (EAFNOSUPPORT); + } + + *ifpp = ifp; + return (0); +} + +static int +ether_pkt_attach(struct socket *so, int proto, int wait) +{ + struct ether_pcb *ep; + int error; + + if (so->so_pcb != NULL) + return (EINVAL); + + error = suser(curproc); + if (error != 0) + return (error); + + error = soreserve(so, MCLBYTES, MCLBYTES); + if (error != 0) + return (error); + + ep = malloc(sizeof(*ep), M_PCB, (wait ? 
M_WAITOK : M_NOWAIT) | M_ZERO); + if (ep == NULL) + return (ENOMEM); + + rw_init(&ep->ep_lock, "ethsock"); + + so->so_pcb = ep; + ep->ep_socket = so; /* shares a ref with the list */ + + ep->ep_txprio = IF_HDRPRIO_PACKET; + TAILQ_INIT(&ep->ep_groups); + + /* give the ref to the list */ + rw_enter_write(&ether_pcb_lock); + TAILQ_INSERT_TAIL(&ether_pcbs, ep, ep_entry); + rw_exit_write(&ether_pcb_lock); + + return (0); +} + +static int +ether_pkt_detach(struct socket *so) +{ + struct ether_pcb *ep; + struct ether_pcb_group *epg, *nepg; + struct ifnet *ifp; + + soassertlocked(so); + + ep = so->so_pcb; + + /* take the ref from the list */ + rw_enter_write(&ether_pcb_lock); + TAILQ_REMOVE(&ether_pcbs, ep, ep_entry); + rw_exit_write(&ether_pcb_lock); + + so->so_pcb = NULL; /* shares a ref with the list */ + + /* XXX locking */ + TAILQ_FOREACH_SAFE(epg, &ep->ep_groups, epg_entry, nepg) { + ifp = if_get(epg->epg_ifindex); + if (ifp != NULL) { + struct ifreq ifr; + struct sockaddr *sa; + + if_detachhook_del(ifp, &epg->epg_hook); + + memset(&ifr, 0, sizeof(ifr)); + strlcpy(ifr.ifr_name, ifp->if_xname, + sizeof(ifr.ifr_name)); + sa = &ifr.ifr_addr; + sa->sa_family = AF_UNSPEC; + memcpy(sa->sa_data, &epg->epg_addr, ETHER_ADDR_LEN); + + (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); + } + if_put(ifp); + + TAILQ_REMOVE(&ep->ep_groups, epg, epg_entry); + free(epg, M_PCB, sizeof(*epg)); + } + + free(ep, M_PCB, sizeof(*ep)); + + return (0); +} + +static int +ether_pkt_bind(struct socket *so, struct mbuf *nam, struct proc *p) +{ + struct sockaddr_pkt *spkt; + struct ether_pcb *ep; + struct ether_pcb *epe; + struct ifnet *ifp = NULL; + unsigned int ifindex = 0; + uint16_t etype; + uint64_t laddr; + int error; + + soassertlocked(so); + + error = ether_pkt_nam2spkt(&spkt, nam); + if (error != 0) + return (error); + + etype = ntohs(spkt->spkt_proto); + if (etype != ETHERTYPE_LLDP) + return (EADDRNOTAVAIL); + + ep = so->so_pcb; + if (ep->ep_etype != 0) + return (EINVAL); + + error = 
ether_pkt_ifp(&ifp, spkt); + if (error != 0) + return (error); + if (ifp != NULL) + ifindex = ifp->if_index; + + laddr = ether_addr_to_e64((struct ether_addr *)spkt->spkt_addr); + + rw_enter_write(&ether_pcb_lock); + TAILQ_FOREACH(epe, &ether_pcbs, ep_entry) { + if (ep == epe) + continue; + + /* XXX check stuff */ + } + + if (error == 0) { + /* serialised by the socket lock */ + ep->ep_etype = etype; + ep->ep_ifindex = ifindex; + ep->ep_laddr = laddr; + } + rw_exit_write(&ether_pcb_lock); + + if_put(ifp); + return (error); +} + +static int +ether_pkt_connect(struct socket *so, struct mbuf *nam) +{ + struct sockaddr_pkt *spkt; + struct ether_pcb *ep; + struct ether_pcb *epe; + struct ifnet *ifp = NULL; + uint64_t faddr; + uint16_t etype; + int error; + + soassertlocked(so); + + error = ether_pkt_nam2spkt(&spkt, nam); + if (error != 0) + return (error); + + etype = ntohs(spkt->spkt_proto); + if (etype != ETHERTYPE_LLDP) + return (EADDRNOTAVAIL); + + faddr = ether_addr_to_e64((struct ether_addr *)spkt->spkt_addr); + if (faddr == 0) + return (EADDRNOTAVAIL); + + error = ether_pkt_ifp(&ifp, spkt); + if (error != 0) + return (error); + if (ifp == NULL) + return (EADDRNOTAVAIL); + + ep = so->so_pcb; + if (ep->ep_etype != 0) { + if (ep->ep_faddr != 0 || + ep->ep_etype != etype) { + error = EISCONN; + goto put; + } + } + if (ep->ep_ifindex != 0) { + if (ep->ep_ifindex != ifp->if_index) { + error = EADDRNOTAVAIL; + goto put; + } + } + + rw_enter_write(&ether_pcb_lock); + TAILQ_FOREACH(epe, &ether_pcbs, ep_entry) { + if (ep == epe) + continue; + /* XXX check stuff */ + } + + if (error == 0) { + /* serialised by the socket lock */ + ep->ep_etype = etype; + ep->ep_ifindex = ifp->if_index; + ep->ep_faddr = faddr; + } + rw_exit_write(&ether_pcb_lock); + +put: + if_put(ifp); + return (error); +} + +static int +ether_pkt_disconnect(struct socket *so) +{ + struct ether_pcb *ep; + + soassertlocked(so); + + ep = so->so_pcb; + if (ep->ep_faddr == 0) + return (ENOTCONN); + + rw_enter_write(&ether_pcb_lock); 
+ ep->ep_ifindex = 0; + ep->ep_etype = 0; + ep->ep_laddr = 0; + ep->ep_faddr = 0; + rw_exit_write(&ether_pcb_lock); + + return (0); +} + +static int +ether_pkt_shutdown(struct socket *so) +{ + soassertlocked(so); + socantsendmore(so); + return (0); +} + +static int +ether_pkt_send(struct socket *so, struct mbuf *m, struct mbuf *nam, + struct mbuf *control) +{ + struct ether_pcb *ep; + int error; + uint16_t etype; + uint64_t laddr; + uint64_t faddr; + struct ifnet *ifp = NULL; + struct arpcom *ac; + struct ether_header *eh; + int txprio; + + soassertlocked_readonly(so); + + ep = so->so_pcb; + KASSERTMSG(ep != NULL, "%s: NULL pcb on socket %p", __func__, so); + txprio = ep->ep_txprio; + + /* XXX get prio out of a cmsg */ + m_freem(control); + + if (nam != NULL) { + struct sockaddr_pkt *spkt; + + error = ether_pkt_nam2spkt(&spkt, nam); + if (error != 0) + goto drop; + + etype = ntohs(spkt->spkt_proto); + if (etype != ETHERTYPE_LLDP) { + error = EADDRNOTAVAIL; + goto drop; + } + + if (ep->ep_faddr != 0) { + error = EISCONN; + goto drop; + } + faddr = ether_addr_to_e64((struct ether_addr *)spkt->spkt_addr); + if (faddr == 0) { + error = EADDRNOTAVAIL; + goto drop; + } + + error = ether_pkt_ifp(&ifp, spkt); + if (error != 0) + goto drop; + if (ifp == NULL) { + ifp = if_get(ep->ep_ifindex); + if (ifp == NULL) { + error = EADDRNOTAVAIL; + goto drop; + } + } else { + if (ep->ep_ifindex != 0 && + ep->ep_ifindex != ifp->if_index) { + error = EADDRNOTAVAIL; + goto drop; + } + } + + if (ep->ep_etype != etype) { + if (ep->ep_etype == 0) { + /* this is cheeky */ + rw_enter_write(&ether_pcb_lock); + ep->ep_etype = etype; + rw_exit_write(&ether_pcb_lock); + } else { + error = EADDRNOTAVAIL; + goto drop; + } + } + } else { + faddr = ep->ep_faddr; + if (faddr == 0) { + error = ENOTCONN; + goto drop; + } + + ifp = if_get(ep->ep_ifindex); + if (ifp == NULL) { + error = ENXIO; + goto drop; + } + + etype = ep->ep_etype; + } + + if (ifp->if_type != IFT_ETHER) { + error = EAFNOSUPPORT; + goto drop; 
+ } + + ac = (struct arpcom *)ifp; + + laddr = ether_addr_to_e64((struct ether_addr *)ac->ac_enaddr); + if (ep->ep_laddr != laddr) { + if (ep->ep_laddr != 0) { + error = EADDRNOTAVAIL; + goto drop; + } + } + + m = m_prepend(m, ETHER_ALIGN + sizeof(*eh), M_NOWAIT); + if (m == NULL) { + /* don't return a stale or unset error */ + error = ENOBUFS; + goto drop; + } + m_adj(m, ETHER_ALIGN); + + if (txprio != IF_HDRPRIO_PACKET) + m->m_pkthdr.pf.prio = txprio; + + eh = mtod(m, struct ether_header *); + ether_e64_to_addr((struct ether_addr *)eh->ether_dhost, faddr); + ether_e64_to_addr((struct ether_addr *)eh->ether_shost, laddr); + eh->ether_type = htons(etype); + + error = if_enqueue(ifp, m); + m = NULL; + +drop: + if_put(ifp); + m_freem(m); + return (error); +} + +static int +ether_pkt_control(struct socket *so, u_long cmd, caddr_t data, + struct ifnet *ifp) +{ + return (EOPNOTSUPP); +} + +static int +ether_pkt_sockaddr_pkt(struct ether_pcb *ep, struct mbuf *nam, uint64_t addr) +{ + struct sockaddr_pkt *spkt; + struct ifnet *ifp; + + nam->m_len = sizeof(*spkt); + spkt = mtod(nam, struct sockaddr_pkt *); + memset(spkt, 0, sizeof(*spkt)); + spkt->spkt_len = sizeof(*spkt); + spkt->spkt_family = AF_PACKET; + + ether_e64_to_addr((struct ether_addr *)spkt->spkt_addr, addr); + + if (ep->ep_etype) { + spkt->spkt_proto = htons(ep->ep_etype); + spkt->spkt_ifindex = ep->ep_ifindex; + + ifp = if_get(ep->ep_ifindex); + if (ifp != NULL) { + strlcpy(spkt->spkt_ifname, ifp->if_xname, + sizeof(spkt->spkt_ifname)); + } + if_put(ifp); + } + + return (0); +} + +static int +ether_pkt_sockaddr(struct socket *so, struct mbuf *nam) +{ + struct ether_pcb *ep = so->so_pcb; + + return (ether_pkt_sockaddr_pkt(ep, nam, ep->ep_laddr)); +} + +static int +ether_pkt_peeraddr(struct socket *so, struct mbuf *nam) +{ + struct ether_pcb *ep = so->so_pcb; + + return (ether_pkt_sockaddr_pkt(ep, nam, ep->ep_faddr)); +} + +static void +ether_pkt_group_detach(void *arg) +{ + struct ether_pcb_group *epg = arg; + struct ether_pcb *ep = epg->epg_pcb; + struct socket *so = 
ep->ep_socket; + struct ifnet *ifp; + + ifp = if_get(epg->epg_ifindex); + + /* XXX locking^Wreference counts */ + solock(so); + if (ifp != NULL) + if_detachhook_del(ifp, &epg->epg_hook); + TAILQ_REMOVE(&ep->ep_groups, epg, epg_entry); + sounlock(so); + + if_put(ifp); + free(epg, M_PCB, sizeof(*epg)); +} + +static int +ether_pkt_group(struct socket *so, int optname, struct mbuf *m) +{ + struct packet_mreq *pmr; + struct ifreq ifr; + struct sockaddr *sa; + struct ifnet *ifp; + struct ether_pcb *ep; + struct ether_pcb_group *epg; + u_long cmd; + int error; + + soassertlocked(so); + + if (m == NULL || m->m_len != sizeof(*pmr)) + return (EINVAL); + + pmr = mtod(m, struct packet_mreq *); + if (!ETHER_IS_MULTICAST(pmr->pmr_addr)) + return (EADDRNOTAVAIL); + + if (pmr->pmr_ifindex == 0) { + KERNEL_LOCK(); + ifp = if_unit(pmr->pmr_ifname); + KERNEL_UNLOCK(); + } else + ifp = if_get(pmr->pmr_ifindex); + if (ifp == NULL) + return (ENXIO); + + if (ifp->if_type != IFT_ETHER) { + error = EADDRNOTAVAIL; + goto put; + } + + if (ETHER_IS_BROADCAST(pmr->pmr_addr)) { + error = 0; + goto put; + } + + ep = so->so_pcb; + TAILQ_FOREACH(epg, &ep->ep_groups, epg_entry) { + if (epg->epg_ifindex != ifp->if_index) + continue; + if (!ETHER_IS_EQ(epg->epg_addr, pmr->pmr_addr)) + continue; + + break; + } + + switch (optname) { + case PACKET_ADD_MEMBERSHIP: + if (epg != NULL) { + error = EISCONN; + goto put; + } + epg = malloc(sizeof(*epg), M_PCB, M_DONTWAIT); + if (epg == NULL) { + error = ENOMEM; + goto put; + } + + epg->epg_pcb = ep; + epg->epg_ifindex = ifp->if_index; + memcpy(&epg->epg_addr, pmr->pmr_addr, sizeof(epg->epg_addr)); + task_set(&epg->epg_hook, ether_pkt_group_detach, epg); + + cmd = SIOCADDMULTI; + break; + case PACKET_DEL_MEMBERSHIP: + if (epg == NULL) { + error = ENOTCONN; + goto put; + } + cmd = SIOCDELMULTI; + break; + default: + panic("%s: unexpected optname %d", __func__, optname); + /* NOTREACHED */ + } + + memset(&ifr, 0, sizeof(ifr)); + strlcpy(ifr.ifr_name, ifp->if_xname, 
sizeof(ifr.ifr_name)); + sa = &ifr.ifr_addr; + sa->sa_family = AF_UNSPEC; + memcpy(sa->sa_data, pmr->pmr_addr, ETHER_ADDR_LEN); + + /* XXX soref? */ + /* this could lead to multiple epgs for the same if/group */ + sounlock(so); + KERNEL_LOCK(); + NET_LOCK(); + error = (*ifp->if_ioctl)(ifp, cmd, (caddr_t)&ifr); + NET_UNLOCK(); + KERNEL_UNLOCK(); + solock(so); + + switch (optname) { + case PACKET_ADD_MEMBERSHIP: + if (error != 0) { + free(epg, M_PCB, sizeof(*epg)); + break; + } + + TAILQ_INSERT_TAIL(&ep->ep_groups, epg, epg_entry); + if_detachhook_add(ifp, &epg->epg_hook); + break; + case PACKET_DEL_MEMBERSHIP: + if (error != 0) + break; + + if_detachhook_del(ifp, &epg->epg_hook); + TAILQ_REMOVE(&ep->ep_groups, epg, epg_entry); + free(epg, M_PCB, sizeof(*epg)); + break; + } +put: + if_put(ifp); + + return (error); +} + +#define ETHER_PCB_OPTM(_v) (1ULL << (_v)) + +#define ETHER_PCB_OPTS \ + (ETHER_PCB_OPTM(PACKET_RECVDSTADDR) | \ + ETHER_PCB_OPTM(PACKET_RECVPRIO)) + +static int +ether_pkt_setopt(struct ether_pcb *ep, int optname, struct mbuf *m) +{ + uint64_t optm = ETHER_PCB_OPTM(optname); + int opt; + + if (!ISSET(ETHER_PCB_OPTS, optm)) + return (ENOPROTOOPT); + + if (m == NULL || m->m_len != sizeof(opt)) + return (EINVAL); + + opt = *mtod(m, int *); + if (opt) + SET(ep->ep_options, optm); + else + CLR(ep->ep_options, optm); + + return (0); +} + +static int +ether_pkt_setsockopt(struct socket *so, int optname, struct mbuf *m) +{ + struct ether_pcb *ep = so->so_pcb; + int error = ENOPROTOOPT; + int v; + + if (optname >= 0 && optname < 64) + return (ether_pkt_setopt(ep, optname, m)); + + switch (optname) { + case PACKET_ADD_MEMBERSHIP: + case PACKET_DEL_MEMBERSHIP: + error = ether_pkt_group(so, optname, m); + break; + case PACKET_SENDPRIO: + if (m == NULL || m->m_len != sizeof(v)) { + error = EINVAL; + break; + } + v = *mtod(m, int *); + error = if_txhprio_l2_check(v); + if (error != 0) + break; + ep->ep_txprio = v; + break; + + default: + break; + } + + return (error); +} + +static int 
+ether_pkt_getopt(struct ether_pcb *ep, int optname, struct mbuf *m) +{ + uint64_t optm = ETHER_PCB_OPTM(optname); + int opt; + + if (!ISSET(ETHER_PCB_OPTS, optm)) + return (ENOPROTOOPT); + + opt = !!ISSET(ep->ep_options, optm); + + m->m_len = sizeof(opt); + *mtod(m, int *) = opt; + + return (0); +} + +static int +ether_pkt_getsockopt(struct socket *so, int optname, struct mbuf *m) +{ + struct ether_pcb *ep = so->so_pcb; + int error = ENOPROTOOPT; + + if (optname >= 0 && optname < 64) + return (ether_pkt_getopt(ep, optname, m)); + + switch (optname) { + default: + break; + } + + return (error); +} + +int +ether_pkt_ctloutput(int op, struct socket *so, int level, int optname, + struct mbuf *m) +{ + int error = 0; + + if (level != IFT_ETHER) + return (EINVAL); + + switch (op) { + case PRCO_SETOPT: + error = ether_pkt_setsockopt(so, optname, m); + break; + case PRCO_GETOPT: + error = ether_pkt_getsockopt(so, optname, m); + break; + } + + return (error); +} + +static struct mbuf * +ether_pkt_cmsg(struct mbuf *cmsgs, const void *data, size_t datalen, + int type, int level) +{ + struct mbuf *cm; + + cm = sbcreatecontrol(data, datalen, type, level); + if (cm != NULL) { + cm->m_next = cmsgs; + cmsgs = cm; + } + + return (cmsgs); +} + +static void +ether_pkt_recv(struct socket *so, struct mbuf *m0, + const struct sockaddr_pkt *spkt) +{ + struct ether_pcb *ep = so->so_pcb; + struct mbuf *m; + struct mbuf *cmsgs = NULL; + int ok; + + /* offset 0 and m_adj cos sbappendaddr needs m_pkthdr.len */ + m = m_copym(m0, 0, M_COPYALL, M_DONTWAIT); + if (m == NULL) + return; + m_adj(m, sizeof(struct ether_header)); + + if (ISSET(ep->ep_options, ETHER_PCB_OPTM(PACKET_RECVPRIO))) { + int rxprio = m0->m_pkthdr.pf.prio; + cmsgs = ether_pkt_cmsg(cmsgs, &rxprio, sizeof(rxprio), + PACKET_RECVPRIO, IFT_ETHER); + } + + if (ISSET(ep->ep_options, ETHER_PCB_OPTM(PACKET_RECVDSTADDR))) { + struct ether_header *eh = mtod(m0, struct ether_header *); + cmsgs = ether_pkt_cmsg(cmsgs, eh->ether_dhost, 
ETHER_ADDR_LEN, + PACKET_RECVDSTADDR, IFT_ETHER); + } + + if (ISSET(so->so_options, SO_TIMESTAMP)) { + struct timeval tv; + m_microtime(m0, &tv); + cmsgs = ether_pkt_cmsg(cmsgs, &tv, sizeof(tv), + SCM_TIMESTAMP, SOL_SOCKET); + } + + mtx_enter(&so->so_rcv.sb_mtx); + ok = sbappendaddr(so, &so->so_rcv, (struct sockaddr *)spkt, m, cmsgs); + mtx_leave(&so->so_rcv.sb_mtx); + + if (!ok) { + m_freem(m); + m_freem(cmsgs); + return; + } + + sorwakeup(so); +} + +static struct mbuf * +ether_pkt_input(struct ifnet *ifp, struct mbuf *m, uint64_t dst, uint16_t etype) +{ + struct sockaddr_pkt spkt = { .spkt_family = AF_UNSPEC }; + struct ether_pcb *ep; + struct ether_header *eh; + uint64_t src; + + if (TAILQ_EMPTY(&ether_pcbs)) + return (m); + + eh = mtod(m, struct ether_header *); + src = ether_addr_to_e64((struct ether_addr *)eh->ether_shost); + if (src == 0) + return (m); + + rw_enter_read(&ether_pcb_lock); + TAILQ_FOREACH(ep, &ether_pcbs, ep_entry) { + if (ep->ep_etype == 0) /* bound? */ + continue; + if (ep->ep_etype != etype) + continue; + if (ep->ep_ifindex != 0) { + if (ep->ep_ifindex != ifp->if_index) + continue; + } + + if (ep->ep_laddr != 0) { + if (ep->ep_laddr != dst) + continue; + } + /* ether_input says dst is valid for local delivery */ + + if (ep->ep_faddr != 0) { /* connected? 
*/ + if (ep->ep_faddr != src) + continue; + } + + if (spkt.spkt_family == AF_UNSPEC) { + spkt.spkt_len = sizeof(spkt); + spkt.spkt_family = AF_PACKET; + spkt.spkt_proto = htons(etype); + spkt.spkt_ifindex = ifp->if_index; + ether_e64_to_addr((struct ether_addr *)spkt.spkt_addr, + src); + strlcpy(spkt.spkt_ifname, ifp->if_xname, + sizeof(spkt.spkt_ifname)); + } + + ether_pkt_recv(ep->ep_socket, m, &spkt); + } + rw_exit_read(&ether_pcb_lock); + + return (m); +} + +#endif /* NAF_PACKET */ Index: net/packet.h =================================================================== RCS file: net/packet.h diff -N net/packet.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ net/packet.h 26 Nov 2024 09:28:18 -0000 @@ -0,0 +1,51 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2024 David Gwynne + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#ifndef _NET_PACKET_H_ +#define _NET_PACKET_H_ + +/* + * packet sockets + */ + +#define PACKET_ADDRLEN 8 /* big enough for Ethernet */ +#define PACKET_DATALEN 32 + +struct sockaddr_pkt { + uint8_t spkt_len; + uint8_t spkt_family; /* AF_PACKET */ + uint16_t spkt_proto; /* Ethertype, network byte order */ + unsigned int spkt_ifindex; /* 0: look at spkt_ifname instead */ + uint8_t spkt_addr[PACKET_ADDRLEN]; + char spkt_ifname[IFNAMSIZ]; + uint8_t spkt_data[PACKET_DATALEN]; +}; + +#define PACKET_RECVDSTADDR 0 /* int */ +#define PACKET_RECVPRIO 1 /* int */ +#define PACKET_ADD_MEMBERSHIP 64 /* struct packet_mreq */ +#define PACKET_DEL_MEMBERSHIP 65 /* struct packet_mreq */ +#define PACKET_SENDPRIO 66 /* int: IF_HDRPRIO_{MIN-MAX,PACKET} */ + +struct packet_mreq { + unsigned int pmr_ifindex; /* 0: look at pmr_ifname instead */ + uint8_t pmr_addr[PACKET_ADDRLEN]; + char pmr_ifname[IFNAMSIZ]; +}; + +#endif /* _NET_PACKET_H_ */ Index: sys/socket.h =================================================================== RCS file: /cvs/src/sys/sys/socket.h,v diff -u -p -r1.105 socket.h --- sys/socket.h 3 Sep 2022 21:13:48 -0000 1.105 +++ sys/socket.h 26 Nov 2024 09:28:18 -0000 @@ -200,7 +200,8 @@ struct splice { #define AF_MPLS 33 /* MPLS */ #define pseudo_AF_PFLOW 34 /* pflow */ #define pseudo_AF_PIPEX 35 /* PIPEX */ -#define AF_MAX 36 +#define AF_PACKET 36 /* packet (Ethernet) sockets */ +#define AF_MAX 37 /* * Structure used by kernel to store most @@ -284,6 +285,7 @@ struct sockproto { #define PF_MPLS AF_MPLS #define PF_PFLOW pseudo_AF_PFLOW #define PF_PIPEX pseudo_AF_PIPEX +#define PF_PACKET AF_PACKET #define PF_MAX AF_MAX /*