Index: net/if_aggr.c
===================================================================
RCS file: net/if_aggr.c
diff -N net/if_aggr.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ net/if_aggr.c	27 Jun 2019 03:14:16 -0000
@@ -0,0 +1,2675 @@
+/* $OpenBSD */
+
+/*
+ * Copyright (c) 2019 The University of Queensland
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * This driver implements 802.1AX Link Aggregation (formerly 802.3ad)
+ *
+ * The specification describes systems with multiple ports that
+ * can dynamically form aggregations. The relationships between ports
+ * and aggregations are such that arbitrary ports connected to ports
+ * on other systems may move between aggregations, and there can be
+ * as many aggregations as ports. An aggregation in this model is
+ * effectively an interface, and becomes the point that Ethernet traffic
+ * enters and leaves the system. The spec also contains a description
+ * of the Link Aggregation Control Protocol (LACP) for use on the wire,
+ * and how to process it and select ports and aggregations based on
+ * it.
+ *
+ * This driver implements a simplified or constrained model where each
+ * aggr(4) interface is effectively an independent system, and will
+ * only support one aggregation. This supports the use of the kernel
+ * interface as a static entity that is created and configured once,
+ * and has the link "come up" when that one aggregation is selected
+ * by the LACP protocol.
+ */
+
+/*
+ * This code was written by David Gwynne as part
+ * of the Information Technology Infrastructure Group (ITIG) in the
+ * Faculty of Engineering, Architecture and Information Technology
+ * (EAIT).
+ */ + +/* + * TODO: + * + * - add locking + * - figure out the Ready_N and Ready logic + */ + +#include "bpfilter.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include + +#include /* if_trunk.h uses siphash bits */ +#include + +#if NBPFILTER > 0 +#include +#endif + +/* + * Link Aggregation Control Protocol (LACP) + */ + +struct ether_slowproto_hdr { + uint8_t sph_subtype; + uint8_t sph_version; +} __packed; + +#define SLOWPROTOCOLS_SUBTYPE_LACP 1 +#define SLOWPROTOCOLS_SUBTYPE_LACP_MARKER \ + 2 + +#define LACP_VERSION 1 + +#define LACP_FAST_PERIODIC_TIME 1 +#define LACP_SLOW_PERIODIC_TIME 30 +#define LACP_TIMEOUT_FACTOR 3 +#define LACP_AGGREGATION_WAIT_TIME 2 + +#define LACP_TX_MACHINE_RATE 3 /* per LACP_FAST_PERIODIC_TIME */ + +#define LACP_ADDR_C_BRIDGE { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 } +#define LACP_ADDR_SLOW { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x02 } +#define LACP_ADDR_NON_TPMR_BRIDGE { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x03 } + +struct lacp_tlv_hdr { + uint8_t lacp_tlv_type; + uint8_t lacp_tlv_length; +} __packed __aligned(2); + +/* LACP TLV types */ + +#define LACP_T_TERMINATOR 0x00 +#define LACP_T_ACTOR 0x01 +#define LACP_T_PARTNER 0x02 +#define LACP_T_COLLECTOR 0x03 + +/* LACPv2 TLV types */ + +#define LACP_T_PORT_ALGORITHM 0x04 +#define LACP_T_PORT_CONVERSATION_ID_DIGEST \ + 0x05 +#define LACP_T_PORT_CONVERSATION_MASK 0x06 +#define LACP_T_PORT_CONVERSATION_SERVICE_MAPPING \ + 0x0a + +struct lacp_sysid { + uint16_t lacp_sysid_priority; + uint8_t lacp_sysid_mac[ETHER_ADDR_LEN]; +} __packed __aligned(2); + +struct lacp_portid { + uint16_t lacp_portid_priority; + uint16_t lacp_portid_number; +} __packed __aligned(2); + +struct lacp_port_info { + struct lacp_sysid lacp_sysid; + uint16_t lacp_key; + struct lacp_portid lacp_portid; + uint8_t lacp_state; + uint8_t lacp_reserved[3]; +} __packed __aligned(2); + +#define LACP_STATE_ACTIVITY (1 << 0) +#define LACP_STATE_TIMEOUT (1 << 1) +#define LACP_STATE_AGGREGATION (1 << 2) +#define LACP_STATE_SYNC (1 << 3) +#define LACP_STATE_COLLECTING (1 << 4) +#define LACP_STATE_DISTRIBUTING (1 << 5) +#define LACP_STATE_DEFAULTED (1 << 6) +#define LACP_STATE_EXPIRED (1 << 7) + +struct lacp_collector_info { + uint16_t lacp_maxdelay; + uint8_t lacp_reserved[12]; +} __packed __aligned(2); + +struct lacp_du { + struct ether_slowproto_hdr + lacp_du_sph; + struct lacp_tlv_hdr lacp_actor_info_tlv; + struct lacp_port_info lacp_actor_info; + struct lacp_tlv_hdr lacp_partner_info_tlv; + struct lacp_port_info lacp_partner_info; + struct lacp_tlv_hdr lacp_collector_info_tlv; + struct lacp_collector_info + lacp_collector_info; + /* other TLVs go here */ + struct lacp_tlv_hdr lacp_terminator; + uint8_t lacp_pad[50]; +} __packed __aligned(2); + +enum lacp_rxm_state { + LACP_RXM_S_BEGIN = 0, + LACP_RXM_S_INITIALIZE, + LACP_RXM_S_PORT_DISABLED, + LACP_RXM_S_EXPIRED, + LACP_RXM_S_LACP_DISABLED, + LACP_RXM_S_DEFAULTED, + LACP_RXM_S_CURRENT, +}; + +enum lacp_rxm_event { + LACP_RXM_E_BEGIN, + LACP_RXM_E_PORT_MOVED, + LACP_RXM_E_NOT_PORT_MOVED, + LACP_RXM_E_PORT_ENABLED, + LACP_RXM_E_NOT_PORT_ENABLED, + LACP_RXM_E_LACP_ENABLED, + LACP_RXM_E_NOT_LACP_ENABLED, + LACP_RXM_E_LACPDU, /* CtrlMuxN:M_UNITDATA.indication(LACPDU) */ + LACP_RXM_E_TIMER_EXPIRED, /* current_while_timer expired */ +}; + +enum lacp_mux_state { + LACP_MUX_S_BEGIN = 0, + LACP_MUX_S_DETACHED, + LACP_MUX_S_WAITING, + LACP_MUX_S_ATTACHED, + LACP_MUX_S_DISTRIBUTING, + 
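The LACP_STATE_* bits above pack the 802.1AX actor/partner state flags into the single state octet carried in each port information TLV. As a standalone illustration only (not part of the diff; the helper names and the sample value are made up), a small userspace program that decodes such an octet:

	#include <stdio.h>
	#include <stdint.h>

	/* same bit order as the LACP_STATE_* defines above */
	static const char *lacp_state_bits[8] = {
		"ACTIVITY", "TIMEOUT", "AGGREGATION", "SYNC",
		"COLLECTING", "DISTRIBUTING", "DEFAULTED", "EXPIRED",
	};

	static void
	lacp_state_print(uint8_t state)
	{
		int i;

		for (i = 0; i < 8; i++) {
			if (state & (1 << i))
				printf("%s ", lacp_state_bits[i]);
		}
		printf("\n");
	}

	int
	main(void)
	{
		/* 0x3d = ACTIVITY|AGGREGATION|SYNC|COLLECTING|DISTRIBUTING */
		lacp_state_print(0x3d);
		return (0);
	}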
LACP_MUX_S_COLLECTING, +}; + +enum lacp_mux_event { + LACP_MUX_E_BEGIN, + LACP_MUX_E_SELECTED, + LACP_MUX_E_STANDBY, + LACP_MUX_E_UNSELECTED, + LACP_MUX_E_READY, + LACP_MUX_E_SYNC, + LACP_MUX_E_NOT_SYNC, + LACP_MUX_E_COLLECTING, + LACP_MUX_E_NOT_COLLECTING, +}; + +/* + * LACP variables + */ + +static const uint8_t lacp_address_slow[ETHER_ADDR_LEN] = LACP_ADDR_SLOW; + +static const char *lacp_rxm_state_names[] = { + "BEGIN", + "INITIALIZE", + "PORT_DISABLED", + "EXPIRED", + "LACP_DISABLED", + "DEFAULTED", + "CURRENT", +}; + +static const char *lacp_rxm_event_names[] = { + "BEGIN", + "port_moved", + "!port_moved", + "port_enabled", + "!port_enabled", + "LACP_Enabled", + "!LACP_Enabled", + "LACPDU", + "current_while_timer expired", +}; + +static const char *lacp_mux_state_names[] = { + "BEGIN", + "DETACHED", + "WAITING", + "ATTACHED", + "DISTRIBUTING", + "COLLECTING", +}; + +static const char *lacp_mux_event_names[] = { + "BEGIN", + "Selected == SELECTED", + "Selected == STANDBY", + "Selected == UNSELECTED", + "Ready", + "Partner.Sync", + "! Partner.Sync", + "Partner.Collecting", + "! Partner.Collecting", +}; + +/* + * aggr interface + */ + +#define AGGR_MAX_PORTS 32 +#define AGGR_MAX_SLOW_PKTS (AGGR_MAX_PORTS * 3) + +struct aggr_multiaddr { + TAILQ_ENTRY(aggr_multiaddr) + m_entry; + unsigned int m_refs; + uint8_t m_addrlo[ETHER_ADDR_LEN]; + uint8_t m_addrhi[ETHER_ADDR_LEN]; + struct sockaddr m_addr; +}; +TAILQ_HEAD(aggr_multiaddrs, aggr_multiaddr); + +struct aggr_softc; + +enum aggr_port_selected { + AGGR_PORT_UNSELECTED, + AGGR_PORT_SELECTED, + AGGR_PORT_STANDBY, +}; + +struct aggr_port { + struct ifnet *p_ifp0; + + uint8_t p_lladdr[ETHER_ADDR_LEN]; + + int (*p_ioctl)(struct ifnet *, u_long, caddr_t); + int (*p_output)(struct ifnet *, struct mbuf *, struct sockaddr *, + struct rtentry *); + + void *p_lcookie; + void *p_dcookie; + + struct aggr_softc *p_aggr; + TAILQ_ENTRY(aggr_port) p_entry; + + unsigned int p_collecting; + unsigned int p_distributing; + TAILQ_ENTRY(aggr_port) p_entry_distributing; + TAILQ_ENTRY(aggr_port) p_entry_muxen; + + /* Partner information */ + enum aggr_port_selected p_muxed; + enum aggr_port_selected p_selected; /* Selected */ + struct lacp_port_info p_partner; +#define p_partner_state p_partner.lacp_state + + uint8_t p_actor_state; + uint8_t p_lacp_timeout; + + struct timeout p_current_while_timer; + struct timeout p_wait_while_timer; + + /* Receive machine */ + enum lacp_rxm_state p_rxm_state; + struct mbuf_queue p_rxm_mq; + struct task p_rxm_task; + + /* Periodic Transmission machine */ + struct timeout p_ptm_tx; + + /* Mux machine */ + enum lacp_mux_state p_mux_state; + + /* Transmit machine */ + int p_txm_log[LACP_TX_MACHINE_RATE]; + unsigned int p_txm_slot; + struct timeout p_txm_ntt; +}; + +TAILQ_HEAD(aggr_port_list, aggr_port); + +struct aggr_map { + struct ifnet *m_ifp0s[AGGR_MAX_PORTS]; +}; + +struct aggr_softc { + struct arpcom sc_ac; +#define sc_if sc_ac.ac_if + unsigned int sc_dead; + unsigned int sc_promisc; + struct ifmedia sc_media; + + struct aggr_multiaddrs sc_multiaddrs; + + unsigned int sc_mix; + + struct aggr_map sc_maps[2]; + unsigned int sc_map_gen; + struct aggr_map *sc_map; + + struct rwlock sc_lock; + struct aggr_port_list sc_ports; + struct aggr_port_list sc_distributing; + struct aggr_port_list sc_muxen; + unsigned int sc_nports; + unsigned int sc_ndistributing; + + struct timeout sc_tick; + + uint8_t sc_lacp_mode; +#define AGGR_LACP_MODE_PASSIVE 0 +#define AGGR_LACP_MODE_ACTIVE 1 + uint8_t sc_lacp_timeout; +#define 
AGGR_LACP_TIMEOUT_SLOW 0 +#define AGGR_LACP_TIMEOUT_FAST 1 + uint16_t sc_lacp_prio; + uint16_t sc_lacp_port_prio; + + struct lacp_sysid sc_partner_system; + uint16_t sc_partner_key; +}; + +#define DPRINTF(_sc, fmt...) do { \ + if (ISSET((_sc)->sc_if.if_flags, IFF_DEBUG)) \ + printf(fmt); \ +} while (0) + +static const unsigned int aggr_periodic_times[] = { + [AGGR_LACP_TIMEOUT_SLOW] = LACP_SLOW_PERIODIC_TIME, + [AGGR_LACP_TIMEOUT_FAST] = LACP_FAST_PERIODIC_TIME, +}; + +static int aggr_clone_create(struct if_clone *, int); +static int aggr_clone_destroy(struct ifnet *); + +static int aggr_ioctl(struct ifnet *, u_long, caddr_t); +static void aggr_start(struct ifqueue *); +static int aggr_enqueue(struct ifnet *, struct mbuf *); + +static int aggr_media_change(struct ifnet *); +static void aggr_media_status(struct ifnet *, struct ifmediareq *); + +static int aggr_up(struct aggr_softc *); +static int aggr_down(struct aggr_softc *); +static int aggr_iff(struct aggr_softc *); + +static void aggr_p_linkch(void *); +static void aggr_p_detach(void *); +static int aggr_p_ioctl(struct ifnet *, u_long, caddr_t); +static int aggr_p_output(struct ifnet *, struct mbuf *, + struct sockaddr *, struct rtentry *); + +static int aggr_get_trunk(struct aggr_softc *, struct trunk_reqall *); +static int aggr_set_options(struct aggr_softc *, + const struct trunk_opts *); +static int aggr_get_options(struct aggr_softc *, struct trunk_opts *); +static int aggr_set_lladdr(struct aggr_softc *, const struct ifreq *); +static void aggr_p_dtor(struct aggr_softc *, struct aggr_port *, + const char *); +static int aggr_p_setlladdr(struct aggr_port *, const uint8_t *); +static int aggr_add_port(struct aggr_softc *, + const struct trunk_reqport *); +static int aggr_get_port(struct aggr_softc *, struct trunk_reqport *); +static int aggr_del_port(struct aggr_softc *, + const struct trunk_reqport *); +static int aggr_group(struct aggr_softc *, struct aggr_port *, u_long); +static int aggr_multi(struct aggr_softc *, struct aggr_port *, + const struct aggr_multiaddr *, u_long); +static uint32_t aggr_hardmtu(struct aggr_softc *); +static void aggr_set_lacp_mode(struct aggr_softc *, int); +static void aggr_set_lacp_timeout(struct aggr_softc *, int); +static int aggr_multi_add(struct aggr_softc *, struct ifreq *); +static int aggr_multi_del(struct aggr_softc *, struct ifreq *); + +static void aggr_map(struct aggr_softc *); + +static void aggr_current_while_timer(void *); +static void aggr_wait_while_timer(void *); +static void aggr_rx(void *); +static void aggr_rxm_ev(struct aggr_softc *, struct aggr_port *, + enum lacp_rxm_event, const struct lacp_du *); +#define aggr_rxm(_sc, _p, _ev) \ + aggr_rxm_ev((_sc), (_p), (_ev), NULL) +#define aggr_rxm_lacpdu(_sc, _p, _lacpdu) \ + aggr_rxm_ev((_sc), (_p), LACP_RXM_E_LACPDU, (_lacpdu)) + +static void aggr_mux(struct aggr_softc *, struct aggr_port *, + enum lacp_mux_event); +static int aggr_mux_ev(struct aggr_softc *, struct aggr_port *, + enum lacp_mux_event, int *); + +static void aggr_set_partner_timeout(struct aggr_port *, int); + +static void aggr_ptm_tx(void *); + +static void aggr_transmit_machine(void *); +static void aggr_ntt(struct aggr_port *); + +static void aggr_set_selected(struct aggr_port *, enum aggr_port_selected, + enum lacp_mux_event); +static void aggr_unselected(struct aggr_port *); + +static void aggr_selection_logic(struct aggr_softc *, struct aggr_port *); + +#define ether_cmp(_a, _b) memcmp((_a), (_b), ETHER_ADDR_LEN) +#define ether_is_eq(_a, _b) (ether_cmp((_a), 
(_b)) == 0) +#define ether_is_slow(_a) ether_is_eq((_a), lacp_address_slow) +#define ether_is_zero(_a) ether_is_eq((_a), etheranyaddr) + +static struct if_clone aggr_cloner = + IF_CLONE_INITIALIZER("aggr", aggr_clone_create, aggr_clone_destroy); + +void +aggrattach(int count) +{ + if_clone_attach(&aggr_cloner); +} + +static int +aggr_clone_create(struct if_clone *ifc, int unit) +{ + struct aggr_softc *sc; + struct ifnet *ifp; + + sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL); + if (sc == NULL) + return (ENOMEM); + + sc->sc_mix = arc4random(); + + ifp = &sc->sc_if; + + snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", + ifc->ifc_name, unit); + + TAILQ_INIT(&sc->sc_multiaddrs); + rw_init(&sc->sc_lock, "aggrlk"); + TAILQ_INIT(&sc->sc_ports); + sc->sc_nports = 0; + TAILQ_INIT(&sc->sc_distributing); + sc->sc_ndistributing = 0; + TAILQ_INIT(&sc->sc_muxen); + + sc->sc_map_gen = 0; + sc->sc_map = NULL; /* no links yet */ + + sc->sc_lacp_mode = AGGR_LACP_MODE_ACTIVE; + sc->sc_lacp_timeout = 0; /* passive */ + sc->sc_lacp_prio = 0x8000; /* medium */ + sc->sc_lacp_port_prio = 0x8000; /* medium */ + + ifmedia_init(&sc->sc_media, 0, aggr_media_change, aggr_media_status); + ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); + ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO); + + ifp->if_softc = sc; + ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN; + ifp->if_ioctl = aggr_ioctl; + ifp->if_qstart = aggr_start; + ifp->if_enqueue = aggr_enqueue; + ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX; + ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE; + ifp->if_link_state = LINK_STATE_DOWN; + IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); + ether_fakeaddr(ifp); + + if_counters_alloc(ifp); + if_attach(ifp); + ether_ifattach(ifp); + + ifp->if_llprio = IFQ_MAXPRIO; + + return (0); +} + +static int +aggr_clone_destroy(struct ifnet *ifp) +{ + struct aggr_softc *sc = ifp->if_softc; + struct aggr_port *p; + + NET_LOCK(); + sc->sc_dead = 1; + + if (ISSET(ifp->if_flags, IFF_RUNNING)) + aggr_down(sc); + NET_UNLOCK(); + + ether_ifdetach(ifp); + if_detach(ifp); + + /* last ref, no need to lock. aggr_p_dtor locks anyway */ + while ((p = TAILQ_FIRST(&sc->sc_ports)) != NULL) + aggr_p_dtor(sc, p, "destroy"); + + free(sc, M_DEVBUF, sizeof(*sc)); + + return (0); +} + +/* + * LACP_Enabled + */ +static inline int +aggr_lacp_enabled(struct aggr_softc *sc) +{ + struct ifnet *ifp = &sc->sc_if; + return (ISSET(ifp->if_flags, IFF_RUNNING)); +} + +/* + * port_enabled + */ +static int +aggr_port_enabled(struct aggr_port *p) +{ + struct ifnet *ifp0 = p->p_ifp0; + + if (!ISSET(ifp0->if_flags, IFF_RUNNING)) + return (0); + + switch (ifp0->if_link_state) { + case LINK_STATE_UNKNOWN: + case LINK_STATE_UP: + case LINK_STATE_FULL_DUPLEX: + break; + default: + return (0); + } + + return (1); +} + +/* + * port_moved + * + * This variable is set to TRUE if the Receive machine for an Aggregation + * Port is in the PORT_DISABLED state, and the combination of + * Partner_Oper_System and Partner_Oper_Port_Number in use by that + * Aggregation Port has been received in an incoming LACPDU on a + * different Aggregation Port. This variable is set to FALSE once the + * INITIALIZE state of the Receive machine has set the Partner information + * for the Aggregation Port to administrative default values. 
+ * + * Value: Boolean +*/ +static int +aggr_port_moved(struct aggr_softc *sc, struct aggr_port *p) +{ + return (0); +} + +static void +aggr_transmit(struct aggr_softc *sc, const struct aggr_map *map, struct mbuf *m) +{ + struct ifnet *ifp = &sc->sc_if; + struct ifnet *ifp0; + uint16_t flow = 0; + +#if NBPFILTER > 0 + { + caddr_t if_bpf = ifp->if_bpf; + if (if_bpf) + bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT); + } +#endif + + if (ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID)) + flow = m->m_pkthdr.ph_flowid; + + ifp0 = map->m_ifp0s[flow % AGGR_MAX_PORTS]; + + if (if_enqueue(ifp0, m) != 0) + counters_inc(ifp->if_counters, ifc_oerrors); +} + +static int +aggr_enqueue(struct ifnet *ifp, struct mbuf *m) +{ + struct aggr_softc *sc; + const struct aggr_map *map; + int error = 0; + + if (!ifq_is_priq(&ifp->if_snd)) + return (if_enqueue_ifq(ifp, m)); + + sc = ifp->if_softc; + + smr_read_enter(); + map = SMR_PTR_GET(&sc->sc_map); + if (__predict_false(map == NULL)) { + m_freem(m); + error = ENETDOWN; + } else { + counters_pkt(ifp->if_counters, + ifc_opackets, ifc_obytes, m->m_pkthdr.len); + aggr_transmit(sc, map, m); + } + smr_read_leave(); + + return (error); +} + +static void +aggr_start(struct ifqueue *ifq) +{ + struct ifnet *ifp = ifq->ifq_if; + struct aggr_softc *sc = ifp->if_softc; + const struct aggr_map *map; + + smr_read_enter(); + map = SMR_PTR_GET(&sc->sc_map); + if (__predict_false(map == NULL)) + ifq_purge(ifq); + else { + struct mbuf *m; + + while ((m = ifq_dequeue(ifq)) != NULL) + aggr_transmit(sc, map, m); + } + smr_read_leave(); +} + +static inline int +aggr_eh_is_slow(const struct ether_header *eh) +{ + return (ether_is_slow(eh->ether_dhost) && + eh->ether_type == htons(ETHERTYPE_SLOW)); +} + +static int +aggr_input(struct ifnet *ifp0, struct mbuf *m, void *cookie) +{ + struct ether_header *eh; + struct aggr_port *p = cookie; + struct aggr_softc *sc = p->p_aggr; + struct ifnet *ifp = &sc->sc_if; + int hlen = sizeof(*eh); + + if (!ISSET(ifp->if_flags, IFF_RUNNING)) + goto drop; + + eh = mtod(m, struct ether_header *); + if (!ISSET(m->m_flags, M_VLANTAG) && + __predict_false(aggr_eh_is_slow(eh))) { + struct ether_slowproto_hdr *sph; + + hlen += sizeof(*sph); + if (m->m_len < hlen) { + m = m_pullup(m, hlen); + if (m == NULL) { + /* short++ */ + return (1); + } + eh = mtod(m, struct ether_header *); + } + + sph = (struct ether_slowproto_hdr *)(eh + 1); + switch (sph->sph_subtype) { + case SLOWPROTOCOLS_SUBTYPE_LACP: + case SLOWPROTOCOLS_SUBTYPE_LACP_MARKER: + if (mq_enqueue(&p->p_rxm_mq, m) == 0) + task_add(systq, &p->p_rxm_task); + return (1); + default: + break; + } + } + + if (__predict_false(!p->p_collecting)) + goto drop; + + if (!ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID)) { + m->m_pkthdr.ph_flowid = M_FLOWID_VALID | + (ifp0->if_index ^ sc->sc_mix); + } + + if_vinput(ifp, m); + + return (1); + +drop: + m_freem(m); + return (1); +} + +static int +aggr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + struct aggr_softc *sc = ifp->if_softc; + struct ifreq *ifr = (struct ifreq *)data; + int error = 0; + + if (sc->sc_dead) + return (ENXIO); + + switch (cmd) { + case SIOCSIFADDR: + break; + + case SIOCSIFFLAGS: + if (ISSET(ifp->if_flags, IFF_UP)) { + if (!ISSET(ifp->if_flags, IFF_RUNNING)) + error = aggr_up(sc); + else + error = ENETRESET; + } else { + if (ISSET(ifp->if_flags, IFF_RUNNING)) + error = aggr_down(sc); + } + break; + + case SIOCSIFLLADDR: + error = aggr_set_lladdr(sc, ifr); + break; + + case SIOCGTRUNK: + error = aggr_get_trunk(sc, (struct trunk_reqall *)data); + 
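aggr_transmit() above selects an output port by indexing the 32-slot port map with the packet's flow id, and aggr_map() (further down) refills that map round-robin from the distributing ports, so flows spread across however many ports are active. A minimal standalone sketch of that spreading scheme; the port names are hypothetical and the code is illustration only:

	#include <stdio.h>

	#define AGGR_MAX_PORTS 32

	int
	main(void)
	{
		const char *ports[] = { "em0", "em1", "em2" };	/* hypothetical */
		const char *map[AGGR_MAX_PORTS];
		unsigned int nports = 3;
		unsigned int i, flow;

		/* like aggr_map(): spread the ports over the map slots */
		for (i = 0; i < AGGR_MAX_PORTS; i++)
			map[i] = ports[i % nports];

		/* like aggr_transmit(): pick a port by flow id */
		for (flow = 0; flow < 5; flow++)
			printf("flow %u -> %s\n", flow, map[flow % AGGR_MAX_PORTS]);

		return (0);
	}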
break; + + case SIOCSTRUNKOPTS: + error = suser(curproc); + if (error != 0) + break; + + error = aggr_set_options(sc, (struct trunk_opts *)data); + break; + + case SIOCGTRUNKOPTS: + error = aggr_get_options(sc, (struct trunk_opts *)data); + break; + + case SIOCGTRUNKPORT: + error = aggr_get_port(sc, (struct trunk_reqport *)data); + break; + case SIOCSTRUNKPORT: + error = suser(curproc); + if (error != 0) + break; + + error = aggr_add_port(sc, (struct trunk_reqport *)data); + break; + case SIOCSTRUNKDELPORT: + error = suser(curproc); + if (error != 0) + break; + + error = aggr_del_port(sc, (struct trunk_reqport *)data); + break; + + case SIOCADDMULTI: + error = aggr_multi_add(sc, ifr); + break; + case SIOCDELMULTI: + error = aggr_multi_del(sc, ifr); + break; + + case SIOCSIFMEDIA: + error = EOPNOTSUPP; + break; + case SIOCGIFMEDIA: + error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); + break; + + default: + error = ether_ioctl(ifp, &sc->sc_ac, cmd, data); + break; + } + + if (error == ENETRESET) + error = aggr_iff(sc); + + return (error); +} + +static int +aggr_get_trunk(struct aggr_softc *sc, struct trunk_reqall *ra) +{ + struct ifnet *ifp = &sc->sc_if; + struct trunk_reqport rp; + struct aggr_port *p; + size_t size = ra->ra_size; + caddr_t ubuf = (caddr_t)ra->ra_port; + struct lacp_opreq *req; + uint8_t state = 0; + int error = 0; + + if (sc->sc_lacp_mode == AGGR_LACP_MODE_ACTIVE) + SET(state, LACP_STATE_ACTIVITY); + if (sc->sc_lacp_timeout == AGGR_LACP_TIMEOUT_FAST) + SET(state, LACP_STATE_TIMEOUT); + + ra->ra_proto = TRUNK_PROTO_LACP; + memset(&ra->ra_psc, 0, sizeof(ra->ra_psc)); + + /* + * aggr(4) does not support Individual links so don't bother + * with portprio, portno, and state, as per the spec. + */ + + req = &ra->ra_lacpreq; + req->actor_prio = sc->sc_lacp_prio; + CTASSERT(sizeof(req->actor_mac) == sizeof(sc->sc_ac.ac_enaddr)); + memcpy(req->actor_mac, &sc->sc_ac.ac_enaddr, sizeof(req->actor_mac)); + req->actor_key = ifp->if_index; + req->actor_state = state; + + req->partner_prio = ntohs(sc->sc_partner_system.lacp_sysid_priority); + CTASSERT(sizeof(req->partner_mac) == + sizeof(sc->sc_partner_system.lacp_sysid_mac)); + memcpy(req->partner_mac, sc->sc_partner_system.lacp_sysid_mac, + sizeof(req->partner_mac)); + req->partner_key = ntohs(sc->sc_partner_key); + + ra->ra_ports = sc->sc_nports; + TAILQ_FOREACH(p, &sc->sc_ports, p_entry) { + struct ifnet *ifp0; + struct lacp_opreq *opreq; + + if (size < sizeof(rp)) + break; + + ifp0 = p->p_ifp0; + + CTASSERT(sizeof(rp.rp_ifname) == sizeof(ifp->if_xname)); + CTASSERT(sizeof(rp.rp_portname) == sizeof(ifp0->if_xname)); + + memset(&rp, 0, sizeof(rp)); + memcpy(rp.rp_ifname, ifp->if_xname, sizeof(rp.rp_ifname)); + memcpy(rp.rp_portname, ifp0->if_xname, sizeof(rp.rp_portname)); + + if (p->p_collecting) + SET(rp.rp_flags, TRUNK_PORT_COLLECTING); + if (p->p_distributing) + SET(rp.rp_flags, TRUNK_PORT_DISTRIBUTING); + if (!aggr_port_enabled(p)) + SET(rp.rp_flags, TRUNK_PORT_DISABLED); + + opreq = &rp.rp_lacpreq; + opreq->actor_state = state | p->p_actor_state; + opreq->partner_state = p->p_partner_state; + + error = copyout(&rp, ubuf, sizeof(rp)); + if (error != 0) + break; + + ubuf += sizeof(rp); + size -= sizeof(rp); + } + + return (error); +} + +static int +aggr_get_options(struct aggr_softc *sc, struct trunk_opts *tro) +{ + struct lacp_adminopts *opt = &tro->to_lacpopts; + + if (tro->to_proto != TRUNK_PROTO_LACP) + return (EPROTONOSUPPORT); + + opt->lacp_mode = sc->sc_lacp_mode; + opt->lacp_timeout = sc->sc_lacp_timeout; + 
opt->lacp_prio = sc->sc_lacp_prio; + opt->lacp_portprio = sc->sc_lacp_port_prio; + opt->lacp_ifqprio = sc->sc_if.if_llprio; + + return (0); +} + +static int +aggr_set_options(struct aggr_softc *sc, const struct trunk_opts *tro) +{ + const struct lacp_adminopts *opt = &tro->to_lacpopts; + + if (tro->to_proto != TRUNK_PROTO_LACP) + return (EPROTONOSUPPORT); + + switch (tro->to_opts) { + case TRUNK_OPT_LACP_MODE: + switch (opt->lacp_mode) { + case AGGR_LACP_MODE_PASSIVE: + case AGGR_LACP_MODE_ACTIVE: + break; + default: + return (EINVAL); + } + + aggr_set_lacp_mode(sc, opt->lacp_mode); + break; + + case TRUNK_OPT_LACP_TIMEOUT: + if (opt->lacp_timeout > nitems(aggr_periodic_times) || + aggr_periodic_times[opt->lacp_timeout] == 0) + return (EINVAL); + + aggr_set_lacp_timeout(sc, opt->lacp_timeout); + break; + + case TRUNK_OPT_LACP_SYS_PRIO: + if (opt->lacp_prio == 0) + return (EINVAL); + + sc->sc_lacp_prio = opt->lacp_prio; + break; + + case TRUNK_OPT_LACP_PORT_PRIO: + if (opt->lacp_portprio == 0) + return (EINVAL); + + sc->sc_lacp_port_prio = opt->lacp_portprio; + break; + + default: + return (ENODEV); + } + + return (0); +} + +static int +aggr_add_port(struct aggr_softc *sc, const struct trunk_reqport *rp) +{ + struct ifnet *ifp = &sc->sc_if; + struct ifnet *ifp0; + struct arpcom *ac0; + struct aggr_port *p; + struct aggr_multiaddr *ma; + uint32_t hardmtu; + int error; + + NET_ASSERT_LOCKED(); + if (sc->sc_nports > AGGR_MAX_PORTS) + return (ENOSPC); + + ifp0 = ifunit(rp->rp_portname); + if (ifp0 == NULL || ifp0->if_index == ifp->if_index) + return (EINVAL); + + if (ifp0->if_type != IFT_ETHER) + return (EPROTONOSUPPORT); + + hardmtu = ifp0->if_hardmtu; + if (hardmtu < ifp->if_mtu) + return (ENOBUFS); + if (ifp->if_hardmtu < hardmtu) + hardmtu = ifp->if_hardmtu; + + ac0 = (struct arpcom *)ifp0; + if (ac0->ac_trunkport != NULL) + return (EBUSY); + + /* let's try */ + + ifp0 = if_get(ifp0->if_index); /* get an actual reference */ + if (ifp0 == NULL) { + /* XXX this should never happen */ + return (EINVAL); + } + + p = malloc(sizeof(*p), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL); + if (p == NULL) { + error = ENOMEM; + goto put; + } + + p->p_ifp0 = ifp0; + p->p_aggr = sc; + + CTASSERT(sizeof(p->p_lladdr) == sizeof(ac0->ac_enaddr)); + memcpy(p->p_lladdr, ac0->ac_enaddr, sizeof(p->p_lladdr)); + p->p_ioctl = ifp0->if_ioctl; + p->p_output = ifp0->if_output; + + error = aggr_group(sc, p, SIOCADDMULTI); + if (error != 0) + goto free; + + error = aggr_p_setlladdr(p, sc->sc_ac.ac_enaddr); + if (error != 0) + goto ungroup; + + if (sc->sc_promisc) { + error = ifpromisc(ifp0, 1); + if (error != 0) + goto resetlladdr; + } + + TAILQ_FOREACH(ma, &sc->sc_multiaddrs, m_entry) { + if (aggr_multi(sc, p, ma, SIOCADDMULTI) != 0) { + log(LOG_WARNING, "%s %s: " + "unable to add multicast address", + ifp->if_xname, ifp0->if_xname); + } + } + + p->p_lcookie = hook_establish(ifp0->if_linkstatehooks, 1, + aggr_p_linkch, p); + p->p_dcookie = hook_establish(ifp0->if_detachhooks, 0, + aggr_p_detach, p); + + task_set(&p->p_rxm_task, aggr_rx, p); + mq_init(&p->p_rxm_mq, 3, IPL_NET); + + timeout_set_proc(&p->p_ptm_tx, aggr_ptm_tx, p); + timeout_set_proc(&p->p_txm_ntt, aggr_transmit_machine, p); + timeout_set_proc(&p->p_current_while_timer, + aggr_current_while_timer, p); + timeout_set_proc(&p->p_wait_while_timer, aggr_wait_while_timer, p); + + p->p_muxed = 0; + p->p_collecting = 0; + p->p_distributing = 0; + p->p_selected = AGGR_PORT_UNSELECTED; + p->p_actor_state = LACP_STATE_AGGREGATION; + + /* commit */ + DPRINTF(sc, "%s %s 
trunkport: creating port\n", + ifp->if_xname, ifp0->if_xname); + + ifp->if_hardmtu = hardmtu; + + TAILQ_INSERT_TAIL(&sc->sc_ports, p, p_entry); + sc->sc_nports++; + + ac0->ac_trunkport = p; + /* make sure p is visible before handlers can run */ + membar_producer(); + ifp0->if_ioctl = aggr_p_ioctl; + ifp0->if_output = aggr_p_output; + if_ih_insert(ifp0, aggr_input, p); + + aggr_mux(sc, p, LACP_MUX_E_BEGIN); + aggr_rxm(sc, p, LACP_RXM_E_BEGIN); + aggr_p_linkch(p); + + return (0); + +resetlladdr: + if (aggr_p_setlladdr(p, p->p_lladdr) != 0) { + log(LOG_WARNING, "%s add %s: unable to reset lladdr", + ifp->if_xname, ifp0->if_xname); + } +ungroup: + if (aggr_group(sc, p, SIOCDELMULTI) != 0) { + log(LOG_WARNING, "%s add %s: " + "unable to remove LACP group address", + ifp->if_xname, ifp0->if_xname); + } +free: + free(p, M_DEVBUF, sizeof(*p)); +put: + if_put(ifp0); + return (error); +} + +static struct aggr_port * +aggr_trunkport(struct aggr_softc *sc, const char *name) +{ + struct aggr_port *p; + + TAILQ_FOREACH(p, &sc->sc_ports, p_entry) { + if (strcmp(p->p_ifp0->if_xname, name) == 0) + return (p); + } + + return (NULL); +} + +static int +aggr_get_port(struct aggr_softc *sc, struct trunk_reqport *rp) +{ + struct aggr_port *p; + + NET_ASSERT_LOCKED(); + p = aggr_trunkport(sc, rp->rp_portname); + if (p == NULL) + return (EINVAL); + + /* XXX */ + + return (0); +} + +static int +aggr_del_port(struct aggr_softc *sc, const struct trunk_reqport *rp) +{ + struct aggr_port *p; + + NET_ASSERT_LOCKED(); + p = aggr_trunkport(sc, rp->rp_portname); + if (p == NULL) + return (EINVAL); + + aggr_p_dtor(sc, p, "del"); + + return (0); +} + +static int +aggr_p_setlladdr(struct aggr_port *p, const uint8_t *addr) +{ + struct ifnet *ifp0 = p->p_ifp0; + struct ifreq ifr; + struct sockaddr *sa; + int error; + + memset(&ifr, 0, sizeof(ifr)); + + CTASSERT(sizeof(ifr.ifr_name) == sizeof(ifp0->if_xname)); + memcpy(ifr.ifr_name, ifp0->if_xname, sizeof(ifr.ifr_name)); + + sa = &ifr.ifr_addr; + + /* wtf is this? 
*/ + sa->sa_len = ETHER_ADDR_LEN; + sa->sa_family = AF_LINK; + CTASSERT(sizeof(sa->sa_data) >= ETHER_ADDR_LEN); + memcpy(sa->sa_data, addr, ETHER_ADDR_LEN); + + error = (*p->p_ioctl)(ifp0, SIOCSIFLLADDR, (caddr_t)&ifr); + switch (error) { + case ENOTTY: + case 0: + break; + default: + return (error); + } + + error = if_setlladdr(ifp0, addr); + if (error != 0) + return (error); + + ifnewlladdr(ifp0); + + return (0); +} + +static int +aggr_p_ioctl(struct ifnet *ifp0, u_long cmd, caddr_t data) +{ + struct arpcom *ac0 = (struct arpcom *)ifp0; + struct aggr_port *p = ac0->ac_trunkport; + int error = 0; + + switch (cmd) { + case SIOCGTRUNKPORT: { + struct trunk_reqport *rp = (struct trunk_reqport *)data; + struct aggr_softc *sc = p->p_aggr; + struct ifnet *ifp = &sc->sc_if; + + if (strncmp(rp->rp_ifname, rp->rp_portname, + sizeof(rp->rp_ifname)) != 0) + return (EINVAL); + + CTASSERT(sizeof(rp->rp_ifname) == sizeof(ifp->if_xname)); + memcpy(rp->rp_ifname, ifp->if_xname, sizeof(rp->rp_ifname)); + break; + } + + case SIOCSIFMTU: + case SIOCSIFLLADDR: + error = EBUSY; + break; + + default: + error = (*p->p_ioctl)(ifp0, cmd, data); + break; + } + + return (error); +} + +static int +aggr_p_output(struct ifnet *ifp0, struct mbuf *m, struct sockaddr *dst, + struct rtentry *rt) +{ + struct arpcom *ac0 = (struct arpcom *)ifp0; + struct aggr_port *p = ac0->ac_trunkport; + + /* restrict transmission to bpf only */ + if ((m_tag_find(m, PACKET_TAG_DLT, NULL) == NULL)) { + m_freem(m); + return (EBUSY); + } + + return ((*p->p_output)(ifp0, m, dst, rt)); +} + +static void +aggr_p_dtor(struct aggr_softc *sc, struct aggr_port *p, const char *op) +{ + struct ifnet *ifp = &sc->sc_if; + struct ifnet *ifp0 = p->p_ifp0; + struct arpcom *ac0 = (struct arpcom *)ifp0; + struct aggr_multiaddr *ma; + + DPRINTF(sc, "%s %s %s: destroying port\n", + ifp->if_xname, ifp0->if_xname, op); + + aggr_rxm(sc, p, LACP_RXM_E_NOT_PORT_ENABLED); + aggr_unselected(p); + + timeout_del(&p->p_ptm_tx); + timeout_del_barrier(&p->p_txm_ntt); /* XXX */ + timeout_del(&p->p_current_while_timer); + timeout_del(&p->p_wait_while_timer); + + if_ih_remove(ifp0, aggr_input, p); + + ac0->ac_trunkport = NULL; + + ifp0->if_ioctl = p->p_ioctl; + ifp0->if_output = p->p_output; + + TAILQ_REMOVE(&sc->sc_ports, p, p_entry); + sc->sc_nports--; + + TAILQ_FOREACH(ma, &sc->sc_multiaddrs, m_entry) { + if (aggr_multi(sc, p, ma, SIOCDELMULTI) != 0) { + log(LOG_WARNING, "%s %s %s: " + "unable to remove multicast address", + ifp->if_xname, op, ifp0->if_xname); + } + } + + if (sc->sc_promisc && ifpromisc(ifp0, 0) != 0) { + log(LOG_WARNING, "%s %s %s: unable to disable promisc", + ifp->if_xname, op, ifp0->if_xname); + } + + if (aggr_p_setlladdr(p, p->p_lladdr) != 0) { + log(LOG_WARNING, "%s %s %s: unable to restore lladdr", + ifp->if_xname, op, ifp0->if_xname); + } + + if (aggr_group(sc, p, SIOCDELMULTI) != 0) { + log(LOG_WARNING, "%s %s %s: " + "unable to remove LACP group address", + ifp->if_xname, op, ifp0->if_xname); + } + + hook_disestablish(ifp0->if_detachhooks, p->p_dcookie); + hook_disestablish(ifp0->if_linkstatehooks, p->p_lcookie); + + if_put(ifp0); + free(p, M_DEVBUF, sizeof(*p)); + + /* XXX this is a pretty ugly place to update this */ + ifp->if_hardmtu = aggr_hardmtu(sc); +} + +static void +aggr_p_detach(void *arg) +{ + struct aggr_port *p = arg; + struct aggr_softc *sc = p->p_aggr; + + aggr_p_dtor(sc, p, "detach"); + + NET_ASSERT_LOCKED(); +} + +static void +aggr_p_linkch(void *arg) +{ + struct aggr_port *p = arg; + struct aggr_softc *sc = p->p_aggr; + + 
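The port take-over in aggr_add_port() and aggr_p_ioctl()/aggr_p_output() above, and the restore in aggr_p_dtor(), follow a simple pattern: save the child interface's handlers, install wrappers that refuse some requests (e.g. EBUSY for SIOCSIFLLADDR) and chain to the saved handlers for everything else. A standalone sketch of that pattern with a made-up handler type, purely for illustration:

	#include <stdio.h>

	typedef int (*handler_t)(int cmd);

	static handler_t saved_handler;

	static int
	child_handler(int cmd)
	{
		printf("child handles cmd %d\n", cmd);
		return (0);
	}

	static int
	wrapper_handler(int cmd)
	{
		if (cmd == 42) {
			printf("wrapper rejects cmd %d\n", cmd);
			return (-1);	/* like EBUSY for SIOCSIFLLADDR above */
		}
		return (saved_handler(cmd));	/* chain to the original */
	}

	int
	main(void)
	{
		handler_t h = child_handler;

		saved_handler = h;	/* like p->p_ioctl = ifp0->if_ioctl */
		h = wrapper_handler;	/* like ifp0->if_ioctl = aggr_p_ioctl */
		h(1);
		h(42);

		h = saved_handler;	/* like the restore in aggr_p_dtor() */
		h(42);
		return (0);
	}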
NET_ASSERT_LOCKED(); + + if (aggr_port_enabled(p)) { + aggr_rxm(sc, p, LACP_RXM_E_PORT_ENABLED); + + if (aggr_lacp_enabled(sc)) { + timeout_add_sec(&p->p_ptm_tx, + aggr_periodic_times[AGGR_LACP_TIMEOUT_FAST]); + } + } else { + aggr_rxm(sc, p, LACP_RXM_E_NOT_PORT_ENABLED); + aggr_unselected(p); + timeout_del(&p->p_ptm_tx); + } +} + +static void +aggr_map(struct aggr_softc *sc) +{ + struct ifnet *ifp = &sc->sc_if; + struct aggr_map *map = NULL; + struct aggr_port *p; + unsigned int gen; + unsigned int i; + int link_state = LINK_STATE_DOWN; + + p = TAILQ_FIRST(&sc->sc_distributing); + if (p != NULL) { + gen = sc->sc_map_gen++; + map = &sc->sc_maps[gen % nitems(sc->sc_maps)]; + + for (i = 0; i < nitems(map->m_ifp0s); i++) { + map->m_ifp0s[i] = p->p_ifp0; + + p = TAILQ_NEXT(p, p_entry); + if (p == NULL) + p = TAILQ_FIRST(&sc->sc_distributing); + } + + link_state = LINK_STATE_FULL_DUPLEX; + } + + SMR_PTR_SET_LOCKED(&sc->sc_map, map); + smr_barrier(); + + if (ifp->if_link_state != link_state) { + ifp->if_link_state = link_state; + if_link_state_change(ifp); + } +} + +static void +aggr_current_while_timer(void *arg) +{ + struct aggr_port *p = arg; + struct aggr_softc *sc = p->p_aggr; + + aggr_rxm(sc, p, LACP_RXM_E_TIMER_EXPIRED); +} + +static void +aggr_wait_while_timer(void *arg) +{ + struct aggr_port *p = arg; + struct aggr_softc *sc = p->p_aggr; + + aggr_selection_logic(sc, p); +} + +static void +aggr_start_current_while_timer(struct aggr_port *p, unsigned int t) +{ + timeout_add_sec(&p->p_current_while_timer, + aggr_periodic_times[t] * LACP_TIMEOUT_FACTOR); +} + +static void +aggr_input_lacpdu(struct aggr_port *p, struct mbuf *m) +{ + struct aggr_softc *sc = p->p_aggr; + struct lacp_du *lacpdu; + + if (m->m_len < sizeof(*lacpdu)) { + m = m_pullup(m, sizeof(*lacpdu)); + if (m == NULL) + return; + } + + /* + * In the process of executing the recordPDU function, a Receive + * machine compliant to this standard shall not validate the + * Version Number, TLV_type, or Reserved fields in received + * LACPDUs. The same actions are taken regardless of the values + * received in these fields. A Receive machine may validate + * the Actor_Information_Length, Partner_Information_Length, + * Collector_Information_Length, or Terminator_Length fields. 
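The only checks the quoted text permits are of the fixed length fields, which for LACPv1 are 20 octets for the actor and partner information TLVs, 16 for the collector information TLV and 0 for the terminator (matching the struct lacp_du layout above). A standalone sketch of such a length check over a raw LACPDU payload; the helper and the hand-built buffer are made up for illustration:

	#include <stdio.h>
	#include <stdint.h>

	struct lacp_tlv { uint8_t type, len; };

	static int
	lacp_lengths_ok(const uint8_t *buf)
	{
		const struct lacp_tlv *actor = (const void *)(buf + 2);
		const struct lacp_tlv *partner = (const void *)(buf + 22);
		const struct lacp_tlv *collector = (const void *)(buf + 42);
		const struct lacp_tlv *term = (const void *)(buf + 58);

		return (actor->len == 20 && partner->len == 20 &&
		    collector->len == 16 && term->len == 0);
	}

	int
	main(void)
	{
		uint8_t pdu[110] = { 0x01, 0x01 };	/* subtype LACP, version 1 */

		pdu[2] = 0x01; pdu[3] = 20;	/* actor information TLV */
		pdu[22] = 0x02; pdu[23] = 20;	/* partner information TLV */
		pdu[42] = 0x03; pdu[43] = 16;	/* collector information TLV */
		/* terminator at offset 58 is already zero */

		printf("lengths %s\n", lacp_lengths_ok(pdu) ? "ok" : "bad");
		return (0);
	}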
+ */ + + lacpdu = mtod(m, struct lacp_du *); + aggr_rxm_lacpdu(sc, p, lacpdu); + + m_freem(m); +} + +static int +aggr_update_selected(struct aggr_softc *sc, struct aggr_port *p, + const struct lacp_du *lacpdu) +{ + const struct lacp_port_info *rpi = &lacpdu->lacp_actor_info; + const struct lacp_port_info *lpi = &p->p_partner; + + if ((rpi->lacp_portid.lacp_portid_number == + lpi->lacp_portid.lacp_portid_number) && + (rpi->lacp_portid.lacp_portid_priority == + lpi->lacp_portid.lacp_portid_priority) && + ether_is_eq(rpi->lacp_sysid.lacp_sysid_mac, + lpi->lacp_sysid.lacp_sysid_mac) && + (rpi->lacp_sysid.lacp_sysid_priority == + lpi->lacp_sysid.lacp_sysid_priority) && + (rpi->lacp_key == lpi->lacp_key) && + (ISSET(rpi->lacp_state, LACP_STATE_AGGREGATION) == + ISSET(lpi->lacp_state, LACP_STATE_AGGREGATION))) + return (0); + + aggr_unselected(p); + return (1); +} + +static void +aggr_record_default(struct aggr_softc *sc, struct aggr_port *p) +{ + struct lacp_port_info *pi = &p->p_partner; + + pi->lacp_sysid.lacp_sysid_priority = htons(0); + memset(pi->lacp_sysid.lacp_sysid_mac, 0, + sizeof(pi->lacp_sysid.lacp_sysid_mac)); + + pi->lacp_key = htons(0); + + pi->lacp_portid.lacp_portid_priority = htons(0); + pi->lacp_portid.lacp_portid_number = htons(0); + + SET(p->p_actor_state, LACP_STATE_DEFAULTED); + + pi->lacp_state = LACP_STATE_AGGREGATION | LACP_STATE_SYNC; + if (sc->sc_lacp_timeout == AGGR_LACP_TIMEOUT_FAST) + SET(pi->lacp_state, LACP_STATE_TIMEOUT); + if (sc->sc_lacp_mode == AGGR_LACP_MODE_ACTIVE) + SET(pi->lacp_state, LACP_STATE_ACTIVITY); + + /* notify Mux */ + aggr_mux(sc, p, LACP_MUX_E_NOT_COLLECTING); + aggr_mux(sc, p, LACP_MUX_E_SYNC); +} + +static void +aggr_update_default_selected(struct aggr_softc *sc, struct aggr_port *p) +{ + const struct lacp_port_info *pi = &p->p_partner; + + if ((pi->lacp_portid.lacp_portid_number == htons(0)) && + (pi->lacp_portid.lacp_portid_priority == htons(0)) && + ether_is_zero(pi->lacp_sysid.lacp_sysid_mac) && + (pi->lacp_sysid.lacp_sysid_priority == htons(0)) && + (pi->lacp_key == htons(0)) && + ISSET(pi->lacp_state, LACP_STATE_AGGREGATION)) + return; + + aggr_unselected(p); + aggr_selection_logic(sc, p); /* restart */ +} + +static int +aggr_update_ntt(struct aggr_port *p, const struct lacp_du *lacpdu) +{ + struct aggr_softc *sc = p->p_aggr; + struct arpcom *ac = &sc->sc_ac; + struct ifnet *ifp = &ac->ac_if; + struct ifnet *ifp0 = p->p_ifp0; + const struct lacp_port_info *pi = &lacpdu->lacp_partner_info; + uint8_t bits = LACP_STATE_ACTIVITY | LACP_STATE_TIMEOUT | + LACP_STATE_SYNC | LACP_STATE_AGGREGATION; + uint8_t state = p->p_actor_state; + int sync = 0; + + if (pi->lacp_portid.lacp_portid_number != htons(ifp0->if_index)) + goto ntt; + if (pi->lacp_portid.lacp_portid_priority != + htons(sc->sc_lacp_port_prio)) + goto ntt; + if (!ether_is_eq(pi->lacp_sysid.lacp_sysid_mac, ac->ac_enaddr)) + goto ntt; + if (pi->lacp_sysid.lacp_sysid_priority != + htons(sc->sc_lacp_prio)) + goto ntt; + if (pi->lacp_key != htons(ifp->if_index)) + goto ntt; + if (ISSET(pi->lacp_state, LACP_STATE_SYNC) != + ISSET(state, LACP_STATE_SYNC)) + goto ntt; + sync = 1; + + if (sc->sc_lacp_timeout == AGGR_LACP_TIMEOUT_FAST) + SET(state, LACP_STATE_TIMEOUT); + if (sc->sc_lacp_mode == AGGR_LACP_MODE_ACTIVE) + SET(state, LACP_STATE_ACTIVITY); + + if (ISSET(pi->lacp_state, bits) != ISSET(state, bits)) + goto ntt; + + return (1); + +ntt: + aggr_ntt(p); + + return (sync); +} + +static void +aggr_recordpdu(struct aggr_port *p, const struct lacp_du *lacpdu, int sync) +{ + struct aggr_softc 
*sc = p->p_aggr; + const struct lacp_port_info *rpi = &lacpdu->lacp_actor_info; + struct lacp_port_info *lpi = &p->p_partner; + int active = ISSET(rpi->lacp_state, LACP_STATE_ACTIVITY) || + (ISSET(p->p_actor_state, LACP_STATE_ACTIVITY) && + ISSET(lacpdu->lacp_partner_info.lacp_state, LACP_STATE_ACTIVITY)); + + lpi->lacp_portid.lacp_portid_number = + rpi->lacp_portid.lacp_portid_number; + lpi->lacp_portid.lacp_portid_priority = + rpi->lacp_portid.lacp_portid_priority; + memcpy(lpi->lacp_sysid.lacp_sysid_mac, + rpi->lacp_sysid.lacp_sysid_mac, + sizeof(lpi->lacp_sysid.lacp_sysid_mac)); + lpi->lacp_sysid.lacp_sysid_priority = + rpi->lacp_sysid.lacp_sysid_priority; + lpi->lacp_key = rpi->lacp_key; + lpi->lacp_state = rpi->lacp_state & ~LACP_STATE_SYNC; + + CLR(p->p_actor_state, LACP_STATE_DEFAULTED); + + if (active && ISSET(rpi->lacp_state, LACP_STATE_SYNC) && sync) { + SET(p->p_partner_state, LACP_STATE_SYNC); + aggr_mux(sc, p, LACP_MUX_E_SYNC); + } else { + DPRINTF(sc, "%s %s: %s active %d lacp_state.sync %d sync %d\n", + sc->sc_if.if_xname, p->p_ifp0->if_xname, __func__, + active, + !!ISSET(rpi->lacp_state, LACP_STATE_SYNC), + sync); + CLR(p->p_partner_state, LACP_STATE_SYNC); + aggr_mux(sc, p, LACP_MUX_E_NOT_SYNC); + } +} + +static void +aggr_input_marker(struct aggr_port *p, struct mbuf *m) +{ + m_freem(m); +} + +static void +aggr_rx(void *arg) +{ + struct aggr_port *p = arg; + struct mbuf_list ml; + struct mbuf *m; + + mq_delist(&p->p_rxm_mq, &ml); + + while ((m = ml_dequeue(&ml)) != NULL) { + struct ether_slowproto_hdr *sph; + + /* aggr_input has checked eh already */ + m_adj(m, sizeof(struct ether_header)); + + sph = mtod(m, struct ether_slowproto_hdr *); + switch (sph->sph_subtype) { + case SLOWPROTOCOLS_SUBTYPE_LACP: + aggr_input_lacpdu(p, m); + break; + case SLOWPROTOCOLS_SUBTYPE_LACP_MARKER: + aggr_input_marker(p, m); + break; + default: + panic("unexpected slow protocol subtype"); + /* NOTREACHED */ + } + } +} + +static void +aggr_set_selected(struct aggr_port *p, enum aggr_port_selected s, + enum lacp_mux_event ev) +{ + struct aggr_softc *sc = p->p_aggr; + + p->p_selected = s; + aggr_mux(sc, p, ev); +} + +static void +aggr_unselected(struct aggr_port *p) +{ + aggr_set_selected(p, AGGR_PORT_UNSELECTED, LACP_MUX_E_UNSELECTED); +} + +static inline void +aggr_selected(struct aggr_port *p) +{ + aggr_set_selected(p, AGGR_PORT_SELECTED, LACP_MUX_E_SELECTED); +} + +#ifdef notyet +static inline void +aggr_standby(struct aggr_port *p) +{ + aggr_set_selected(p, AGGR_PORT_STANDBY, LACP_MUX_E_STANDBY); +} +#endif + +static void +aggr_selection_logic(struct aggr_softc *sc, struct aggr_port *p) +{ + const struct lacp_port_info *pi; + struct arpcom *ac = &sc->sc_ac; + struct ifnet *ifp = &ac->ac_if; + const uint8_t *mac; + + if (p->p_rxm_state != LACP_RXM_S_CURRENT) + goto unselected; + + pi = &p->p_partner; + if (pi->lacp_key == htons(0)) + goto unselected; + + /* + * aggr(4) does not support individual interfaces + */ + if (!ISSET(pi->lacp_state, LACP_STATE_AGGREGATION)) + goto unselected; + + /* + * Any pair of Aggregation Ports that are members of the same + * LAG, but are connected together by the same link, shall not + * select the same Aggregator + */ + + mac = pi->lacp_sysid.lacp_sysid_mac; + if (ether_is_eq(mac, ac->ac_enaddr) && + pi->lacp_key == htons(ifp->if_index)) + goto unselected; + + if (!TAILQ_EMPTY(&sc->sc_muxen)) { + /* an aggregation has already been selected */ + if (!ether_is_eq(mac, sc->sc_partner_system.lacp_sysid_mac) || + sc->sc_partner_key != pi->lacp_key) + goto 
unselected; + } + + aggr_selected(p); + return; + +unselected: + aggr_unselected(p); +} + +static void +aggr_mux(struct aggr_softc *sc, struct aggr_port *p, enum lacp_mux_event ev) +{ + int ntt = 0; + + /* + * the mux can move through multiple states based on a + * single event, so loop until the event is completely consumed. + * debounce NTT = TRUE through the multiple state transitions. + */ + + while (aggr_mux_ev(sc, p, ev, &ntt) != 0) + ; + + if (ntt) + aggr_ntt(p); +} + +#ifdef notyet +static int +aggr_ready_n(struct aggr_port *p) +{ + return (p->p_mux_state == LACP_MUX_S_WAITING && + !timeout_pending(&p->p_wait_while_timer)); +} +#endif + +static inline int +aggr_ready(struct aggr_softc *sc) +{ + return (1); +} + +static void +aggr_disable_distributing(struct aggr_softc *sc, struct aggr_port *p) +{ + if (!p->p_distributing) + return; + + sc->sc_ndistributing--; + TAILQ_REMOVE(&sc->sc_distributing, p, p_entry_distributing); + p->p_distributing = 0; + + aggr_map(sc); + + DPRINTF(sc, "%s %s: distributing disabled\n", + sc->sc_if.if_xname, p->p_ifp0->if_xname); +} + +static void +aggr_enable_distributing(struct aggr_softc *sc, struct aggr_port *p) +{ + if (p->p_distributing) + return; + + /* check the LAG ID? */ + + p->p_distributing = 1; + TAILQ_INSERT_TAIL(&sc->sc_distributing, p, p_entry_distributing); + sc->sc_ndistributing++; + + aggr_map(sc); + + DPRINTF(sc, "%s %s: distributing enabled\n", + sc->sc_if.if_xname, p->p_ifp0->if_xname); +} + +static void +aggr_disable_collecting(struct aggr_softc *sc, struct aggr_port *p) +{ + if (!p->p_collecting) + return; + + p->p_collecting = 0; + + DPRINTF(sc, "%s %s: collecting disabled\n", + sc->sc_if.if_xname, p->p_ifp0->if_xname); +} + +static void +aggr_enable_collecting(struct aggr_softc *sc, struct aggr_port *p) +{ + if (p->p_collecting) + return; + + p->p_collecting = 1; + + DPRINTF(sc, "%s %s: collecting enabled\n", + sc->sc_if.if_xname, p->p_ifp0->if_xname); +} + +static void +aggr_attach_mux(struct aggr_softc *sc, struct aggr_port *p) +{ + const struct lacp_port_info *pi = &p->p_partner; + + if (p->p_muxed) + return; + + p->p_muxed = 1; + if (TAILQ_EMPTY(&sc->sc_muxen)) { + KASSERT(sc->sc_partner_key == htons(0)); + sc->sc_partner_system = pi->lacp_sysid; + sc->sc_partner_key = pi->lacp_key; + } + + TAILQ_INSERT_TAIL(&sc->sc_muxen, p, p_entry_muxen); + + DPRINTF(sc, "%s %s: mux attached\n", + sc->sc_if.if_xname, p->p_ifp0->if_xname); +} + +static void +aggr_detach_mux(struct aggr_softc *sc, struct aggr_port *p) +{ + if (!p->p_muxed) + return; + + p->p_muxed = 0; + + TAILQ_REMOVE(&sc->sc_muxen, p, p_entry_muxen); + if (TAILQ_EMPTY(&sc->sc_muxen)) { + memset(&sc->sc_partner_system.lacp_sysid_mac, 0, + sizeof(sc->sc_partner_system.lacp_sysid_mac)); + sc->sc_partner_system.lacp_sysid_priority = htons(0); + sc->sc_partner_key = htons(0); + } + + DPRINTF(sc, "%s %s: mux detached\n", + sc->sc_if.if_xname, p->p_ifp0->if_xname); +} + +static int +aggr_mux_ev(struct aggr_softc *sc, struct aggr_port *p, enum lacp_mux_event ev, + int *ntt) +{ + enum lacp_mux_state nstate = LACP_MUX_S_DETACHED; + + switch (p->p_mux_state) { + case LACP_MUX_S_BEGIN: + KASSERT(ev == LACP_MUX_E_BEGIN); + nstate = LACP_MUX_S_DETACHED; + break; + case LACP_MUX_S_DETACHED: + switch (ev) { + case LACP_MUX_E_SELECTED: + case LACP_MUX_E_STANDBY: + nstate = LACP_MUX_S_WAITING; + break; + default: + return (0); + } + break; + case LACP_MUX_S_WAITING: + switch (ev) { + case LACP_MUX_E_UNSELECTED: + nstate = LACP_MUX_S_DETACHED; + break; + case LACP_MUX_E_SELECTED: + case 
LACP_MUX_E_READY: + if (aggr_ready(sc) && + p->p_selected == AGGR_PORT_SELECTED) { + nstate = LACP_MUX_S_ATTACHED; + break; + } + /* FALLTHROUGH */ + default: + return (0); + } + break; + case LACP_MUX_S_ATTACHED: + switch (ev) { + case LACP_MUX_E_UNSELECTED: + case LACP_MUX_E_STANDBY: + nstate = LACP_MUX_S_DETACHED; + break; + case LACP_MUX_E_SELECTED: + case LACP_MUX_E_SYNC: + if (p->p_selected == AGGR_PORT_SELECTED && + ISSET(p->p_partner_state, LACP_STATE_SYNC)) { + nstate = LACP_MUX_S_COLLECTING; + break; + } + /* FALLTHROUGH */ + default: + return (0); + } + break; + case LACP_MUX_S_COLLECTING: + switch (ev) { + case LACP_MUX_E_UNSELECTED: + case LACP_MUX_E_STANDBY: + case LACP_MUX_E_NOT_SYNC: + nstate = LACP_MUX_S_ATTACHED; + break; + case LACP_MUX_E_SELECTED: + case LACP_MUX_E_SYNC: + case LACP_MUX_E_COLLECTING: + if (p->p_selected == AGGR_PORT_SELECTED && + ISSET(p->p_partner_state, LACP_STATE_SYNC) && + ISSET(p->p_partner_state, LACP_STATE_COLLECTING)) { + nstate = LACP_MUX_S_DISTRIBUTING; + break; + } + /* FALLTHROUGH */ + default: + return (0); + } + break; + case LACP_MUX_S_DISTRIBUTING: + switch (ev) { + case LACP_MUX_E_UNSELECTED: + case LACP_MUX_E_STANDBY: + case LACP_MUX_E_NOT_SYNC: + case LACP_MUX_E_NOT_COLLECTING: + nstate = LACP_MUX_S_COLLECTING; + break; + default: + return (0); + } + break; + } + + DPRINTF(sc, "%s %s mux: %s (%s) -> %s\n", + sc->sc_if.if_xname, p->p_ifp0->if_xname, + lacp_mux_state_names[p->p_mux_state], lacp_mux_event_names[ev], + lacp_mux_state_names[nstate]); + + /* act on the new state */ + switch (nstate) { + case LACP_MUX_S_BEGIN: + panic("unexpected mux nstate BEGIN"); + /* NOTREACHED */ + case LACP_MUX_S_DETACHED: + /* + * Detach_Mux_From_Aggregator(); + * Actor.Sync = FALSE; + * Disable_Distributing(); + * Actor.Distributing = FALSE; + * Actor.Collecting = FALSE; + * Disable_Collecting(); + * NTT = TRUE; + */ + aggr_detach_mux(sc, p); + CLR(p->p_actor_state, LACP_STATE_SYNC); + aggr_disable_distributing(sc, p); + CLR(p->p_actor_state, LACP_STATE_DISTRIBUTING); + CLR(p->p_actor_state, LACP_STATE_COLLECTING); + aggr_disable_collecting(sc, p); + *ntt = 1; + break; + case LACP_MUX_S_WAITING: + /* + * Start wait_while_timer + */ + timeout_add_sec(&p->p_wait_while_timer, + LACP_AGGREGATION_WAIT_TIME); + break; + case LACP_MUX_S_ATTACHED: + /* + * Attach_Mux_To_Aggregator(); + * Actor.Sync = TRUE; + * Actor.Collecting = FALSE; + * Disable_Collecting(); + * NTT = TRUE; + */ + aggr_attach_mux(sc, p); + SET(p->p_actor_state, LACP_STATE_SYNC); + CLR(p->p_actor_state, LACP_STATE_COLLECTING); + aggr_disable_collecting(sc, p); + *ntt = 1; + break; + + case LACP_MUX_S_COLLECTING: + /* + * Enable_Collecting(); + * Actor.Collecting = TRUE; + * Disable_Distributing(); + * Actor.Distributing = FALSE; + * NTT = TRUE; + */ + aggr_enable_collecting(sc, p); + SET(p->p_actor_state, LACP_STATE_COLLECTING); + aggr_disable_distributing(sc, p); + CLR(p->p_actor_state, LACP_STATE_DISTRIBUTING); + *ntt = 1; + break; + case LACP_MUX_S_DISTRIBUTING: + /* + * Actor.Distributing = TRUE; + * Enable_Distributing(); + */ + SET(p->p_actor_state, LACP_STATE_DISTRIBUTING); + aggr_enable_distributing(sc, p); + break; + } + + p->p_mux_state = nstate; + + return (1); +} + +static void +aggr_rxm_ev(struct aggr_softc *sc, struct aggr_port *p, + enum lacp_rxm_event ev, const struct lacp_du *lacpdu) +{ + unsigned int port_disabled = 0; + enum lacp_rxm_state nstate = LACP_RXM_S_BEGIN; + + KASSERT((ev == LACP_RXM_E_LACPDU) == (lacpdu != NULL)); + + /* global transitions */ + + switch (ev) { 
+ case LACP_RXM_E_NOT_PORT_ENABLED: + port_disabled = !aggr_port_moved(sc, p); + break; + case LACP_RXM_E_NOT_PORT_MOVED: + port_disabled = !aggr_port_enabled(p); + break; + default: + break; + } + + if (port_disabled) + nstate = LACP_RXM_S_PORT_DISABLED; + else switch (p->p_rxm_state) { /* local state transitions */ + case LACP_RXM_S_BEGIN: + KASSERT(ev == LACP_RXM_E_BEGIN); + nstate = LACP_RXM_S_INITIALIZE; + break; + case LACP_RXM_S_INITIALIZE: + /* this should only be handled via UCT in nstate handling */ + panic("unexpected rxm state INITIALIZE"); + + case LACP_RXM_S_PORT_DISABLED: + switch (ev) { + case LACP_RXM_E_PORT_MOVED: + nstate = LACP_RXM_S_INITIALIZE; + break; + case LACP_RXM_E_PORT_ENABLED: + nstate = aggr_lacp_enabled(sc) ? + LACP_RXM_S_EXPIRED : LACP_RXM_S_LACP_DISABLED; + break; + case LACP_RXM_E_LACP_ENABLED: + if (!aggr_port_enabled(p)) + return; + nstate = LACP_RXM_S_EXPIRED; + break; + case LACP_RXM_E_NOT_LACP_ENABLED: + if (!aggr_port_enabled(p)) + return; + nstate = LACP_RXM_S_LACP_DISABLED; + break; + default: + return; + } + break; + case LACP_RXM_S_EXPIRED: + switch (ev) { + case LACP_RXM_E_LACPDU: + nstate = LACP_RXM_S_CURRENT; + break; + case LACP_RXM_E_TIMER_EXPIRED: + nstate = LACP_RXM_S_DEFAULTED; + break; + default: + return; + } + break; + case LACP_RXM_S_LACP_DISABLED: + switch (ev) { + case LACP_RXM_E_LACP_ENABLED: + nstate = LACP_RXM_S_PORT_DISABLED; + break; + default: + return; + } + break; + case LACP_RXM_S_DEFAULTED: + switch (ev) { + case LACP_RXM_E_LACPDU: + nstate = LACP_RXM_S_CURRENT; + break; + default: + return; + } + break; + case LACP_RXM_S_CURRENT: + switch (ev) { + case LACP_RXM_E_TIMER_EXPIRED: + nstate = LACP_RXM_S_EXPIRED; + break; + case LACP_RXM_E_LACPDU: + nstate = LACP_RXM_S_CURRENT; + break; + default: + return; + } + break; + } + + if (p->p_rxm_state != nstate) { + DPRINTF(sc, "%s %s rxm: %s (%s) -> %s\n", + sc->sc_if.if_xname, p->p_ifp0->if_xname, + lacp_rxm_state_names[p->p_rxm_state], + lacp_rxm_event_names[ev], + lacp_rxm_state_names[nstate]); + } + + /* record the new state */ + p->p_rxm_state = nstate; + + /* act on the new state */ + switch (nstate) { + case LACP_RXM_S_BEGIN: + panic("unexpected rxm nstate BEGIN"); + /* NOTREACHED */ + case LACP_RXM_S_INITIALIZE: + /* + * Selected = UNSELECTED; + * recordDefault(); + * Actor_Oper_Port_State.Expired = FALSE; + * port_moved = FALSE; + */ + aggr_unselected(p); + aggr_record_default(sc, p); + CLR(p->p_actor_state, LACP_STATE_EXPIRED); + + p->p_rxm_state = LACP_RXM_S_PORT_DISABLED; /* UCT */ + /* FALLTHROUGH */ + case LACP_RXM_S_PORT_DISABLED: + /* + * Partner_Oper_Port_State.Synchronization = FALSE; + */ + CLR(p->p_partner_state, LACP_STATE_SYNC); + aggr_mux(sc, p, LACP_MUX_E_NOT_SYNC); + break; + case LACP_RXM_S_EXPIRED: + /* + * Partner_Oper_Port_State.Synchronization = FALSE; + * Partner_Oper_Port_State.LACP_Timeout = Short Timeout; + * start current_while_timer(Short Timeout); + * Actor_Oper_Port_State.Expired = TRUE; + */ + + CLR(p->p_partner_state, LACP_STATE_SYNC); + aggr_mux(sc, p, LACP_MUX_E_NOT_SYNC); + aggr_set_partner_timeout(p, AGGR_LACP_TIMEOUT_FAST); + aggr_start_current_while_timer(p, AGGR_LACP_TIMEOUT_FAST); + SET(p->p_actor_state, LACP_STATE_EXPIRED); + + break; + case LACP_RXM_S_LACP_DISABLED: + /* + * Selected = UNSELECTED; + * recordDefault(); + * Partner_Oper_Port_State.Aggregation = FALSE; + * Actor_Oper_Port_State.Expired = FALSE; + */ + aggr_unselected(p); + aggr_record_default(sc, p); + CLR(p->p_partner_state, LACP_STATE_AGGREGATION); + 
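For the timers driving these transitions: once LACPDUs stop arriving, CURRENT moves to EXPIRED when current_while runs out (LACP_TIMEOUT_FACTOR times the periodic interval in use), and EXPIRED moves to DEFAULTED after a further short-timeout interval, since the EXPIRED state restarts current_while with the fast time. A standalone sketch of just that arithmetic, assuming the slow timeout was configured; illustration only:

	#include <stdio.h>

	#define LACP_FAST_PERIODIC_TIME	1
	#define LACP_SLOW_PERIODIC_TIME	30
	#define LACP_TIMEOUT_FACTOR	3

	int
	main(void)
	{
		int periodic = LACP_SLOW_PERIODIC_TIME;	/* configured slow timeout */
		int expired_at = periodic * LACP_TIMEOUT_FACTOR;
		int defaulted_at = expired_at +
		    LACP_FAST_PERIODIC_TIME * LACP_TIMEOUT_FACTOR;

		printf("CURRENT -> EXPIRED after %d seconds of silence\n",
		    expired_at);
		printf("EXPIRED -> DEFAULTED after %d more seconds\n",
		    defaulted_at - expired_at);
		return (0);
	}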
CLR(p->p_actor_state, LACP_STATE_EXPIRED); + break; + case LACP_RXM_S_DEFAULTED: + /* + * update_Default_Selected(); + * recordDefault(); + * Actor_Oper_Port_State.Expired = FALSE; + */ + aggr_update_default_selected(sc, p); + aggr_record_default(sc, p); + CLR(p->p_actor_state, LACP_STATE_EXPIRED); + break; + case LACP_RXM_S_CURRENT: { + /* + * update_Selected(); + * update_NTT(); + * if (Actor_System_LACP_Version >=2 ) recordVersionNumber(); + * recordPDU(); + * start current_while_timer( + * Actor_Oper_Port_State.LACP_Timeout); + * Actor_Oper_Port_State.Expired = FALSE; + */ + int sync, unselected; + + unselected = aggr_update_selected(sc, p, lacpdu); + sync = aggr_update_ntt(p, lacpdu); + /* don't support v2 yet */ + aggr_recordpdu(p, lacpdu, sync); + aggr_start_current_while_timer(p, sc->sc_lacp_timeout); + CLR(p->p_actor_state, LACP_STATE_EXPIRED); + + if (unselected) + aggr_selection_logic(sc, p); /* restart */ + + } + break; + } +} + +static int +aggr_up(struct aggr_softc *sc) +{ + struct ifnet *ifp = &sc->sc_if; + struct aggr_port *p; + + NET_ASSERT_LOCKED(); + KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING)); + + SET(ifp->if_flags, IFF_RUNNING); /* LACP_Enabled = TRUE */ + + TAILQ_FOREACH(p, &sc->sc_ports, p_entry) { + aggr_rxm(sc, p, LACP_RXM_E_LACP_ENABLED); + + aggr_selection_logic(sc, p); + } + + /* start the Periodic Transmission machine */ + if (sc->sc_lacp_mode == AGGR_LACP_MODE_ACTIVE) { + TAILQ_FOREACH(p, &sc->sc_ports, p_entry) { + if (!aggr_port_enabled(p)) + continue; + + timeout_add_sec(&p->p_ptm_tx, + aggr_periodic_times[sc->sc_lacp_timeout]); + } + } + + return (ENETRESET); +} + +static int +aggr_iff(struct aggr_softc *sc) +{ + struct ifnet *ifp = &sc->sc_if; + NET_ASSERT_LOCKED(); + unsigned int promisc = ISSET(ifp->if_flags, IFF_PROMISC); + + if (promisc != sc->sc_promisc) { + struct aggr_port *p; + + rw_enter_read(&sc->sc_lock); + TAILQ_FOREACH(p, &sc->sc_ports, p_entry) { + struct ifnet *ifp0 = p->p_ifp0; + if (ifpromisc(ifp0, promisc) != 0) { + log(LOG_WARNING, "%s iff %s: " + "unable to turn promisc %s", + ifp->if_xname, ifp0->if_xname, + promisc ? 
"on" : "off"); + } + } + + sc->sc_promisc = promisc; + } + + return (0); +} + +static int +aggr_down(struct aggr_softc *sc) +{ + struct ifnet *ifp = &sc->sc_if; + struct aggr_port *p; + + NET_ASSERT_LOCKED(); + CLR(ifp->if_flags, IFF_RUNNING); /* LACP_Enabled = FALSE */ + + TAILQ_FOREACH(p, &sc->sc_ports, p_entry) { + aggr_rxm(sc, p, LACP_RXM_E_NOT_LACP_ENABLED); + + /* stop the Periodic Transmission machine */ + timeout_del(&p->p_ptm_tx); + + /* stop the Mux machine */ + aggr_mux(sc, p, LACP_MUX_E_UNSELECTED); + + /* stop the Transmit machine */ + timeout_del(&p->p_txm_ntt); + } + + KASSERT(TAILQ_EMPTY(&sc->sc_distributing)); + KASSERT(sc->sc_ndistributing == 0); + KASSERT(SMR_PTR_GET_LOCKED(&sc->sc_map) == NULL); + + return (ENETRESET); +} + +static int +aggr_set_lladdr(struct aggr_softc *sc, const struct ifreq *ifr) +{ + struct ifnet *ifp = &sc->sc_if; + struct aggr_port *p; + const uint8_t *lladdr = ifr->ifr_addr.sa_data; + + rw_enter_read(&sc->sc_lock); + TAILQ_FOREACH(p, &sc->sc_ports, p_entry) { + if (aggr_p_setlladdr(p, lladdr) != 0) { + struct ifnet *ifp0 = p->p_ifp0; + log(LOG_WARNING, "%s setlladdr %s: " + "unable to set lladdr", + ifp->if_xname, ifp0->if_xname); + } + } + rw_exit_read(&sc->sc_lock); + + return (0); +} + +static int +aggr_group(struct aggr_softc *sc, struct aggr_port *p, u_long cmd) +{ + struct ifnet *ifp0 = p->p_ifp0; + struct ifreq ifr; + struct sockaddr *sa; + + memset(&ifr, 0, sizeof(ifr)); + + /* make it convincing */ + CTASSERT(sizeof(ifr.ifr_name) == sizeof(ifp0->if_xname)); + memcpy(ifr.ifr_name, ifp0->if_xname, sizeof(ifr.ifr_name)); + + sa = &ifr.ifr_addr; + CTASSERT(sizeof(sa->sa_data) >= sizeof(lacp_address_slow)); + + sa->sa_family = AF_UNSPEC; + memcpy(sa->sa_data, lacp_address_slow, sizeof(lacp_address_slow)); + + return ((*p->p_ioctl)(ifp0, cmd, (caddr_t)&ifr)); +} + +static int +aggr_multi(struct aggr_softc *sc, struct aggr_port *p, + const struct aggr_multiaddr *ma, u_long cmd) +{ + struct ifnet *ifp0 = p->p_ifp0; + struct ifreq ifr; + + memset(&ifr, 0, sizeof(ifr)); + + /* make it convincing */ + CTASSERT(sizeof(ifr.ifr_name) == sizeof(ifp0->if_xname)); + memcpy(ifr.ifr_name, ifp0->if_xname, sizeof(ifr.ifr_name)); + + ifr.ifr_addr = ma->m_addr; + + return ((*p->p_ioctl)(ifp0, cmd, (caddr_t)&ifr)); +} + +static void +aggr_media_status(struct ifnet *ifp, struct ifmediareq *imr) +{ + struct aggr_softc *sc = ifp->if_softc; + + imr->ifm_status = IFM_AVALID; + imr->ifm_active = IFM_ETHER | IFM_AUTO; + + smr_read_enter(); /* there's no reason to block... */ + if (SMR_PTR_GET(&sc->sc_map) != NULL) + imr->ifm_status |= IFM_ACTIVE; + smr_read_leave(); +} + +static int +aggr_media_change(struct ifnet *ifp) +{ + return (EOPNOTSUPP); +} + +static uint32_t +aggr_hardmtu(struct aggr_softc *sc) +{ + struct aggr_port *p; + uint32_t hardmtu = ETHER_MAX_HARDMTU_LEN; + + rw_enter_read(&sc->sc_lock); + TAILQ_FOREACH(p, &sc->sc_ports, p_entry) { + struct ifnet *ifp0 = p->p_ifp0; + + if (ifp0->if_hardmtu < hardmtu) + hardmtu = ifp0->if_hardmtu; + } + rw_exit_read(&sc->sc_lock); + + return (hardmtu); +} + +static void +aggr_ptm_tx(void *arg) +{ + struct aggr_port *p = arg; + unsigned int timeout; + + aggr_ntt(p); + + timeout = ISSET(p->p_partner_state, LACP_STATE_TIMEOUT) ? 
+static void
+aggr_ptm_tx(void *arg)
+{
+	struct aggr_port *p = arg;
+	unsigned int timeout;
+
+	aggr_ntt(p);
+
+	timeout = ISSET(p->p_partner_state, LACP_STATE_TIMEOUT) ?
+	    AGGR_LACP_TIMEOUT_FAST : AGGR_LACP_TIMEOUT_SLOW;
+	timeout_add_sec(&p->p_ptm_tx, aggr_periodic_times[timeout]);
+}
+
+static inline void
+aggr_lacp_tlv_set(struct lacp_tlv_hdr *tlv, uint8_t type, uint8_t len)
+{
+	tlv->lacp_tlv_type = type;
+	tlv->lacp_tlv_length = sizeof(*tlv) + len;
+}
+
+static void
+aggr_ntt_transmit(struct aggr_port *p)
+{
+	struct aggr_softc *sc = p->p_aggr;
+	struct arpcom *ac = &sc->sc_ac;
+	struct ifnet *ifp = &sc->sc_if;
+	struct ifnet *ifp0 = p->p_ifp0;
+	struct mbuf *m;
+	struct lacp_du *lacpdu;
+	struct lacp_port_info *pi;
+	struct lacp_collector_info *ci;
+	struct ether_header *eh;
+	int linkhdr = max_linkhdr + ETHER_ALIGN;
+	int len = linkhdr + sizeof(*eh) + sizeof(*lacpdu);
+
+	m = m_gethdr(M_DONTWAIT, MT_DATA);
+	if (m == NULL)
+		return;
+
+	if (len > MHLEN) {
+		MCLGETI(m, M_DONTWAIT, NULL, len);
+		if (!ISSET(m->m_flags, M_EXT)) {
+			m_freem(m);
+			return;
+		}
+	}
+
+	m->m_pkthdr.pf.prio = sc->sc_if.if_llprio;
+	m->m_pkthdr.len = m->m_len = len;
+	memset(m->m_data, 0, m->m_len);
+	m_adj(m, linkhdr);
+
+	eh = mtod(m, struct ether_header *);
+
+	CTASSERT(sizeof(eh->ether_dhost) == sizeof(lacp_address_slow));
+	CTASSERT(sizeof(eh->ether_shost) == sizeof(ac->ac_enaddr));
+
+	memcpy(eh->ether_dhost, lacp_address_slow, sizeof(eh->ether_dhost));
+	memcpy(eh->ether_shost, ac->ac_enaddr, sizeof(eh->ether_shost));
+	eh->ether_type = htons(ETHERTYPE_SLOW);
+
+	lacpdu = (struct lacp_du *)(eh + 1);
+	lacpdu->lacp_du_sph.sph_subtype = SLOWPROTOCOLS_SUBTYPE_LACP;
+	lacpdu->lacp_du_sph.sph_version = LACP_VERSION;
+
+	pi = &lacpdu->lacp_actor_info;
+	aggr_lacp_tlv_set(&lacpdu->lacp_actor_info_tlv,
+	    LACP_T_ACTOR, sizeof(*pi));
+
+	pi->lacp_sysid.lacp_sysid_priority = htons(sc->sc_lacp_prio);
+	CTASSERT(sizeof(pi->lacp_sysid.lacp_sysid_mac) ==
+	    sizeof(ac->ac_enaddr));
+	memcpy(pi->lacp_sysid.lacp_sysid_mac, ac->ac_enaddr,
+	    sizeof(pi->lacp_sysid.lacp_sysid_mac));
+
+	pi->lacp_key = htons(ifp->if_index);
+
+	pi->lacp_portid.lacp_portid_priority = htons(sc->sc_lacp_port_prio);
+	pi->lacp_portid.lacp_portid_number = htons(ifp0->if_index);
+
+	pi->lacp_state = p->p_actor_state;
+	if (sc->sc_lacp_mode)
+		SET(pi->lacp_state, LACP_STATE_ACTIVITY);
+	if (sc->sc_lacp_timeout)
+		SET(pi->lacp_state, LACP_STATE_TIMEOUT);
+
+	pi = &lacpdu->lacp_partner_info;
+	aggr_lacp_tlv_set(&lacpdu->lacp_partner_info_tlv,
+	    LACP_T_PARTNER, sizeof(*pi));
+
+	*pi = p->p_partner;
+
+	ci = &lacpdu->lacp_collector_info;
+	aggr_lacp_tlv_set(&lacpdu->lacp_collector_info_tlv,
+	    LACP_T_COLLECTOR, sizeof(*ci));
+	ci->lacp_maxdelay = htons(0);
+
+	lacpdu->lacp_terminator.lacp_tlv_type = LACP_T_TERMINATOR;
+	lacpdu->lacp_terminator.lacp_tlv_length = 0;
+
+	(void)if_enqueue(ifp0, m);
+}
+
+static void
+aggr_ntt(struct aggr_port *p)
+{
+	if (!timeout_pending(&p->p_txm_ntt))
+		timeout_add(&p->p_txm_ntt, 0);
+}
+
+static void
+aggr_transmit_machine(void *arg)
+{
+	struct aggr_port *p = arg;
+	struct aggr_softc *sc = p->p_aggr;
+	unsigned int slot;
+	int *log;
+	int period = hz * LACP_FAST_PERIODIC_TIME;
+	int diff;
+
+	if (!aggr_lacp_enabled(sc) || !aggr_port_enabled(p))
+		return;
+
+	slot = p->p_txm_slot;
+	log = &p->p_txm_log[slot % nitems(p->p_txm_log)];
+
+	diff = ticks - *log;
+	if (diff < period) {
+		timeout_add(&p->p_txm_ntt, period - diff);
+		return;
+	}
+
+	*log = ticks;
+	p->p_txm_slot = ++slot;
+
+#if 0
+	DPRINTF(sc, "%s %s ntt\n", sc->sc_if.if_xname, p->p_ifp0->if_xname);
+#endif
+
+	aggr_ntt_transmit(p);
+}
+
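+/*
+ * Rate limiting in aggr_transmit_machine() above, with example numbers
+ * (illustrative only): p_txm_log[] remembers the tick time of the last
+ * nitems(p_txm_log) transmits, and a new LACPDU is only sent when the
+ * oldest remembered transmit is at least one fast periodic interval
+ * old.  E.g. with hz = 100 the period is 100 ticks; if that oldest
+ * transmit happened 40 ticks ago, the timeout is re-armed for the
+ * remaining 60 ticks instead of transmitting immediately.
+ */
+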
+static void
+aggr_set_lacp_mode(struct aggr_softc *sc, int mode)
+{
+	sc->sc_lacp_mode = mode;
+
+	if (mode == AGGR_LACP_MODE_PASSIVE) {
+		struct aggr_port *p;
+
+		TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
+			if (!ISSET(p->p_partner_state, LACP_STATE_ACTIVITY))
+				timeout_del(&p->p_ptm_tx);
+		}
+	}
+}
+
+static void
+aggr_set_partner_timeout(struct aggr_port *p, int timeout)
+{
+	uint8_t ostate = ISSET(p->p_partner_state, LACP_STATE_TIMEOUT);
+	uint8_t nstate = (timeout == AGGR_LACP_TIMEOUT_FAST) ?
+	    LACP_STATE_TIMEOUT : 0;
+
+	if (ostate == nstate)
+		return;
+
+	if (timeout == AGGR_LACP_TIMEOUT_FAST) {
+		SET(p->p_partner_state, LACP_STATE_TIMEOUT);
+		timeout_add_sec(&p->p_ptm_tx,
+		    aggr_periodic_times[AGGR_LACP_TIMEOUT_FAST]);
+	} else
+		CLR(p->p_partner_state, LACP_STATE_TIMEOUT);
+}
+
+static void
+aggr_set_lacp_timeout(struct aggr_softc *sc, int timeout)
+{
+	struct aggr_port *p;
+
+	sc->sc_lacp_timeout = timeout;
+
+	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
+		if (!ISSET(p->p_actor_state, LACP_STATE_DEFAULTED))
+			continue;
+
+		aggr_set_partner_timeout(p, timeout);
+	}
+}
+
+static int
+aggr_multi_eq(const struct aggr_multiaddr *ma,
+    const uint8_t *addrlo, const uint8_t *addrhi)
+{
+	return (ether_is_eq(ma->m_addrlo, addrlo) &&
+	    ether_is_eq(ma->m_addrhi, addrhi));
+}
+
+static int
+aggr_multi_add(struct aggr_softc *sc, struct ifreq *ifr)
+{
+	struct ifnet *ifp = &sc->sc_if;
+	struct aggr_port *p;
+	struct aggr_multiaddr *ma;
+	uint8_t addrlo[ETHER_ADDR_LEN];
+	uint8_t addrhi[ETHER_ADDR_LEN];
+	int error;
+
+	error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi);
+	if (error != 0)
+		return (error);
+
+	TAILQ_FOREACH(ma, &sc->sc_multiaddrs, m_entry) {
+		if (aggr_multi_eq(ma, addrlo, addrhi)) {
+			ma->m_refs++;
+			return (0);
+		}
+	}
+
+	ma = malloc(sizeof(*ma), M_DEVBUF, M_WAITOK|M_CANFAIL);
+	if (ma == NULL)
+		return (ENOMEM);
+
+	ma->m_refs = 1;
+	ma->m_addr = ifr->ifr_addr;
+	memcpy(ma->m_addrlo, addrlo, sizeof(ma->m_addrlo));
+	memcpy(ma->m_addrhi, addrhi, sizeof(ma->m_addrhi));
+	TAILQ_INSERT_TAIL(&sc->sc_multiaddrs, ma, m_entry);
+
+	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
+		struct ifnet *ifp0 = p->p_ifp0;
+
+		if (aggr_multi(sc, p, ma, SIOCADDMULTI) != 0) {
+			log(LOG_WARNING, "%s %s: "
+			    "unable to add multicast address\n",
+			    ifp->if_xname, ifp0->if_xname);
+		}
+	}
+
+	return (0);
+}
+
+static int
+aggr_multi_del(struct aggr_softc *sc, struct ifreq *ifr)
+{
+	struct ifnet *ifp = &sc->sc_if;
+	struct aggr_port *p;
+	struct aggr_multiaddr *ma;
+	uint8_t addrlo[ETHER_ADDR_LEN];
+	uint8_t addrhi[ETHER_ADDR_LEN];
+	int error;
+
+	error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi);
+	if (error != 0)
+		return (error);
+
+	TAILQ_FOREACH(ma, &sc->sc_multiaddrs, m_entry) {
+		if (aggr_multi_eq(ma, addrlo, addrhi))
+			break;
+	}
+
+	if (ma == NULL)
+		return (EINVAL);
+
+	if (--ma->m_refs > 0)
+		return (0);
+
+	TAILQ_REMOVE(&sc->sc_multiaddrs, ma, m_entry);
+
+	TAILQ_FOREACH(p, &sc->sc_ports, p_entry) {
+		struct ifnet *ifp0 = p->p_ifp0;
+
+		if (aggr_multi(sc, p, ma, SIOCDELMULTI) != 0) {
+			log(LOG_WARNING, "%s %s: "
+			    "unable to delete multicast address\n",
+			    ifp->if_xname, ifp0->if_xname);
+		}
+	}
+
+	free(ma, M_DEVBUF, sizeof(*ma));
+
+	return (0);
+}
Index: net/if_trunk.c
===================================================================
RCS file: /cvs/src/sys/net/if_trunk.c,v
retrieving revision 1.140
diff -u -p -r1.140 if_trunk.c
--- net/if_trunk.c	11 May 2019 18:10:45 -0000	1.140
+++ net/if_trunk.c	27 Jun 2019 03:14:16 -0000
@@ -285,6 +285,7 @@ trunk_port_create(struct trunk_softc *tr
 {
 	struct trunk_softc *tr_ptr;
 	struct trunk_port *tp;
+	struct arpcom *ac0;
 	int error = 0;
 
 	/* Limit the maximal number of trunk ports */
@@ -299,6 +300,10 @@ trunk_port_create(struct trunk_softc *tr
 	if (ifp->if_type != IFT_ETHER)
 		return (EPROTONOSUPPORT);
 
+	ac0 = (struct arpcom *)ifp;
+	if (ac0->ac_trunkport != NULL)
+		return (EBUSY);
+
 	/* Take MTU from the first member port */
 	if (SLIST_EMPTY(&tr->tr_ports)) {
 		if (tr->tr_ifflags & IFF_DEBUG)
@@ -377,6 +382,7 @@ trunk_port_create(struct trunk_softc *tr
 	if (tr->tr_port_create != NULL)
 		error = (*tr->tr_port_create)(tp);
 
+	ac0->ac_trunkport = tp;
 	/* Change input handler of the physical interface. */
 	if_ih_insert(ifp, trunk_input, tp);
 
@@ -406,9 +412,11 @@ trunk_port_destroy(struct trunk_port *tp
 	struct trunk_softc *tr = (struct trunk_softc *)tp->tp_trunk;
 	struct trunk_port *tp_ptr;
 	struct ifnet *ifp = tp->tp_if;
+	struct arpcom *ac0 = (struct arpcom *)ifp;
 
 	/* Restore previous input handler. */
 	if_ih_remove(ifp, trunk_input, tp);
+	ac0->ac_trunkport = NULL;
 
 	/* Remove multicast addresses from this port */
 	trunk_ether_cmdmulti(tp, SIOCDELMULTI);
Index: netinet/if_ether.h
===================================================================
RCS file: /cvs/src/sys/netinet/if_ether.h,v
retrieving revision 1.74
diff -u -p -r1.74 if_ether.h
--- netinet/if_ether.h	11 Dec 2018 01:27:08 -0000	1.74
+++ netinet/if_ether.h	27 Jun 2019 03:14:16 -0000
@@ -205,6 +205,7 @@ struct arpcom {
 	int	ac_multicnt;		/* length of ac_multiaddrs */
 	int	ac_multirangecnt;	/* number of mcast ranges */
+	void	*ac_trunkport;
 };
 
 extern int arpt_keep;	/* arp resolved cache expire */
Index: conf/files
===================================================================
RCS file: /cvs/src/sys/conf/files,v
retrieving revision 1.671
diff -u -p -r1.671 files
--- conf/files	4 May 2019 11:34:47 -0000	1.671
+++ conf/files	27 Jun 2019 03:14:16 -0000
@@ -562,6 +562,7 @@ pseudo-device gre: ifnet
 pseudo-device mobileip: ifnet
 pseudo-device crypto: ifnet
 pseudo-device trunk: ifnet, ether, ifmedia
+pseudo-device aggr: ifnet, ether, ifmedia
 pseudo-device mpe: ifnet, mpls
 pseudo-device mpw: ifnet, mpls, ether
 pseudo-device mpip: ifnet, mpls
@@ -816,6 +817,7 @@ file net/if_gre.c gre needs-count
 file	net/if_mobileip.c	mobileip	needs-count
 file	net/if_trunk.c		trunk		needs-count
 file	net/trunklacp.c		trunk
+file	net/if_aggr.c		aggr
 file	net/if_mpe.c		mpe		needs-count
 file	net/if_mpw.c		mpw		needs-count
 file	net/if_mpip.c		mpip
Index: conf/GENERIC
===================================================================
RCS file: /cvs/src/sys/conf/GENERIC,v
retrieving revision 1.261
diff -u -p -r1.261 GENERIC
--- conf/GENERIC	22 Apr 2019 02:04:12 -0000	1.261
+++ conf/GENERIC	27 Jun 2019 03:14:16 -0000
@@ -101,6 +101,7 @@ pseudo-device pppoe # PPP over Ethernet
 pseudo-device	pppx		# PPP multiplexer
 pseudo-device	sppp	1	# Sync PPP/HDLC
 pseudo-device	trunk		# Trunking support
+pseudo-device	aggr		# 802.1AX link aggregation
 pseudo-device	tun		# network tunneling over tty (tun & tap)
 pseudo-device	vether		# Virtual ethernet
 pseudo-device	vxlan		# Virtual extensible LAN