Index: bpf.c =================================================================== RCS file: /cvs/src/sys/net/bpf.c,v retrieving revision 1.219 diff -u -p -r1.219 bpf.c --- bpf.c 9 Jul 2022 12:48:21 -0000 1.219 +++ bpf.c 5 Nov 2022 10:37:51 -0000 @@ -144,8 +144,8 @@ bpf_movein(struct uio *uio, struct bpf_d struct mbuf *m; struct m_tag *mtag; int error; - u_int hlen, alen, mlen; - u_int len; + size_t hlen, alen, mlen; + size_t len; u_int linktype; u_int slen; Index: if_pfsync.c =================================================================== RCS file: /cvs/src/sys/net/if_pfsync.c,v retrieving revision 1.305 diff -u -p -r1.305 if_pfsync.c --- if_pfsync.c 21 Apr 2022 15:22:49 -0000 1.305 +++ if_pfsync.c 5 Nov 2022 10:37:51 -0000 @@ -42,6 +42,9 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#include "bpfilter.h" +#include "pfsync.h" + #include #include #include @@ -54,6 +57,7 @@ #include #include #include +#include #include #include @@ -85,296 +89,137 @@ #include #endif -#define PF_DEBUGNAME "pfsync: " #include #include #include -#include "bpfilter.h" -#include "pfsync.h" - #define PFSYNC_DEFER_NSEC 20000000ULL #define PFSYNC_MINPKT ( \ sizeof(struct ip) + \ sizeof(struct pfsync_header)) -int pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *, - struct pfsync_state_peer *); - -int pfsync_in_clr(caddr_t, int, int, int); -int pfsync_in_iack(caddr_t, int, int, int); -int pfsync_in_upd_c(caddr_t, int, int, int); -int pfsync_in_ureq(caddr_t, int, int, int); -int pfsync_in_del(caddr_t, int, int, int); -int pfsync_in_del_c(caddr_t, int, int, int); -int pfsync_in_bus(caddr_t, int, int, int); -int pfsync_in_tdb(caddr_t, int, int, int); -int pfsync_in_ins(caddr_t, int, int, int); -int pfsync_in_upd(caddr_t, int, int, int); -int pfsync_in_eof(caddr_t, int, int, int); - -int pfsync_in_error(caddr_t, int, int, int); - -void pfsync_update_state_locked(struct pf_state *); - -struct { - int (*in)(caddr_t, int, int, int); - size_t len; -} pfsync_acts[] = { - /* PFSYNC_ACT_CLR */ - { pfsync_in_clr, sizeof(struct pfsync_clr) }, - /* PFSYNC_ACT_OINS */ - { pfsync_in_error, 0 }, - /* PFSYNC_ACT_INS_ACK */ - { pfsync_in_iack, sizeof(struct pfsync_ins_ack) }, - /* PFSYNC_ACT_OUPD */ - { pfsync_in_error, 0 }, - /* PFSYNC_ACT_UPD_C */ - { pfsync_in_upd_c, sizeof(struct pfsync_upd_c) }, - /* PFSYNC_ACT_UPD_REQ */ - { pfsync_in_ureq, sizeof(struct pfsync_upd_req) }, - /* PFSYNC_ACT_DEL */ - { pfsync_in_del, sizeof(struct pfsync_state) }, - /* PFSYNC_ACT_DEL_C */ - { pfsync_in_del_c, sizeof(struct pfsync_del_c) }, - /* PFSYNC_ACT_INS_F */ - { pfsync_in_error, 0 }, - /* PFSYNC_ACT_DEL_F */ - { pfsync_in_error, 0 }, - /* PFSYNC_ACT_BUS */ - { pfsync_in_bus, sizeof(struct pfsync_bus) }, - /* PFSYNC_ACT_OTDB */ - { pfsync_in_error, 0 }, - /* PFSYNC_ACT_EOF */ - { pfsync_in_error, 0 }, - /* PFSYNC_ACT_INS */ - { pfsync_in_ins, sizeof(struct pfsync_state) }, - /* PFSYNC_ACT_UPD */ - { pfsync_in_upd, sizeof(struct pfsync_state) }, - /* PFSYNC_ACT_TDB */ - { pfsync_in_tdb, sizeof(struct pfsync_tdb) }, -}; - -struct pfsync_q { - void (*write)(struct pf_state *, void *); - size_t len; - u_int8_t action; -}; - -/* we have one of these for every PFSYNC_S_ */ -void pfsync_out_state(struct pf_state *, void *); -void pfsync_out_iack(struct pf_state *, void *); -void pfsync_out_upd_c(struct pf_state *, void *); -void pfsync_out_del(struct pf_state *, void *); - -struct pfsync_q pfsync_qs[] = { - { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, - { pfsync_out_upd_c, sizeof(struct 
pfsync_upd_c), PFSYNC_ACT_UPD_C }, - { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C }, - { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS }, - { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD } -}; - -void pfsync_q_ins(struct pf_state *, int); -void pfsync_q_del(struct pf_state *); - -struct pfsync_upd_req_item { - TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; - TAILQ_ENTRY(pfsync_upd_req_item) ur_snap; - struct pfsync_upd_req ur_msg; -}; -TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item); +struct pfsync_softc; -struct pfsync_deferral { - TAILQ_ENTRY(pfsync_deferral) pd_entry; - struct pf_state *pd_st; - struct mbuf *pd_m; - uint64_t pd_deadline; +struct pfsync_txstage { + struct pfsync_softc *tx_softc; + struct mutex tx_mtx; + struct task tx_task; }; -TAILQ_HEAD(pfsync_deferrals, pfsync_deferral); - -#define PFSYNC_PLSIZE MAX(sizeof(struct pfsync_upd_req_item), \ - sizeof(struct pfsync_deferral)) - -void pfsync_out_tdb(struct tdb *, void *); struct pfsync_softc { struct ifnet sc_if; - unsigned int sc_sync_ifidx; - - struct pool sc_pool; - - struct ip_moptions sc_imo; - - struct in_addr sc_sync_peer; - u_int8_t sc_maxupdates; - - struct ip sc_template; - - struct pf_state_queue sc_qs[PFSYNC_S_COUNT]; - struct mutex sc_st_mtx; - size_t sc_len; - - struct pfsync_upd_reqs sc_upd_req_list; - struct mutex sc_upd_req_mtx; - - int sc_initial_bulk; - int sc_link_demoted; - - int sc_defer; - struct pfsync_deferrals sc_deferrals; - u_int sc_deferred; - struct mutex sc_deferrals_mtx; - struct timeout sc_deferrals_tmo; - - void *sc_plus; - size_t sc_pluslen; - - u_int32_t sc_ureq_sent; - int sc_bulk_tries; - struct timeout sc_bulkfail_tmo; + unsigned int sc_dead; - u_int32_t sc_ureq_received; - struct pf_state *sc_bulk_next; - struct pf_state *sc_bulk_last; - struct timeout sc_bulk_tmo; + struct refcnt sc_refs; - TAILQ_HEAD(, tdb) sc_tdb_q; - struct mutex sc_tdb_mtx; + unsigned int sc_parent; - struct task sc_ltask; - struct task sc_dtask; - - struct timeout sc_tmo; + struct pfsync_txstage sc_txstage[1]; }; -struct pfsync_snapshot { - struct pfsync_softc *sn_sc; - struct pf_state_queue sn_qs[PFSYNC_S_COUNT]; - struct pfsync_upd_reqs sn_upd_req_list; - TAILQ_HEAD(, tdb) sn_tdb_q; - size_t sn_len; - void *sn_plus; - size_t sn_pluslen; -}; +void pfsyncattach(int); -struct pfsync_softc *pfsyncif = NULL; -struct cpumem *pfsynccounters; +static int pfsync_clone_create(struct if_clone *, int); +static int pfsync_clone_destroy(struct ifnet *); -void pfsyncattach(int); -int pfsync_clone_create(struct if_clone *, int); -int pfsync_clone_destroy(struct ifnet *); -int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, - struct pf_state_peer *); -void pfsync_update_net_tdb(struct pfsync_tdb *); -int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *, - struct rtentry *); -int pfsyncioctl(struct ifnet *, u_long, caddr_t); -void pfsyncstart(struct ifqueue *); -void pfsync_syncdev_state(void *); -void pfsync_ifdetach(void *); - -void pfsync_deferred(struct pf_state *, int); -void pfsync_undefer(struct pfsync_deferral *, int); -void pfsync_deferrals_tmo(void *); - -void pfsync_cancel_full_update(struct pfsync_softc *); -void pfsync_request_full_update(struct pfsync_softc *); -void pfsync_request_update(u_int32_t, u_int64_t); -void pfsync_update_state_req(struct pf_state *); - -void pfsync_drop(struct pfsync_softc *); -void pfsync_sendout(void); -void pfsync_send_plus(void *, size_t); -void pfsync_timeout(void *); -void pfsync_tdb_timeout(void *); - -void 
pfsync_bulk_start(void); -void pfsync_bulk_status(u_int8_t); -void pfsync_bulk_update(void *); -void pfsync_bulk_fail(void *); - -void pfsync_grab_snapshot(struct pfsync_snapshot *, struct pfsync_softc *); -void pfsync_drop_snapshot(struct pfsync_snapshot *); - -void pfsync_send_dispatch(void *); -void pfsync_send_pkt(struct mbuf *); - -static struct mbuf_queue pfsync_mq; -static struct task pfsync_task = - TASK_INITIALIZER(pfsync_send_dispatch, &pfsync_mq); +static struct if_clone pfsync_cloner = + IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy); -#define PFSYNC_MAX_BULKTRIES 12 -int pfsync_sync_ok; +static int pfsync_ioctl(struct ifnet *, u_long, caddr_t); +static int pfsync_output(struct ifnet *, struct mbuf *, struct sockaddr *, + struct rtentry *); +static void pfsync_start(struct ifqueue *); + +static int pfsync_up(struct pfsync_softc *); +static int pfsync_down(struct pfsync_softc *); + +struct pfsync_softc *pfsync_if; /* not static so pfsync_is_up() can see it */ + +static int pfsync_in_clr(struct pfsync_softc *, unsigned int, + const void *, size_t); +static int pfsync_in_iack(struct pfsync_softc *, unsigned int, + const void *, size_t); +static int pfsync_in_upd_c(struct pfsync_softc *, unsigned int, + const void *, size_t); +static int pfsync_in_ureq(struct pfsync_softc *, unsigned int, + const void *, size_t); +static int pfsync_in_del(struct pfsync_softc *, unsigned int, + const void *, size_t); +static int pfsync_in_del_c(struct pfsync_softc *, unsigned int, + const void *, size_t); +static int pfsync_in_bus(struct pfsync_softc *, unsigned int, + const void *, size_t); +static int pfsync_in_tdb(struct pfsync_softc *, unsigned int, + const void *, size_t); +static int pfsync_in_ins(struct pfsync_softc *, unsigned int, + const void *, size_t); +static int pfsync_in_upd(struct pfsync_softc *, unsigned int, + const void *, size_t); +static int pfsync_in_eof(struct pfsync_softc *, unsigned int, + const void *, size_t); -struct if_clone pfsync_cloner = - IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy); +struct { + int (*handler)(struct pfsync_softc *, unsigned int, + const void *, size_t); + size_t len; +} pfsync_acts[] = { + [PFSYNC_ACT_CLR] = + { pfsync_in_clr, sizeof(struct pfsync_clr) }, + [PFSYNC_ACT_INS_ACK] = + { pfsync_in_iack, sizeof(struct pfsync_ins_ack) }, + [PFSYNC_ACT_UPD_C] = + { pfsync_in_upd_c, sizeof(struct pfsync_upd_c) }, + [PFSYNC_ACT_UPD_REQ] = + { pfsync_in_ureq, sizeof(struct pfsync_upd_req) }, + [PFSYNC_ACT_DEL] = + { pfsync_in_del, sizeof(struct pfsync_state) }, + [PFSYNC_ACT_DEL_C] = + { pfsync_in_del_c, sizeof(struct pfsync_del_c) }, + [PFSYNC_ACT_BUS] = + { pfsync_in_bus, sizeof(struct pfsync_bus) }, + [PFSYNC_ACT_INS] = + { pfsync_in_ins, sizeof(struct pfsync_state) }, + [PFSYNC_ACT_UPD] = + { pfsync_in_upd, sizeof(struct pfsync_state) }, + [PFSYNC_ACT_TDB] = + { pfsync_in_tdb, sizeof(struct pfsync_tdb) }, +}; void pfsyncattach(int npfsync) { if_clone_attach(&pfsync_cloner); - pfsynccounters = counters_alloc(pfsyncs_ncounters); - mq_init(&pfsync_mq, 4096, IPL_MPFLOOR); } -int +static int pfsync_clone_create(struct if_clone *ifc, int unit) { struct pfsync_softc *sc; struct ifnet *ifp; - int q; if (unit != 0) return (EINVAL); - pfsync_sync_ok = 1; - - sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_WAITOK|M_ZERO); - for (q = 0; q < PFSYNC_S_COUNT; q++) - TAILQ_INIT(&sc->sc_qs[q]); - mtx_init(&sc->sc_st_mtx, IPL_MPFLOOR); - - pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, IPL_MPFLOOR, 0, "pfsync", - NULL); - 
TAILQ_INIT(&sc->sc_upd_req_list); - mtx_init(&sc->sc_upd_req_mtx, IPL_MPFLOOR); - TAILQ_INIT(&sc->sc_deferrals); - mtx_init(&sc->sc_deferrals_mtx, IPL_MPFLOOR); - timeout_set_proc(&sc->sc_deferrals_tmo, pfsync_deferrals_tmo, sc); - task_set(&sc->sc_ltask, pfsync_syncdev_state, sc); - task_set(&sc->sc_dtask, pfsync_ifdetach, sc); - sc->sc_deferred = 0; - - TAILQ_INIT(&sc->sc_tdb_q); - mtx_init(&sc->sc_tdb_mtx, IPL_MPFLOOR); - - sc->sc_len = PFSYNC_MINPKT; - sc->sc_maxupdates = 128; - - sc->sc_imo.imo_membership = mallocarray(IP_MIN_MEMBERSHIPS, - sizeof(struct in_multi *), M_IPMOPTS, M_WAITOK|M_ZERO); - sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; + sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); ifp = &sc->sc_if; - snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit); + snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", + ifc->ifc_name, unit); ifp->if_softc = sc; - ifp->if_ioctl = pfsyncioctl; - ifp->if_output = pfsyncoutput; - ifp->if_qstart = pfsyncstart; + ifp->if_ioctl = pfsync_ioctl; + ifp->if_output = pfsync_output; + ifp->if_qstart = pfsync_start; ifp->if_type = IFT_PFSYNC; ifp->if_hdrlen = sizeof(struct pfsync_header); ifp->if_mtu = ETHERMTU; ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE; - timeout_set_proc(&sc->sc_tmo, pfsync_timeout, NULL); - timeout_set_proc(&sc->sc_bulk_tmo, pfsync_bulk_update, NULL); - timeout_set_proc(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, NULL); if_attach(ifp); if_alloc_sadl(ifp); + if_counters_alloc(ifp); -#if NCARP > 0 +#if 0 && NCARP > 0 if_addgroup(ifp, "carp"); #endif @@ -382,559 +227,412 @@ pfsync_clone_create(struct if_clone *ifc bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN); #endif - pfsyncif = sc; - return (0); } -int +static int pfsync_clone_destroy(struct ifnet *ifp) { struct pfsync_softc *sc = ifp->if_softc; - struct ifnet *ifp0; - struct pfsync_deferral *pd; - struct pfsync_deferrals deferrals; NET_LOCK(); + sc->sc_dead = 1; -#if NCARP > 0 - if (!pfsync_sync_ok) - carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy"); - if (sc->sc_link_demoted) - carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy"); -#endif - if ((ifp0 = if_get(sc->sc_sync_ifidx)) != NULL) { - if_linkstatehook_del(ifp0, &sc->sc_ltask); - if_detachhook_del(ifp0, &sc->sc_dtask); - } - if_put(ifp0); - - /* XXXSMP breaks atomicity */ + if (ISSET(ifp->if_flags, IFF_RUNNING)) + pfsync_down(sc); NET_UNLOCK(); - if_detach(ifp); - NET_LOCK(); - - pfsync_drop(sc); - - if (sc->sc_deferred > 0) { - TAILQ_INIT(&deferrals); - mtx_enter(&sc->sc_deferrals_mtx); - TAILQ_CONCAT(&deferrals, &sc->sc_deferrals, pd_entry); - sc->sc_deferred = 0; - mtx_leave(&sc->sc_deferrals_mtx); - - while ((pd = TAILQ_FIRST(&deferrals)) != NULL) { - TAILQ_REMOVE(&deferrals, pd, pd_entry); - pfsync_undefer(pd, 0); - } - } - pfsyncif = NULL; - timeout_del(&sc->sc_bulkfail_tmo); - timeout_del(&sc->sc_bulk_tmo); - timeout_del(&sc->sc_tmo); - - NET_UNLOCK(); + if_detach(ifp); - pool_destroy(&sc->sc_pool); - free(sc->sc_imo.imo_membership, M_IPMOPTS, - sc->sc_imo.imo_max_memberships * sizeof(struct in_multi *)); free(sc, M_DEVBUF, sizeof(*sc)); return (0); } -/* - * Start output on the pfsync interface. 
- */ -void -pfsyncstart(struct ifqueue *ifq) -{ - ifq_purge(ifq); -} - -void -pfsync_syncdev_state(void *arg) +static int +pfsync_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { - struct pfsync_softc *sc = arg; - struct ifnet *ifp; - - if ((sc->sc_if.if_flags & IFF_UP) == 0) - return; - if ((ifp = if_get(sc->sc_sync_ifidx)) == NULL) - return; + struct pfsync_softc *sc = ifp->if_softc; + //struct ifreq *ifr = (struct ifreq *)data; + int error = 0; - if (ifp->if_link_state == LINK_STATE_DOWN) { - sc->sc_if.if_flags &= ~IFF_RUNNING; - if (!sc->sc_link_demoted) { -#if NCARP > 0 - carp_group_demote_adj(&sc->sc_if, 1, - "pfsync link state down"); -#endif - sc->sc_link_demoted = 1; + switch (cmd) { + case SIOCSIFADDR: + error = ENODEV; + break; + case SIOCSIFFLAGS: + if (ISSET(ifp->if_flags, IFF_UP)) { + if (!ISSET(ifp->if_flags, IFF_RUNNING)) + error = pfsync_up(sc); + } else { + if (ISSET(ifp->if_flags, IFF_RUNNING)) + error = pfsync_down(sc); } + break; - /* drop everything */ - timeout_del(&sc->sc_tmo); - pfsync_drop(sc); - - pfsync_cancel_full_update(sc); - } else if (sc->sc_link_demoted) { - sc->sc_if.if_flags |= IFF_RUNNING; - - pfsync_request_full_update(sc); + default: + error = ENOTTY; } - if_put(ifp); + return (error); } -void -pfsync_ifdetach(void *arg) +static int +pfsync_up(struct pfsync_softc *sc) { - struct pfsync_softc *sc = arg; - struct ifnet *ifp; + struct ifnet *ifp = &sc->sc_if; - if ((ifp = if_get(sc->sc_sync_ifidx)) != NULL) { - if_linkstatehook_del(ifp, &sc->sc_ltask); - if_detachhook_del(ifp, &sc->sc_dtask); - } - if_put(ifp); + NET_ASSERT_LOCKED(); - sc->sc_sync_ifidx = 0; -} + if (sc->sc_dead) + return (ENXIO); -int -pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, - struct pf_state_peer *d) -{ - if (s->scrub.scrub_flag && d->scrub == NULL) { - d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO); - if (d->scrub == NULL) - return (ENOMEM); - } + SET(ifp->if_flags, IFF_RUNNING); + refcnt_init(&sc->sc_refs); - return (0); -} + /* commit */ + SMR_PTR_SET_LOCKED(&pfsync_if, sc); -void -pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) -{ - pf_state_export(sp, st); + return (0); } -int -pfsync_state_import(struct pfsync_state *sp, int flags) +static inline struct pfsync_softc * +pfsync_get(void) { - struct pf_state *st = NULL; - struct pf_state_key *skw = NULL, *sks = NULL; - struct pf_rule *r = NULL; - struct pfi_kif *kif; - int pool_flags; - int error = ENOMEM; - int n = 0; - - if (sp->creatorid == 0) { - DPFPRINTF(LOG_NOTICE, "pfsync_state_import: " - "invalid creator id: %08x", ntohl(sp->creatorid)); - return (EINVAL); - } - - if ((kif = pfi_kif_get(sp->ifname, NULL)) == NULL) { - DPFPRINTF(LOG_NOTICE, "pfsync_state_import: " - "unknown interface: %s", sp->ifname); - if (flags & PFSYNC_SI_IOCTL) - return (EINVAL); - return (0); /* skip this state */ - } - - if (sp->af == 0) - return (0); /* skip this state */ - - /* - * If the ruleset checksums match or the state is coming from the ioctl, - * it's safe to associate the state with the rule of that number. 
- */ - if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && - (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < - pf_main_ruleset.rules.active.rcount) { - TAILQ_FOREACH(r, pf_main_ruleset.rules.active.ptr, entries) - if (ntohl(sp->rule) == n++) - break; - } else - r = &pf_default_rule; + struct pfsync_softc *sc; - if ((r->max_states && r->states_cur >= r->max_states)) - goto cleanup; + smr_read_enter(); + sc = SMR_PTR_GET(&pfsync_if); + if (sc != NULL) + refcnt_take(&sc->sc_refs); + smr_read_leave(); - if (flags & PFSYNC_SI_IOCTL) - pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO; - else - pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO; + return (sc); +} - if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL) - goto cleanup; +static inline void +pfsync_put(struct pfsync_softc *sc) +{ + refcnt_rele_wake(&sc->sc_refs); +} - if ((skw = pf_alloc_state_key(pool_flags)) == NULL) - goto cleanup; +static int +pfsync_down(struct pfsync_softc *sc) +{ + struct ifnet *ifp = &sc->sc_if; - if ((sp->key[PF_SK_WIRE].af && - (sp->key[PF_SK_WIRE].af != sp->key[PF_SK_STACK].af)) || - PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0], - &sp->key[PF_SK_STACK].addr[0], sp->af) || - PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1], - &sp->key[PF_SK_STACK].addr[1], sp->af) || - sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] || - sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] || - sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) { - if ((sks = pf_alloc_state_key(pool_flags)) == NULL) - goto cleanup; - } else - sks = skw; - - /* allocate memory for scrub info */ - if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || - pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) - goto cleanup; - - /* copy to state key(s) */ - skw->addr[0] = sp->key[PF_SK_WIRE].addr[0]; - skw->addr[1] = sp->key[PF_SK_WIRE].addr[1]; - skw->port[0] = sp->key[PF_SK_WIRE].port[0]; - skw->port[1] = sp->key[PF_SK_WIRE].port[1]; - skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain); - PF_REF_INIT(skw->refcnt); - skw->proto = sp->proto; - if (!(skw->af = sp->key[PF_SK_WIRE].af)) - skw->af = sp->af; - if (sks != skw) { - sks->addr[0] = sp->key[PF_SK_STACK].addr[0]; - sks->addr[1] = sp->key[PF_SK_STACK].addr[1]; - sks->port[0] = sp->key[PF_SK_STACK].port[0]; - sks->port[1] = sp->key[PF_SK_STACK].port[1]; - sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain); - PF_REF_INIT(sks->refcnt); - if (!(sks->af = sp->key[PF_SK_STACK].af)) - sks->af = sp->af; - if (sks->af != skw->af) { - switch (sp->proto) { - case IPPROTO_ICMP: - sks->proto = IPPROTO_ICMPV6; - break; - case IPPROTO_ICMPV6: - sks->proto = IPPROTO_ICMP; - break; - default: - sks->proto = sp->proto; - } - } else - sks->proto = sp->proto; + NET_ASSERT_LOCKED(); + CLR(ifp->if_flags, IFF_RUNNING); - if (((sks->af != AF_INET) && (sks->af != AF_INET6)) || - ((skw->af != AF_INET) && (skw->af != AF_INET6))) { - error = EINVAL; - goto cleanup; - } + SMR_PTR_SET_LOCKED(&pfsync_if, NULL); + smr_barrier(); - } else if ((sks->af != AF_INET) && (sks->af != AF_INET6)) { - error = EINVAL; - goto cleanup; - } - st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]); - st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]); - - /* copy to state */ - st->rt_addr = sp->rt_addr; - st->rt = sp->rt; - st->creation = getuptime() - ntohl(sp->creation); - st->expire = getuptime(); - if (ntohl(sp->expire)) { - u_int32_t timeout; - - timeout = r->timeout[sp->timeout]; - if (!timeout) - timeout = pf_default_rule.timeout[sp->timeout]; - - /* sp->expire may have been adaptively scaled by 
export. */ - st->expire -= timeout - ntohl(sp->expire); - } - - st->direction = sp->direction; - st->log = sp->log; - st->timeout = sp->timeout; - st->state_flags = ntohs(sp->state_flags); - st->max_mss = ntohs(sp->max_mss); - st->min_ttl = sp->min_ttl; - st->set_tos = sp->set_tos; - st->set_prio[0] = sp->set_prio[0]; - st->set_prio[1] = sp->set_prio[1]; - - st->id = sp->id; - st->creatorid = sp->creatorid; - pf_state_peer_ntoh(&sp->src, &st->src); - pf_state_peer_ntoh(&sp->dst, &st->dst); + refcnt_finalize(&sc->sc_refs, "pfsyncdn"); - st->rule.ptr = r; - st->anchor.ptr = NULL; + return (0); +} - st->pfsync_time = getuptime(); - st->sync_state = PFSYNC_S_NONE; +static void +pfsync_deferred(struct pfsync_softc *sc, struct pf_state *st, int action) +{ - refcnt_init(&st->refcnt); +} - /* XXX when we have anchors, use STATE_INC_COUNTERS */ - r->states_cur++; - r->states_tot++; +static void +pfsync_request_update(u_int32_t creatorid, u_int64_t id) +{ - if (!ISSET(flags, PFSYNC_SI_IOCTL)) - SET(st->state_flags, PFSTATE_NOSYNC); +} - /* - * We just set PFSTATE_NOSYNC bit, which prevents - * pfsync_insert_state() to insert state to pfsync. - */ - if (pf_state_insert(kif, &skw, &sks, st) != 0) { - /* XXX when we have anchors, use STATE_DEC_COUNTERS */ - r->states_cur--; - error = EEXIST; - goto cleanup_state; +void +pfsync_update_state_req(struct pfsync_softc *sc, struct pf_state *st) +{ + if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { + if (st->sync_state != PFSYNC_S_NONE) + pfsync_q_del(st); + return; } - if (!ISSET(flags, PFSYNC_SI_IOCTL)) { - CLR(st->state_flags, PFSTATE_NOSYNC); - if (ISSET(st->state_flags, PFSTATE_ACK)) { - pfsync_q_ins(st, PFSYNC_S_IACK); - schednetisr(NETISR_PFSYNC); - } - } - CLR(st->state_flags, PFSTATE_ACK); + switch (st->sync_state) { + case PFSYNC_S_UPD_C: + case PFSYNC_S_IACK: + pfsync_q_del(st); + /* FALLTHROUGH */ + case PFSYNC_S_NONE: + pfsync_q_ins(st, PFSYNC_S_UPD); + schednetisr(NETISR_PFSYNC); + return; - return (0); + case PFSYNC_S_INS: + case PFSYNC_S_UPD: + case PFSYNC_S_DEL: + /* we're already handling it */ + return; - cleanup: - if (skw == sks) - sks = NULL; - if (skw != NULL) - pool_put(&pf_state_key_pl, skw); - if (sks != NULL) - pool_put(&pf_state_key_pl, sks); - - cleanup_state: /* pf_state_insert frees the state keys */ - if (st) { - if (st->dst.scrub) - pool_put(&pf_state_scrub_pl, st->dst.scrub); - if (st->src.scrub) - pool_put(&pf_state_scrub_pl, st->src.scrub); - pool_put(&pf_state_pl, st); + default: + panic("%s: unexpected sync state %d", __func__, + st->sync_state); } - return (error); } int pfsync_input(struct mbuf **mp, int *offp, int proto, int af) { - struct mbuf *n, *m = *mp; - struct pfsync_softc *sc = pfsyncif; - struct ip *ip = mtod(m, struct ip *); + struct pfsync_softc *sc; struct pfsync_header *ph; - struct pfsync_subheader subh; - int offset, noff, len, count, mlen, flags = 0; - int e; - - NET_ASSERT_LOCKED(); + struct pfsync_subheader *psh; + struct mbuf *m = *mp, *n; + int off, end, len; + unsigned int flags = 0; + uint8_t ttl; pfsyncstat_inc(pfsyncs_ipackets); - /* verify that we have a sync interface configured */ - if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) || - sc->sc_sync_ifidx == 0 || !pf_status.running) - goto done; + if (!pf_status.running) + goto drop; + + sc = pfsync_get(); + if (sc == NULL) + goto drop; /* verify that the packet came in on the right interface */ - if (sc->sc_sync_ifidx != m->m_pkthdr.ph_ifidx) { + if (sc->sc_parent != m->m_pkthdr.ph_ifidx) { pfsyncstat_inc(pfsyncs_badif); goto done; } - 
sc->sc_if.if_ipackets++;
-	sc->sc_if.if_ibytes += m->m_pkthdr.len;
+	counters_pkt(sc->sc_if.if_counters, ifc_ipackets, ifc_ibytes,
+	    m->m_pkthdr.len);
+
+	switch (af) {
+	case AF_INET: {
+		struct ip *ip = mtod(m, struct ip *);
+		ttl = ip->ip_ttl;
+		break;
+	}
+#ifdef INET6
+	case AF_INET6: {
+		struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
+		ttl = ip6->ip6_hlim;
+		break;
+	}
+#endif /* INET6 */
+	default:
+		unhandled_af(af);
+		/* NOTREACHED */
+	}

-	/* verify that the IP TTL is 255. */
-	if (ip->ip_ttl != PFSYNC_DFLTTL) {
+	if (ttl != PFSYNC_DFLTTL) {
 		pfsyncstat_inc(pfsyncs_badttl);
 		goto done;
 	}

-	offset = ip->ip_hl << 2;
-	n = m_pulldown(m, offset, sizeof(*ph), &noff);
-	if (n == NULL) {
+	m_adj(m, *offp);
+	if (m->m_pkthdr.len < sizeof(*ph)) {
 		pfsyncstat_inc(pfsyncs_hdrops);
-		return IPPROTO_DONE;
+		goto done;
+	}
+	if (m->m_len < sizeof(*ph)) {
+		m = m_pullup(m, sizeof(*ph));
+		if (m == NULL)
+			goto done;
 	}
-	ph = (struct pfsync_header *)(n->m_data + noff);

-	/* verify the version */
+	ph = mtod(m, struct pfsync_header *);
 	if (ph->version != PFSYNC_VERSION) {
 		pfsyncstat_inc(pfsyncs_badver);
 		goto done;
 	}

-	len = ntohs(ph->len) + offset;
+	len = ntohs(ph->len);
 	if (m->m_pkthdr.len < len) {
 		pfsyncstat_inc(pfsyncs_badlen);
 		goto done;
 	}
+	m->m_pkthdr.len = len;
+
+#if NBPFILTER > 0
+	{
+		caddr_t if_bpf = sc->sc_if.if_bpf;
+		if (if_bpf)
+			bpf_mtap(if_bpf, m, BPF_DIRECTION_IN);
+	}
+#endif

+	/* XXX atomic? */
 	if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
 		flags = PFSYNC_SI_CKSUM;

-	offset += sizeof(*ph);
-	while (offset <= len - sizeof(subh)) {
-		m_copydata(m, offset, sizeof(subh), &subh);
-		offset += sizeof(subh);
-
-		mlen = subh.len << 2;
-		count = ntohs(subh.count);
-
-		if (subh.action >= PFSYNC_ACT_MAX ||
-		    subh.action >= nitems(pfsync_acts) ||
-		    mlen < pfsync_acts[subh.action].len) {
-			/*
-			 * subheaders are always followed by at least one
-			 * message, so if the peer is new
-			 * enough to tell us how big its messages are then we
-			 * know enough to skip them.
-			 */
-			if (count > 0 && mlen > 0) {
-				offset += count * mlen;
-				continue;
-			}
-			pfsyncstat_inc(pfsyncs_badact);
-			goto done;
-		}
-		n = m_pulldown(m, offset, mlen * count, &noff);
+	n = m;
+	end = sizeof(*ph);
+	len -= sizeof(*ph);
+
+	while (len >= sizeof(*psh)) {
+		uint32_t buf[256]; /* uint32_t provides alignment */
+		int (*action)(struct pfsync_softc *, unsigned int,
+		    const void *, size_t);
+		unsigned int mlen, count, sublen, i;
+
+		n = m_getptr(n, end, &off);
 		if (n == NULL) {
 			pfsyncstat_inc(pfsyncs_badlen);
-			return IPPROTO_DONE;
+			goto done;
+		}
+		len -= sizeof(*psh);
+
+		end = off + sizeof(*psh);
+		if (end <= n->m_len) {
+			psh = (struct pfsync_subheader *)
+			    (mtod(n, caddr_t) + off);
+		} else {
+			CTASSERT(sizeof(*psh) <= sizeof(buf));
+			m_copydata(n, off, sizeof(*psh), buf);
+			psh = (struct pfsync_subheader *)buf;
 		}

-		e = pfsync_acts[subh.action].in(n->m_data + noff, mlen, count,
-		    flags);
-		if (e != 0)
+		mlen = psh->len << 2;
+		count = ntohs(psh->count);
+
+		sublen = mlen * count;
+		if (mlen == 0 || count == 0 || len < sublen) {
+			pfsyncstat_inc(pfsyncs_badlen);
 			goto done;
+		}
+		len -= sublen;
+
+		if (psh->action >= PFSYNC_ACT_MAX ||
+		    psh->action >= nitems(pfsync_acts) ||
+		    mlen < pfsync_acts[psh->action].len ||
+		    (action = pfsync_acts[psh->action].handler) == NULL) {
+			/* skip these ones */
+			end += sublen;
+			continue;
+		}

-		offset += mlen * count;
+		for (i = 0; i < count; i++) {
+			void *ptr;
+
+			n = m_getptr(n, end, &off);
+			if (n == NULL) {
+				pfsyncstat_inc(pfsyncs_badlen);
+				goto done;
+			}
+			end = off + mlen;
+
+			if (end <= n->m_len)
+				ptr = mtod(n, caddr_t) + off;
+			else {
+				m_copydata(n, off, mlen, buf);
+				ptr = buf;
+			}
+
+			if ((*action)(sc, flags, ptr, mlen) != 0)
+				goto done;
+		}
 	}

 done:
+	pfsync_put(sc);
+drop:
 	m_freem(m);
-	return IPPROTO_DONE;
+	return (IPPROTO_DONE);
 }

-int
-pfsync_in_clr(caddr_t buf, int len, int count, int flags)
+static int
+pfsync_in_clr(struct pfsync_softc *sc, unsigned int flags,
+    const void *buf, size_t len)
 {
-	struct pfsync_clr *clr;
+	const struct pfsync_clr *clr = buf;
+	uint32_t creatorid = clr->creatorid;
+	struct pfi_kif *kif = NULL;
 	struct pf_state *st, *nexts;
-	struct pfi_kif *kif;
-	u_int32_t creatorid;
-	int i;

 	PF_LOCK();
-	for (i = 0; i < count; i++) {
-		clr = (struct pfsync_clr *)buf + len * i;
-		kif = NULL;
-		creatorid = clr->creatorid;
-		if (strlen(clr->ifname) &&
-		    (kif = pfi_kif_find(clr->ifname)) == NULL)
+	if (strlen(clr->ifname)) {
+		kif = pfi_kif_find(clr->ifname);
+		if (kif == NULL)
+			goto done;
+	}
+
+	/* XXX this sucks with lots of states */
+	PF_STATE_ENTER_WRITE();
+	RB_FOREACH_SAFE(st, pf_state_tree_id, &tree_id, nexts) {
+		if (st->creatorid != creatorid)
+			continue;
+		if (kif != NULL && st->kif != kif)
 			continue;

-		PF_STATE_ENTER_WRITE();
-		for (st = RB_MIN(pf_state_tree_id, &tree_id); st; st = nexts) {
-			nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
-			if (st->creatorid == creatorid &&
-			    ((kif && st->kif == kif) || !kif)) {
-				SET(st->state_flags, PFSTATE_NOSYNC);
-				pf_remove_state(st);
-			}
-		}
-		PF_STATE_EXIT_WRITE();
+		SET(st->state_flags, PFSTATE_NOSYNC);
+		pf_remove_state(st);
 	}
+	PF_STATE_EXIT_WRITE();
+done:
 	PF_UNLOCK();

 	return (0);
 }

-int
-pfsync_in_ins(caddr_t buf, int len, int count, int flags)
+static int
+pfsync_in_ins(struct pfsync_softc *sc, unsigned int flags,
+    const void *buf, size_t len)
 {
-	struct pfsync_state *sp;
+	const struct pfsync_state *sp = buf;
 	sa_family_t af1, af2;
-	int i;

-	PF_LOCK();
-	for (i = 0; i < count; i++) {
-		sp = (struct pfsync_state *)(buf + len * i);
-		af1 = sp->key[0].af;
- af2 = sp->key[1].af; - - /* check for invalid values */ - if (sp->timeout >= PFTM_MAX || - sp->src.state > PF_TCPS_PROXY_DST || - sp->dst.state > PF_TCPS_PROXY_DST || - sp->direction > PF_OUT || - (((af1 || af2) && - ((af1 != AF_INET && af1 != AF_INET6) || - (af2 != AF_INET && af2 != AF_INET6))) || - (sp->af != AF_INET && sp->af != AF_INET6))) { - DPFPRINTF(LOG_NOTICE, - "pfsync_input: PFSYNC5_ACT_INS: invalid value"); - pfsyncstat_inc(pfsyncs_badval); - continue; - } + af1 = sp->key[0].af; + af2 = sp->key[1].af; - if (pfsync_state_import(sp, flags) == ENOMEM) { - /* drop out, but process the rest of the actions */ - break; - } + /* check for invalid values */ + if (sp->timeout >= PFTM_MAX || + sp->src.state > PF_TCPS_PROXY_DST || + sp->dst.state > PF_TCPS_PROXY_DST || + sp->direction > PF_OUT || + (((af1 || af2) && + ((af1 != AF_INET && af1 != AF_INET6) || + (af2 != AF_INET && af2 != AF_INET6))) || + (sp->af != AF_INET && sp->af != AF_INET6))) { + DPFPRINTF(LOG_NOTICE, + "pfsync_input: PFSYNC5_ACT_INS: invalid value"); + pfsyncstat_inc(pfsyncs_badval); + return (0); } + + PF_LOCK(); + pf_state_import(sp, flags); PF_UNLOCK(); return (0); } -int -pfsync_in_iack(caddr_t buf, int len, int count, int flags) -{ - struct pfsync_ins_ack *ia; - struct pf_state_cmp id_key; +static int +pfsync_in_iack(struct pfsync_softc *sc, unsigned int flags, + const void *buf, size_t len) +{ + const struct pfsync_ins_ack *ia = buf; + struct pf_state_cmp id_key = { + .id = ia->id, + .creatorid = ia->creatorid, + }; struct pf_state *st; - int i; - - for (i = 0; i < count; i++) { - ia = (struct pfsync_ins_ack *)(buf + len * i); - - id_key.id = ia->id; - id_key.creatorid = ia->creatorid; - PF_STATE_ENTER_READ(); - st = pf_find_state_byid(&id_key); - pf_state_ref(st); - PF_STATE_EXIT_READ(); - if (st == NULL) - continue; + PF_STATE_ENTER_READ(); + st = pf_find_state_byid(&id_key); + pf_state_ref(st); + PF_STATE_EXIT_READ(); + if (st == NULL) + return (0); - if (ISSET(st->state_flags, PFSTATE_ACK)) - pfsync_deferred(st, 0); + if (ISSET(st->state_flags, PFSTATE_ACK)) + pfsync_deferred(sc, st, PF_PASS); - pf_state_unref(st); - } + pf_state_unref(st); return (0); } -int -pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src, - struct pfsync_state_peer *dst) +static int +pfsync_upd_tcp(struct pf_state *st, const struct pfsync_state_peer *src, + const struct pfsync_state_peer *dst) { int sync = 0; @@ -945,10 +643,10 @@ pfsync_upd_tcp(struct pf_state *st, stru */ if ((st->src.state > src->state && (st->src.state < PF_TCPS_PROXY_SRC || - src->state >= PF_TCPS_PROXY_SRC)) || + src->state >= PF_TCPS_PROXY_SRC)) || (st->src.state == src->state && - SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) + SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) sync++; else pf_state_peer_ntoh(src, &st->src); @@ -956,7 +654,7 @@ pfsync_upd_tcp(struct pf_state *st, stru if ((st->dst.state > dst->state) || (st->dst.state >= TCPS_SYN_SENT && - SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) + SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) sync++; else pf_state_peer_ntoh(dst, &st->dst); @@ -964,1853 +662,235 @@ pfsync_upd_tcp(struct pf_state *st, stru return (sync); } -int -pfsync_in_upd(caddr_t buf, int len, int count, int flags) +static int +pfsync_in_upd(struct pfsync_softc *sc, unsigned int flags, + const void *buf, size_t len) { - struct pfsync_state *sp; + const struct pfsync_state *sp = buf; struct pf_state_cmp id_key; struct pf_state *st; - int sync, error; - int i; + int error; + int sync; - for (i = 0; i < count; i++) { - sp = (struct 
pfsync_state *)(buf + len * i); + /* check for invalid values */ + if (sp->timeout >= PFTM_MAX || + sp->src.state > PF_TCPS_PROXY_DST || + sp->dst.state > PF_TCPS_PROXY_DST) { + DPFPRINTF(LOG_NOTICE, + "pfsync_input: PFSYNC_ACT_UPD: invalid value"); + pfsyncstat_inc(pfsyncs_badval); + return (0); + } - /* check for invalid values */ - if (sp->timeout >= PFTM_MAX || - sp->src.state > PF_TCPS_PROXY_DST || - sp->dst.state > PF_TCPS_PROXY_DST) { - DPFPRINTF(LOG_NOTICE, - "pfsync_input: PFSYNC_ACT_UPD: invalid value"); - pfsyncstat_inc(pfsyncs_badval); - continue; - } + id_key.id = sp->id; + id_key.creatorid = sp->creatorid; - id_key.id = sp->id; - id_key.creatorid = sp->creatorid; + PF_STATE_ENTER_READ(); + st = pf_find_state_byid(&id_key); + pf_state_ref(st); + PF_STATE_EXIT_READ(); + if (st == NULL) { + /* insert the update */ + PF_LOCK(); + error = pf_state_import(sp, flags); + PF_UNLOCK(); + if (error) + pfsyncstat_inc(pfsyncs_badstate); + return (0); + } - PF_STATE_ENTER_READ(); - st = pf_find_state_byid(&id_key); - pf_state_ref(st); - PF_STATE_EXIT_READ(); - if (st == NULL) { - /* insert the update */ - PF_LOCK(); - error = pfsync_state_import(sp, flags); - if (error) - pfsyncstat_inc(pfsyncs_badstate); - PF_UNLOCK(); - continue; - } + if (ISSET(st->state_flags, PFSTATE_ACK)) + pfsync_deferred(sc, st, 1); - if (ISSET(st->state_flags, PFSTATE_ACK)) - pfsync_deferred(st, 1); + if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) + sync = pfsync_upd_tcp(st, &sp->src, &sp->dst); + else { + sync = 0; - if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) - sync = pfsync_upd_tcp(st, &sp->src, &sp->dst); - else { - sync = 0; - - /* - * Non-TCP protocol state machine always go - * forwards - */ - if (st->src.state > sp->src.state) - sync++; - else - pf_state_peer_ntoh(&sp->src, &st->src); - - if (st->dst.state > sp->dst.state) - sync++; - else - pf_state_peer_ntoh(&sp->dst, &st->dst); - } + /* + * Non-TCP protocol state machine always go + * forwards + */ + if (st->src.state > sp->src.state) + sync++; + else + pf_state_peer_ntoh(&sp->src, &st->src); - if (sync < 2) { - pfsync_alloc_scrub_memory(&sp->dst, &st->dst); + if (st->dst.state > sp->dst.state) + sync++; + else pf_state_peer_ntoh(&sp->dst, &st->dst); - st->expire = getuptime(); - st->timeout = sp->timeout; - } - st->pfsync_time = getuptime(); - - if (sync) { - pfsyncstat_inc(pfsyncs_stale); + } - pfsync_update_state(st); - schednetisr(NETISR_PFSYNC); - } + if (sync < 2) { + pf_state_alloc_scrub_memory(&sp->dst, &st->dst); + /* XXX what if the above fails? 
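+		 * assumption (not answered in this diff): as in the old
+		 * pfsync_alloc_scrub_memory() path the error is ignored;
+		 * pf_state_peer_ntoh() only copies scrub state when
+		 * d->scrub was allocated, so an ENOMEM here presumably
+		 * just drops the peer's scrub info.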
+		 */
+		pf_state_peer_ntoh(&sp->dst, &st->dst);
+		st->expire = getuptime();
+		st->timeout = sp->timeout;
+	}
+	st->pfsync_time = getuptime();

-		if (sync) {
-			pfsyncstat_inc(pfsyncs_stale);
+	if (sync) {
+		pfsyncstat_inc(pfsyncs_stale);

-			pfsync_update_state(st);
-			schednetisr(NETISR_PFSYNC);
-		}
+		pfsync_update_state(st);
+		schednetisr(NETISR_PFSYNC);
 	}

+	pf_state_unref(st);
+	return (0);
 }

 int
-pfsync_in_upd_c(caddr_t buf, int len, int count, int flags)
+pfsync_in_upd_c(struct pfsync_softc *sc, unsigned int flags,
+    const void *buf, size_t len)
 {
-	struct pfsync_upd_c *up;
+	const struct pfsync_upd_c *up = buf;
 	struct pf_state_cmp id_key;
 	struct pf_state *st;
 	int sync;
-	int i;

+	/* check for invalid values */
+	if (up->timeout >= PFTM_MAX ||
+	    up->src.state > PF_TCPS_PROXY_DST ||
+	    up->dst.state > PF_TCPS_PROXY_DST) {
+		DPFPRINTF(LOG_NOTICE,
+		    "pfsync_input: PFSYNC_ACT_UPD_C: invalid value");
+		pfsyncstat_inc(pfsyncs_badval);
+		return (0);
+	}

-	for (i = 0; i < count; i++) {
-		up = (struct pfsync_upd_c *)(buf + len * i);
+	id_key.id = up->id;
+	id_key.creatorid = up->creatorid;

-		/* check for invalid values */
-		if (up->timeout >= PFTM_MAX ||
-		    up->src.state > PF_TCPS_PROXY_DST ||
-		    up->dst.state > PF_TCPS_PROXY_DST) {
-			DPFPRINTF(LOG_NOTICE,
-			    "pfsync_input: PFSYNC_ACT_UPD_C: invalid value");
-			pfsyncstat_inc(pfsyncs_badval);
-			continue;
-		}
+	PF_STATE_ENTER_READ();
+	st = pf_find_state_byid(&id_key);
+	pf_state_ref(st);
+	PF_STATE_EXIT_READ();
+	if (st == NULL) {
+		/* We don't have this state. Ask for it.
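+		 * (note: pfsync_request_update() is reduced to an empty
+		 * stub elsewhere in this diff, so the request is not
+		 * actually transmitted yet.)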
*/ - pfsync_request_update(id_key.creatorid, id_key.id); - continue; - } + if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) + sync = pfsync_upd_tcp(st, &up->src, &up->dst); + else { + sync = 0; - if (ISSET(st->state_flags, PFSTATE_ACK)) - pfsync_deferred(st, 1); + /* + * Non-TCP protocol state machine always go + * forwards + */ + if (st->src.state > up->src.state) + sync++; + else + pf_state_peer_ntoh(&up->src, &st->src); - if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) - sync = pfsync_upd_tcp(st, &up->src, &up->dst); - else { - sync = 0; - /* - * Non-TCP protocol state machine always go - * forwards - */ - if (st->src.state > up->src.state) - sync++; - else - pf_state_peer_ntoh(&up->src, &st->src); - - if (st->dst.state > up->dst.state) - sync++; - else - pf_state_peer_ntoh(&up->dst, &st->dst); - } - if (sync < 2) { - pfsync_alloc_scrub_memory(&up->dst, &st->dst); + if (st->dst.state > up->dst.state) + sync++; + else pf_state_peer_ntoh(&up->dst, &st->dst); - st->expire = getuptime(); - st->timeout = up->timeout; - } - st->pfsync_time = getuptime(); - - if (sync) { - pfsyncstat_inc(pfsyncs_stale); - - pfsync_update_state(st); - schednetisr(NETISR_PFSYNC); - } - - pf_state_unref(st); } - return (0); -} - -int -pfsync_in_ureq(caddr_t buf, int len, int count, int flags) -{ - struct pfsync_upd_req *ur; - int i; - - struct pf_state_cmp id_key; - struct pf_state *st; - - for (i = 0; i < count; i++) { - ur = (struct pfsync_upd_req *)(buf + len * i); - - id_key.id = ur->id; - id_key.creatorid = ur->creatorid; - - if (id_key.id == 0 && id_key.creatorid == 0) - pfsync_bulk_start(); - else { - PF_STATE_ENTER_READ(); - st = pf_find_state_byid(&id_key); - pf_state_ref(st); - PF_STATE_EXIT_READ(); - if (st == NULL) { - pfsyncstat_inc(pfsyncs_badstate); - continue; - } - if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { - pf_state_unref(st); - continue; - } - - pfsync_update_state_req(st); - pf_state_unref(st); - } + if (sync < 2) { + pf_state_alloc_scrub_memory(&up->dst, &st->dst); + /* XXX what if the above fails? 
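+		 * same assumption as in pfsync_in_upd() above: the
+		 * allocation failure is tolerated at the cost of the
+		 * peer's scrub state.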
*/ + pf_state_peer_ntoh(&up->dst, &st->dst); + st->expire = getuptime(); + st->timeout = up->timeout; } + st->pfsync_time = getuptime(); - return (0); -} - -int -pfsync_in_del(caddr_t buf, int len, int count, int flags) -{ - struct pfsync_state *sp; - struct pf_state_cmp id_key; - struct pf_state *st; - int i; - - PF_STATE_ENTER_WRITE(); - for (i = 0; i < count; i++) { - sp = (struct pfsync_state *)(buf + len * i); - - id_key.id = sp->id; - id_key.creatorid = sp->creatorid; + if (sync) { + pfsyncstat_inc(pfsyncs_stale); - st = pf_find_state_byid(&id_key); - if (st == NULL) { - pfsyncstat_inc(pfsyncs_badstate); - continue; - } - SET(st->state_flags, PFSTATE_NOSYNC); - pf_remove_state(st); + pfsync_update_state(st); + schednetisr(NETISR_PFSYNC); } - PF_STATE_EXIT_WRITE(); + + pf_state_unref(st); return (0); } -int -pfsync_in_del_c(caddr_t buf, int len, int count, int flags) -{ - struct pfsync_del_c *sp; - struct pf_state_cmp id_key; +static int +pfsync_in_ureq(struct pfsync_softc *sc, unsigned int flags, + const void *buf, size_t len) +{ + const struct pfsync_upd_req *ur = buf; + struct pf_state_cmp id_key = { + .id = ur->id, + .creatorid = ur->creatorid, + }; struct pf_state *st; - int i; - - PF_LOCK(); - PF_STATE_ENTER_WRITE(); - for (i = 0; i < count; i++) { - sp = (struct pfsync_del_c *)(buf + len * i); - - id_key.id = sp->id; - id_key.creatorid = sp->creatorid; + if (id_key.id == 0 && id_key.creatorid == 0) + pfsync_bulk_start(); + else { + PF_STATE_ENTER_READ(); st = pf_find_state_byid(&id_key); + pf_state_ref(st); + PF_STATE_EXIT_READ(); if (st == NULL) { pfsyncstat_inc(pfsyncs_badstate); - continue; - } - - SET(st->state_flags, PFSTATE_NOSYNC); - pf_remove_state(st); - } - PF_STATE_EXIT_WRITE(); - PF_UNLOCK(); - - return (0); -} - -int -pfsync_in_bus(caddr_t buf, int len, int count, int flags) -{ - struct pfsync_softc *sc = pfsyncif; - struct pfsync_bus *bus; - - /* If we're not waiting for a bulk update, who cares. */ - if (sc->sc_ureq_sent == 0) - return (0); - - bus = (struct pfsync_bus *)buf; - - switch (bus->status) { - case PFSYNC_BUS_START: - timeout_add(&sc->sc_bulkfail_tmo, 4 * hz + - pf_pool_limits[PF_LIMIT_STATES].limit / - ((sc->sc_if.if_mtu - PFSYNC_MINPKT) / - sizeof(struct pfsync_state))); - DPFPRINTF(LOG_INFO, "received bulk update start"); - break; - - case PFSYNC_BUS_END: - if (getuptime() - ntohl(bus->endtime) >= - sc->sc_ureq_sent) { - /* that's it, we're happy */ - sc->sc_ureq_sent = 0; - sc->sc_bulk_tries = 0; - timeout_del(&sc->sc_bulkfail_tmo); -#if NCARP > 0 - if (!pfsync_sync_ok) - carp_group_demote_adj(&sc->sc_if, -1, - sc->sc_link_demoted ? - "pfsync link state up" : - "pfsync bulk done"); - if (sc->sc_initial_bulk) { - carp_group_demote_adj(&sc->sc_if, -32, - "pfsync init"); - sc->sc_initial_bulk = 0; - } -#endif - pfsync_sync_ok = 1; - sc->sc_link_demoted = 0; - DPFPRINTF(LOG_INFO, "received valid bulk update end"); - } else { - DPFPRINTF(LOG_WARNING, "received invalid " - "bulk update end: bad timestamp"); - } - break; - } - - return (0); -} - -int -pfsync_in_tdb(caddr_t buf, int len, int count, int flags) -{ -#if defined(IPSEC) - struct pfsync_tdb *tp; - int i; - - for (i = 0; i < count; i++) { - tp = (struct pfsync_tdb *)(buf + len * i); - pfsync_update_net_tdb(tp); - } -#endif - - return (0); -} - -#if defined(IPSEC) -/* Update an in-kernel tdb. Silently fail if no tdb is found. 
*/ -void -pfsync_update_net_tdb(struct pfsync_tdb *pt) -{ - struct tdb *tdb; - - NET_ASSERT_LOCKED(); - - /* check for invalid values */ - if (ntohl(pt->spi) <= SPI_RESERVED_MAX || - (pt->dst.sa.sa_family != AF_INET && - pt->dst.sa.sa_family != AF_INET6)) - goto bad; - - tdb = gettdb(ntohs(pt->rdomain), pt->spi, - (union sockaddr_union *)&pt->dst, pt->sproto); - if (tdb) { - pt->rpl = betoh64(pt->rpl); - pt->cur_bytes = betoh64(pt->cur_bytes); - - /* Neither replay nor byte counter should ever decrease. */ - if (pt->rpl < tdb->tdb_rpl || - pt->cur_bytes < tdb->tdb_cur_bytes) { - tdb_unref(tdb); - goto bad; - } - - tdb->tdb_rpl = pt->rpl; - tdb->tdb_cur_bytes = pt->cur_bytes; - tdb_unref(tdb); - } - return; - - bad: - DPFPRINTF(LOG_WARNING, "pfsync_insert: PFSYNC_ACT_TDB_UPD: " - "invalid value"); - pfsyncstat_inc(pfsyncs_badstate); - return; -} -#endif - - -int -pfsync_in_eof(caddr_t buf, int len, int count, int flags) -{ - if (len > 0 || count > 0) - pfsyncstat_inc(pfsyncs_badact); - - /* we're done. let the caller return */ - return (1); -} - -int -pfsync_in_error(caddr_t buf, int len, int count, int flags) -{ - pfsyncstat_inc(pfsyncs_badact); - return (-1); -} - -int -pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, - struct rtentry *rt) -{ - m_freem(m); /* drop packet */ - return (EAFNOSUPPORT); -} - -int -pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) -{ - struct proc *p = curproc; - struct pfsync_softc *sc = ifp->if_softc; - struct ifreq *ifr = (struct ifreq *)data; - struct ip_moptions *imo = &sc->sc_imo; - struct pfsyncreq pfsyncr; - struct ifnet *ifp0, *sifp; - struct ip *ip; - int error; - - switch (cmd) { - case SIOCSIFFLAGS: - if ((ifp->if_flags & IFF_RUNNING) == 0 && - (ifp->if_flags & IFF_UP)) { - ifp->if_flags |= IFF_RUNNING; - -#if NCARP > 0 - sc->sc_initial_bulk = 1; - carp_group_demote_adj(&sc->sc_if, 32, "pfsync init"); -#endif - - pfsync_request_full_update(sc); - } - if ((ifp->if_flags & IFF_RUNNING) && - (ifp->if_flags & IFF_UP) == 0) { - ifp->if_flags &= ~IFF_RUNNING; - - /* drop everything */ - timeout_del(&sc->sc_tmo); - pfsync_drop(sc); - - pfsync_cancel_full_update(sc); - } - break; - case SIOCSIFMTU: - if ((ifp0 = if_get(sc->sc_sync_ifidx)) == NULL) - return (EINVAL); - error = 0; - if (ifr->ifr_mtu <= PFSYNC_MINPKT || - ifr->ifr_mtu > ifp0->if_mtu) { - error = EINVAL; - } - if_put(ifp0); - if (error) - return error; - if (ifr->ifr_mtu < ifp->if_mtu) - pfsync_sendout(); - ifp->if_mtu = ifr->ifr_mtu; - break; - case SIOCGETPFSYNC: - bzero(&pfsyncr, sizeof(pfsyncr)); - if ((ifp0 = if_get(sc->sc_sync_ifidx)) != NULL) { - strlcpy(pfsyncr.pfsyncr_syncdev, - ifp0->if_xname, IFNAMSIZ); - } - if_put(ifp0); - pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; - pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; - pfsyncr.pfsyncr_defer = sc->sc_defer; - return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))); - - case SIOCSETPFSYNC: - if ((error = suser(p)) != 0) - return (error); - if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) - return (error); - - if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) - sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; - else - sc->sc_sync_peer.s_addr = - pfsyncr.pfsyncr_syncpeer.s_addr; - - if (pfsyncr.pfsyncr_maxupdates > 255) - return (EINVAL); - sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; - - sc->sc_defer = pfsyncr.pfsyncr_defer; - - if (pfsyncr.pfsyncr_syncdev[0] == 0) { - if ((ifp0 = if_get(sc->sc_sync_ifidx)) != NULL) { - if_linkstatehook_del(ifp0, &sc->sc_ltask); - if_detachhook_del(ifp0, 
&sc->sc_dtask); - } - if_put(ifp0); - sc->sc_sync_ifidx = 0; - if (imo->imo_num_memberships > 0) { - in_delmulti(imo->imo_membership[ - --imo->imo_num_memberships]); - imo->imo_ifidx = 0; - } - break; - } - - if ((sifp = if_unit(pfsyncr.pfsyncr_syncdev)) == NULL) - return (EINVAL); - - ifp0 = if_get(sc->sc_sync_ifidx); - - if (sifp->if_mtu < sc->sc_if.if_mtu || (ifp0 != NULL && - sifp->if_mtu < ifp0->if_mtu) || - sifp->if_mtu < MCLBYTES - sizeof(struct ip)) - pfsync_sendout(); - - if (ifp0) { - if_linkstatehook_del(ifp0, &sc->sc_ltask); - if_detachhook_del(ifp0, &sc->sc_dtask); - } - if_put(ifp0); - sc->sc_sync_ifidx = sifp->if_index; - - if (imo->imo_num_memberships > 0) { - in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); - imo->imo_ifidx = 0; - } - - if (sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { - struct in_addr addr; - - if (!(sifp->if_flags & IFF_MULTICAST)) { - sc->sc_sync_ifidx = 0; - if_put(sifp); - return (EADDRNOTAVAIL); - } - - addr.s_addr = INADDR_PFSYNC_GROUP; - - if ((imo->imo_membership[0] = - in_addmulti(&addr, sifp)) == NULL) { - sc->sc_sync_ifidx = 0; - if_put(sifp); - return (ENOBUFS); - } - imo->imo_num_memberships++; - imo->imo_ifidx = sc->sc_sync_ifidx; - imo->imo_ttl = PFSYNC_DFLTTL; - imo->imo_loop = 0; + return (0); } - ip = &sc->sc_template; - bzero(ip, sizeof(*ip)); - ip->ip_v = IPVERSION; - ip->ip_hl = sizeof(sc->sc_template) >> 2; - ip->ip_tos = IPTOS_LOWDELAY; - /* len and id are set later */ - ip->ip_off = htons(IP_DF); - ip->ip_ttl = PFSYNC_DFLTTL; - ip->ip_p = IPPROTO_PFSYNC; - ip->ip_src.s_addr = INADDR_ANY; - ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr; - - if_linkstatehook_add(sifp, &sc->sc_ltask); - if_detachhook_add(sifp, &sc->sc_dtask); - if_put(sifp); - - pfsync_request_full_update(sc); - - break; + if (!ISSET(st->state_flags, PFSTATE_NOSYNC)) { + pfsync_update_state_req(sc, st); - default: - return (ENOTTY); + pf_state_unref(st); } - return (0); -} - -void -pfsync_out_state(struct pf_state *st, void *buf) -{ - struct pfsync_state *sp = buf; - - pfsync_state_export(sp, st); -} - -void -pfsync_out_iack(struct pf_state *st, void *buf) -{ - struct pfsync_ins_ack *iack = buf; - - iack->id = st->id; - iack->creatorid = st->creatorid; + return (0); } -void -pfsync_out_upd_c(struct pf_state *st, void *buf) -{ - struct pfsync_upd_c *up = buf; - - bzero(up, sizeof(*up)); - up->id = st->id; - pf_state_peer_hton(&st->src, &up->src); - pf_state_peer_hton(&st->dst, &up->dst); - up->creatorid = st->creatorid; - up->timeout = st->timeout; -} -void -pfsync_out_del(struct pf_state *st, void *buf) +static int +pfsync_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, + struct rtentry *rt) { - struct pfsync_del_c *dp = buf; - - dp->id = st->id; - dp->creatorid = st->creatorid; - - SET(st->state_flags, PFSTATE_NOSYNC); + m_freem(m); + return (ENODEV); } -void -pfsync_grab_snapshot(struct pfsync_snapshot *sn, struct pfsync_softc *sc) +static void +pfsync_start(struct ifqueue *ifq) { - int q; - struct pf_state *st; - struct pfsync_upd_req_item *ur; - struct tdb *tdb; - - sn->sn_sc = sc; - - mtx_enter(&sc->sc_st_mtx); - mtx_enter(&sc->sc_upd_req_mtx); - mtx_enter(&sc->sc_tdb_mtx); - - for (q = 0; q < PFSYNC_S_COUNT; q++) { - TAILQ_INIT(&sn->sn_qs[q]); - - while ((st = TAILQ_FIRST(&sc->sc_qs[q])) != NULL) { - KASSERT(st->snapped == 0); - TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); - TAILQ_INSERT_TAIL(&sn->sn_qs[q], st, sync_snap); - st->snapped = 1; - } - } - - TAILQ_INIT(&sn->sn_upd_req_list); - while ((ur = 
TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { - TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); - TAILQ_INSERT_TAIL(&sn->sn_upd_req_list, ur, ur_snap); - } - - TAILQ_INIT(&sn->sn_tdb_q); - while ((tdb = TAILQ_FIRST(&sc->sc_tdb_q)) != NULL) { - TAILQ_REMOVE(&sc->sc_tdb_q, tdb, tdb_sync_entry); - TAILQ_INSERT_TAIL(&sn->sn_tdb_q, tdb, tdb_sync_snap); - - mtx_enter(&tdb->tdb_mtx); - KASSERT(!ISSET(tdb->tdb_flags, TDBF_PFSYNC_SNAPPED)); - SET(tdb->tdb_flags, TDBF_PFSYNC_SNAPPED); - mtx_leave(&tdb->tdb_mtx); - } - - sn->sn_len = sc->sc_len; - sc->sc_len = PFSYNC_MINPKT; - - sn->sn_plus = sc->sc_plus; - sc->sc_plus = NULL; - sn->sn_pluslen = sc->sc_pluslen; - sc->sc_pluslen = 0; - - mtx_leave(&sc->sc_tdb_mtx); - mtx_leave(&sc->sc_upd_req_mtx); - mtx_leave(&sc->sc_st_mtx); + ifq_purge(ifq); } void -pfsync_drop_snapshot(struct pfsync_snapshot *sn) -{ - struct pf_state *st; - struct pfsync_upd_req_item *ur; - struct tdb *t; - int q; - - for (q = 0; q < PFSYNC_S_COUNT; q++) { - if (TAILQ_EMPTY(&sn->sn_qs[q])) - continue; - - while ((st = TAILQ_FIRST(&sn->sn_qs[q])) != NULL) { - KASSERT(st->sync_state == q); - KASSERT(st->snapped == 1); - TAILQ_REMOVE(&sn->sn_qs[q], st, sync_snap); - st->sync_state = PFSYNC_S_NONE; - st->snapped = 0; - pf_state_unref(st); - } - } - - while ((ur = TAILQ_FIRST(&sn->sn_upd_req_list)) != NULL) { - TAILQ_REMOVE(&sn->sn_upd_req_list, ur, ur_snap); - pool_put(&sn->sn_sc->sc_pool, ur); - } - - while ((t = TAILQ_FIRST(&sn->sn_tdb_q)) != NULL) { - TAILQ_REMOVE(&sn->sn_tdb_q, t, tdb_sync_snap); - mtx_enter(&t->tdb_mtx); - KASSERT(ISSET(t->tdb_flags, TDBF_PFSYNC_SNAPPED)); - CLR(t->tdb_flags, TDBF_PFSYNC_SNAPPED); - CLR(t->tdb_flags, TDBF_PFSYNC); - mtx_leave(&t->tdb_mtx); - } -} - -int -pfsync_is_snapshot_empty(struct pfsync_snapshot *sn) +pfsync_update_tdb(struct tdb *t, int output) { - int q; - - for (q = 0; q < PFSYNC_S_COUNT; q++) - if (!TAILQ_EMPTY(&sn->sn_qs[q])) - return (0); - - if (!TAILQ_EMPTY(&sn->sn_upd_req_list)) - return (0); - - if (!TAILQ_EMPTY(&sn->sn_tdb_q)) - return (0); - - return (sn->sn_plus == NULL); -} + struct pfsync_softc *sc; -void -pfsync_drop(struct pfsync_softc *sc) -{ - struct pfsync_snapshot sn; + sc = pfsync_get(); + if (sc == NULL) + return; - pfsync_grab_snapshot(&sn, sc); - pfsync_drop_snapshot(&sn); + printf("%s: %s\n", sc->sc_if.if_xname, __func__); + pfsync_put(sc); } void -pfsync_send_dispatch(void *xmq) +pfsync_delete_tdb(struct tdb *t) { - struct mbuf_queue *mq = xmq; struct pfsync_softc *sc; - struct mbuf *m; - struct mbuf_list ml; - int error; - mq_delist(mq, &ml); - if (ml_empty(&ml)) + sc = pfsync_get(); + if (sc == NULL) return; - NET_LOCK(); - sc = pfsyncif; - if (sc == NULL) { - ml_purge(&ml); - goto done; - } - - while ((m = ml_dequeue(&ml)) != NULL) { - if ((error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, - &sc->sc_imo, NULL, 0)) == 0) - pfsyncstat_inc(pfsyncs_opackets); - else { - DPFPRINTF(LOG_DEBUG, - "ip_output() @ %s failed (%d)\n", __func__, error); - pfsyncstat_inc(pfsyncs_oerrors); - } - } -done: - NET_UNLOCK(); -} - -void -pfsync_send_pkt(struct mbuf *m) -{ - if (mq_enqueue(&pfsync_mq, m) != 0) { - pfsyncstat_inc(pfsyncs_oerrors); - DPFPRINTF(LOG_DEBUG, "mq_enqueue() @ %s failed, queue full\n", - __func__); - } else - task_add(net_tq(0), &pfsync_task); -} - -void -pfsync_sendout(void) -{ - struct pfsync_snapshot sn; - struct pfsync_softc *sc = pfsyncif; -#if NBPFILTER > 0 - struct ifnet *ifp = &sc->sc_if; -#endif - struct mbuf *m; - struct ip *ip; - struct pfsync_header *ph; - struct pfsync_subheader *subh; - struct 
pf_state *st; - struct pfsync_upd_req_item *ur; - struct tdb *t; - int offset; - int q, count = 0; - - if (sc == NULL || sc->sc_len == PFSYNC_MINPKT) - return; - - if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) || -#if NBPFILTER > 0 - (ifp->if_bpf == NULL && sc->sc_sync_ifidx == 0)) { -#else - sc->sc_sync_ifidx == 0) { -#endif - pfsync_drop(sc); - return; - } - - pfsync_grab_snapshot(&sn, sc); - - /* - * Check below is sufficient to prevent us from sending empty packets, - * but it does not stop us from sending short packets. - */ - if (pfsync_is_snapshot_empty(&sn)) - return; - - MGETHDR(m, M_DONTWAIT, MT_DATA); - if (m == NULL) { - sc->sc_if.if_oerrors++; - pfsyncstat_inc(pfsyncs_onomem); - pfsync_drop_snapshot(&sn); - return; - } - - if (max_linkhdr + sn.sn_len > MHLEN) { - MCLGETL(m, M_DONTWAIT, max_linkhdr + sn.sn_len); - if (!ISSET(m->m_flags, M_EXT)) { - m_free(m); - sc->sc_if.if_oerrors++; - pfsyncstat_inc(pfsyncs_onomem); - pfsync_drop_snapshot(&sn); - return; - } - } - m->m_data += max_linkhdr; - m->m_len = m->m_pkthdr.len = sn.sn_len; - - /* build the ip header */ - ip = mtod(m, struct ip *); - bcopy(&sc->sc_template, ip, sizeof(*ip)); - offset = sizeof(*ip); - - ip->ip_len = htons(m->m_pkthdr.len); - ip->ip_id = htons(ip_randomid()); - - /* build the pfsync header */ - ph = (struct pfsync_header *)(m->m_data + offset); - bzero(ph, sizeof(*ph)); - offset += sizeof(*ph); - - ph->version = PFSYNC_VERSION; - ph->len = htons(sn.sn_len - sizeof(*ip)); - bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); - - if (!TAILQ_EMPTY(&sn.sn_upd_req_list)) { - subh = (struct pfsync_subheader *)(m->m_data + offset); - offset += sizeof(*subh); - - count = 0; - while ((ur = TAILQ_FIRST(&sn.sn_upd_req_list)) != NULL) { - TAILQ_REMOVE(&sn.sn_upd_req_list, ur, ur_snap); - - bcopy(&ur->ur_msg, m->m_data + offset, - sizeof(ur->ur_msg)); - offset += sizeof(ur->ur_msg); - - pool_put(&sc->sc_pool, ur); - - count++; - } - - bzero(subh, sizeof(*subh)); - subh->len = sizeof(ur->ur_msg) >> 2; - subh->action = PFSYNC_ACT_UPD_REQ; - subh->count = htons(count); - } - - /* has someone built a custom region for us to add? */ - if (sn.sn_plus != NULL) { - bcopy(sn.sn_plus, m->m_data + offset, sn.sn_pluslen); - offset += sn.sn_pluslen; - sn.sn_plus = NULL; /* XXX memory leak ? 
*/ - } - - if (!TAILQ_EMPTY(&sn.sn_tdb_q)) { - subh = (struct pfsync_subheader *)(m->m_data + offset); - offset += sizeof(*subh); - - count = 0; - while ((t = TAILQ_FIRST(&sn.sn_tdb_q)) != NULL) { - TAILQ_REMOVE(&sn.sn_tdb_q, t, tdb_sync_snap); - pfsync_out_tdb(t, m->m_data + offset); - offset += sizeof(struct pfsync_tdb); - mtx_enter(&t->tdb_mtx); - KASSERT(ISSET(t->tdb_flags, TDBF_PFSYNC_SNAPPED)); - CLR(t->tdb_flags, TDBF_PFSYNC_SNAPPED); - CLR(t->tdb_flags, TDBF_PFSYNC); - mtx_leave(&t->tdb_mtx); - tdb_unref(t); - count++; - } - - bzero(subh, sizeof(*subh)); - subh->action = PFSYNC_ACT_TDB; - subh->len = sizeof(struct pfsync_tdb) >> 2; - subh->count = htons(count); - } - - /* walk the queues */ - for (q = 0; q < PFSYNC_S_COUNT; q++) { - if (TAILQ_EMPTY(&sn.sn_qs[q])) - continue; - - subh = (struct pfsync_subheader *)(m->m_data + offset); - offset += sizeof(*subh); - - count = 0; - while ((st = TAILQ_FIRST(&sn.sn_qs[q])) != NULL) { - TAILQ_REMOVE(&sn.sn_qs[q], st, sync_snap); - KASSERT(st->sync_state == q); - KASSERT(st->snapped == 1); - st->sync_state = PFSYNC_S_NONE; - st->snapped = 0; - pfsync_qs[q].write(st, m->m_data + offset); - offset += pfsync_qs[q].len; - - pf_state_unref(st); - count++; - } - - bzero(subh, sizeof(*subh)); - subh->action = pfsync_qs[q].action; - subh->len = pfsync_qs[q].len >> 2; - subh->count = htons(count); - } - - /* we're done, let's put it on the wire */ -#if NBPFILTER > 0 - if (ifp->if_bpf) { - m->m_data += sizeof(*ip); - m->m_len = m->m_pkthdr.len = sn.sn_len - sizeof(*ip); - bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); - m->m_data -= sizeof(*ip); - m->m_len = m->m_pkthdr.len = sn.sn_len; - } - - if (sc->sc_sync_ifidx == 0) { - sc->sc_len = PFSYNC_MINPKT; - m_freem(m); - return; - } -#endif - - sc->sc_if.if_opackets++; - sc->sc_if.if_obytes += m->m_pkthdr.len; - - m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain; - - pfsync_send_pkt(m); -} - -void -pfsync_insert_state(struct pf_state *st) -{ - struct pfsync_softc *sc = pfsyncif; - - NET_ASSERT_LOCKED(); - - if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) || - st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { - SET(st->state_flags, PFSTATE_NOSYNC); - return; - } - - if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) || - ISSET(st->state_flags, PFSTATE_NOSYNC)) - return; - - KASSERT(st->sync_state == PFSYNC_S_NONE); - - if (sc->sc_len == PFSYNC_MINPKT) - timeout_add_sec(&sc->sc_tmo, 1); - - pfsync_q_ins(st, PFSYNC_S_INS); - - st->sync_updates = 0; -} - -int -pfsync_defer(struct pf_state *st, struct mbuf *m, struct pfsync_deferral **ppd) -{ - struct pfsync_softc *sc = pfsyncif; - struct pfsync_deferral *pd; - unsigned int sched; - - NET_ASSERT_LOCKED(); - - if (!sc->sc_defer || - ISSET(st->state_flags, PFSTATE_NOSYNC) || - m->m_flags & (M_BCAST|M_MCAST)) - return (0); - - pd = pool_get(&sc->sc_pool, M_NOWAIT); - if (pd == NULL) - return (0); - - /* - * deferral queue grows faster, than timeout can consume, - * we have to ask packet (caller) to help timer and dispatch - * one deferral for us. - * - * We wish to call pfsync_undefer() here. Unfortunately we can't, - * because pfsync_undefer() will be calling to ip_output(), - * which in turn will call to pf_test(), which would then attempt - * to grab PF_LOCK() we currently hold. 
- */ - if (sc->sc_deferred >= 128) { - mtx_enter(&sc->sc_deferrals_mtx); - *ppd = TAILQ_FIRST(&sc->sc_deferrals); - if (*ppd != NULL) { - TAILQ_REMOVE(&sc->sc_deferrals, *ppd, pd_entry); - sc->sc_deferred--; - } - mtx_leave(&sc->sc_deferrals_mtx); - } else - *ppd = NULL; - - m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; - SET(st->state_flags, PFSTATE_ACK); - - pd->pd_st = pf_state_ref(st); - pd->pd_m = m; - - pd->pd_deadline = getnsecuptime() + PFSYNC_DEFER_NSEC; - - mtx_enter(&sc->sc_deferrals_mtx); - sched = TAILQ_EMPTY(&sc->sc_deferrals); - - TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry); - sc->sc_deferred++; - mtx_leave(&sc->sc_deferrals_mtx); - - if (sched) - timeout_add_nsec(&sc->sc_deferrals_tmo, PFSYNC_DEFER_NSEC); - - schednetisr(NETISR_PFSYNC); - - return (1); -} - -void -pfsync_undefer_notify(struct pfsync_deferral *pd) -{ - struct pf_pdesc pdesc; - struct pf_state *st = pd->pd_st; - - /* - * pf_remove_state removes the state keys and sets st->timeout - * to PFTM_UNLINKED. this is done under NET_LOCK which should - * be held here, so we can use PFTM_UNLINKED as a test for - * whether the state keys are set for the address family - * lookup. - */ - - if (st->timeout == PFTM_UNLINKED) - return; - - if (st->rt == PF_ROUTETO) { - if (pf_setup_pdesc(&pdesc, st->key[PF_SK_WIRE]->af, - st->direction, st->kif, pd->pd_m, NULL) != PF_PASS) - return; - switch (st->key[PF_SK_WIRE]->af) { - case AF_INET: - pf_route(&pdesc, st); - break; -#ifdef INET6 - case AF_INET6: - pf_route6(&pdesc, st); - break; -#endif /* INET6 */ - default: - unhandled_af(st->key[PF_SK_WIRE]->af); - } - pd->pd_m = pdesc.m; - } else { - switch (st->key[PF_SK_WIRE]->af) { - case AF_INET: - ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL, 0); - break; -#ifdef INET6 - case AF_INET6: - ip6_output(pd->pd_m, NULL, NULL, 0, NULL, NULL); - break; -#endif /* INET6 */ - default: - unhandled_af(st->key[PF_SK_WIRE]->af); - } - - pd->pd_m = NULL; - } -} - -void -pfsync_free_deferral(struct pfsync_deferral *pd) -{ - struct pfsync_softc *sc = pfsyncif; - - pf_state_unref(pd->pd_st); - m_freem(pd->pd_m); - pool_put(&sc->sc_pool, pd); -} - -void -pfsync_undefer(struct pfsync_deferral *pd, int drop) -{ - struct pfsync_softc *sc = pfsyncif; - - NET_ASSERT_LOCKED(); - - if (sc == NULL) - return; - - CLR(pd->pd_st->state_flags, PFSTATE_ACK); - if (!drop) - pfsync_undefer_notify(pd); - - pfsync_free_deferral(pd); -} - -void -pfsync_deferrals_tmo(void *arg) -{ - struct pfsync_softc *sc = arg; - struct pfsync_deferral *pd; - uint64_t now, nsec = 0; - struct pfsync_deferrals pds = TAILQ_HEAD_INITIALIZER(pds); - - now = getnsecuptime(); - - mtx_enter(&sc->sc_deferrals_mtx); - for (;;) { - pd = TAILQ_FIRST(&sc->sc_deferrals); - if (pd == NULL) - break; - - if (now < pd->pd_deadline) { - nsec = pd->pd_deadline - now; - break; - } - - TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); - sc->sc_deferred--; - TAILQ_INSERT_TAIL(&pds, pd, pd_entry); - } - mtx_leave(&sc->sc_deferrals_mtx); - - if (nsec > 0) { - /* we were looking at a pd, but it wasn't old enough */ - timeout_add_nsec(&sc->sc_deferrals_tmo, nsec); - } - - if (TAILQ_EMPTY(&pds)) - return; - - NET_LOCK(); - while ((pd = TAILQ_FIRST(&pds)) != NULL) { - TAILQ_REMOVE(&pds, pd, pd_entry); - - pfsync_undefer(pd, 0); - } - NET_UNLOCK(); -} - -void -pfsync_deferred(struct pf_state *st, int drop) -{ - struct pfsync_softc *sc = pfsyncif; - struct pfsync_deferral *pd; - - NET_ASSERT_LOCKED(); - - mtx_enter(&sc->sc_deferrals_mtx); - TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) { - if (pd->pd_st == st) 
{ - TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); - sc->sc_deferred--; - break; - } - } - mtx_leave(&sc->sc_deferrals_mtx); - - if (pd != NULL) - pfsync_undefer(pd, drop); -} - -void -pfsync_update_state(struct pf_state *st) -{ - struct pfsync_softc *sc = pfsyncif; - int sync = 0; - - NET_ASSERT_LOCKED(); - - if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) - return; - - if (ISSET(st->state_flags, PFSTATE_ACK)) - pfsync_deferred(st, 0); - if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { - if (st->sync_state != PFSYNC_S_NONE) - pfsync_q_del(st); - return; - } - - if (sc->sc_len == PFSYNC_MINPKT) - timeout_add_sec(&sc->sc_tmo, 1); - - switch (st->sync_state) { - case PFSYNC_S_UPD_C: - case PFSYNC_S_UPD: - case PFSYNC_S_INS: - /* we're already handling it */ - - if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { - st->sync_updates++; - if (st->sync_updates >= sc->sc_maxupdates) - sync = 1; - } - break; - - case PFSYNC_S_IACK: - pfsync_q_del(st); - case PFSYNC_S_NONE: - pfsync_q_ins(st, PFSYNC_S_UPD_C); - st->sync_updates = 0; - break; - - default: - panic("pfsync_update_state: unexpected sync state %d", - st->sync_state); - } - - if (sync || (getuptime() - st->pfsync_time) < 2) - schednetisr(NETISR_PFSYNC); -} - -void -pfsync_cancel_full_update(struct pfsync_softc *sc) -{ - if (timeout_pending(&sc->sc_bulkfail_tmo) || - timeout_pending(&sc->sc_bulk_tmo)) { -#if NCARP > 0 - if (!pfsync_sync_ok) - carp_group_demote_adj(&sc->sc_if, -1, - "pfsync bulk cancelled"); - if (sc->sc_initial_bulk) { - carp_group_demote_adj(&sc->sc_if, -32, - "pfsync init"); - sc->sc_initial_bulk = 0; - } -#endif - pfsync_sync_ok = 1; - DPFPRINTF(LOG_INFO, "cancelling bulk update"); - } - timeout_del(&sc->sc_bulkfail_tmo); - timeout_del(&sc->sc_bulk_tmo); - sc->sc_bulk_next = NULL; - sc->sc_bulk_last = NULL; - sc->sc_ureq_sent = 0; - sc->sc_bulk_tries = 0; -} - -void -pfsync_request_full_update(struct pfsync_softc *sc) -{ - if (sc->sc_sync_ifidx != 0 && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) { - /* Request a full state table update. */ - sc->sc_ureq_sent = getuptime(); -#if NCARP > 0 - if (!sc->sc_link_demoted && pfsync_sync_ok) - carp_group_demote_adj(&sc->sc_if, 1, - "pfsync bulk start"); -#endif - pfsync_sync_ok = 0; - DPFPRINTF(LOG_INFO, "requesting bulk update"); - timeout_add(&sc->sc_bulkfail_tmo, 4 * hz + - pf_pool_limits[PF_LIMIT_STATES].limit / - ((sc->sc_if.if_mtu - PFSYNC_MINPKT) / - sizeof(struct pfsync_state))); - pfsync_request_update(0, 0); - } -} - -void -pfsync_request_update(u_int32_t creatorid, u_int64_t id) -{ - struct pfsync_softc *sc = pfsyncif; - struct pfsync_upd_req_item *item; - size_t nlen, sclen; - int retry; - - /* - * this code does nothing to prevent multiple update requests for the - * same state being generated. 
- */ - - item = pool_get(&sc->sc_pool, PR_NOWAIT); - if (item == NULL) { - /* XXX stats */ - return; - } - - item->ur_msg.id = id; - item->ur_msg.creatorid = creatorid; - - for (;;) { - mtx_enter(&sc->sc_upd_req_mtx); - - nlen = sizeof(struct pfsync_upd_req); - if (TAILQ_EMPTY(&sc->sc_upd_req_list)) - nlen += sizeof(struct pfsync_subheader); - - sclen = atomic_add_long_nv(&sc->sc_len, nlen); - retry = (sclen > sc->sc_if.if_mtu); - if (retry) - atomic_sub_long(&sc->sc_len, nlen); - else - TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry); - - mtx_leave(&sc->sc_upd_req_mtx); - - if (!retry) - break; - - pfsync_sendout(); - } - - schednetisr(NETISR_PFSYNC); -} - -void -pfsync_update_state_req(struct pf_state *st) -{ - struct pfsync_softc *sc = pfsyncif; - - if (sc == NULL) - panic("pfsync_update_state_req: nonexistent instance"); - - if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { - if (st->sync_state != PFSYNC_S_NONE) - pfsync_q_del(st); - return; - } - - switch (st->sync_state) { - case PFSYNC_S_UPD_C: - case PFSYNC_S_IACK: - pfsync_q_del(st); - case PFSYNC_S_NONE: - pfsync_q_ins(st, PFSYNC_S_UPD); - schednetisr(NETISR_PFSYNC); - return; - - case PFSYNC_S_INS: - case PFSYNC_S_UPD: - case PFSYNC_S_DEL: - /* we're already handling it */ - return; - - default: - panic("pfsync_update_state_req: unexpected sync state %d", - st->sync_state); - } -} - -void -pfsync_delete_state(struct pf_state *st) -{ - struct pfsync_softc *sc = pfsyncif; - - NET_ASSERT_LOCKED(); - - if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) - return; - - if (ISSET(st->state_flags, PFSTATE_ACK)) - pfsync_deferred(st, 1); - if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { - if (st->sync_state != PFSYNC_S_NONE) - pfsync_q_del(st); - return; - } - - if (sc->sc_len == PFSYNC_MINPKT) - timeout_add_sec(&sc->sc_tmo, 1); - - switch (st->sync_state) { - case PFSYNC_S_INS: - /* we never got to tell the world so just forget about it */ - pfsync_q_del(st); - return; - - case PFSYNC_S_UPD_C: - case PFSYNC_S_UPD: - case PFSYNC_S_IACK: - pfsync_q_del(st); - /* - * FALLTHROUGH to putting it on the del list. - * Note on reference count bookkeeping: - * pfsync_q_del() drops the reference held for queue - * ownership, but the st entry survives because - * our caller still holds a reference. - */ - - case PFSYNC_S_NONE: - /* - * We either fall through here, or no pfsync queue owns a - * reference to st at this point. - * - * Calling pfsync_q_ins() puts st on the delete queue and - * grabs a reference for that queue.
- */ - pfsync_q_ins(st, PFSYNC_S_DEL); - return; - - default: - panic("pfsync_delete_state: unexpected sync state %d", - st->sync_state); - } -} - -void -pfsync_clear_states(u_int32_t creatorid, const char *ifname) -{ - struct pfsync_softc *sc = pfsyncif; - struct { - struct pfsync_subheader subh; - struct pfsync_clr clr; - } __packed r; - - NET_ASSERT_LOCKED(); - - if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) - return; - - bzero(&r, sizeof(r)); - - r.subh.action = PFSYNC_ACT_CLR; - r.subh.len = sizeof(struct pfsync_clr) >> 2; - r.subh.count = htons(1); - - strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); - r.clr.creatorid = creatorid; - - pfsync_send_plus(&r, sizeof(r)); -} - -void -pfsync_q_ins(struct pf_state *st, int q) -{ - struct pfsync_softc *sc = pfsyncif; - size_t nlen, sclen; - - if (sc->sc_len < PFSYNC_MINPKT) - panic("pfsync pkt len is too low %zd", sc->sc_len); - do { - mtx_enter(&sc->sc_st_mtx); - - /* - * Either two threads are trying to update the same state, - * or the state is just being processed (it is on the - * snapshot queue). - */ - if (st->sync_state != PFSYNC_S_NONE) { - mtx_leave(&sc->sc_st_mtx); - break; - } - - nlen = pfsync_qs[q].len; - - if (TAILQ_EMPTY(&sc->sc_qs[q])) - nlen += sizeof(struct pfsync_subheader); - - sclen = atomic_add_long_nv(&sc->sc_len, nlen); - if (sclen > sc->sc_if.if_mtu) { - atomic_sub_long(&sc->sc_len, nlen); - mtx_leave(&sc->sc_st_mtx); - pfsync_sendout(); - continue; - } - - pf_state_ref(st); - - TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list); - st->sync_state = q; - mtx_leave(&sc->sc_st_mtx); - } while (0); -} - -void -pfsync_q_del(struct pf_state *st) -{ - struct pfsync_softc *sc = pfsyncif; - int q; - - KASSERT(st->sync_state != PFSYNC_S_NONE); - - mtx_enter(&sc->sc_st_mtx); - q = st->sync_state; - /* - * Re-check under the mutex: if the state is snapped already, just - * bail out; we came too late, the state is just being - * processed/dispatched to the peer.
- */ - if ((q == PFSYNC_S_NONE) || (st->snapped)) { - mtx_leave(&sc->sc_st_mtx); - return; - } - atomic_sub_long(&sc->sc_len, pfsync_qs[q].len); - TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); - if (TAILQ_EMPTY(&sc->sc_qs[q])) - atomic_sub_long(&sc->sc_len, sizeof(struct pfsync_subheader)); - st->sync_state = PFSYNC_S_NONE; - mtx_leave(&sc->sc_st_mtx); - - pf_state_unref(st); -} - -void -pfsync_update_tdb(struct tdb *t, int output) -{ - struct pfsync_softc *sc = pfsyncif; - size_t nlen, sclen; - - if (sc == NULL) - return; - - if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) { - do { - mtx_enter(&sc->sc_tdb_mtx); - nlen = sizeof(struct pfsync_tdb); - - mtx_enter(&t->tdb_mtx); - if (ISSET(t->tdb_flags, TDBF_PFSYNC)) { - /* we've lost the race, no action for us then */ - mtx_leave(&t->tdb_mtx); - mtx_leave(&sc->sc_tdb_mtx); - break; - } - - if (TAILQ_EMPTY(&sc->sc_tdb_q)) - nlen += sizeof(struct pfsync_subheader); - - sclen = atomic_add_long_nv(&sc->sc_len, nlen); - if (sclen > sc->sc_if.if_mtu) { - atomic_sub_long(&sc->sc_len, nlen); - mtx_leave(&t->tdb_mtx); - mtx_leave(&sc->sc_tdb_mtx); - pfsync_sendout(); - continue; - } - - TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry); - tdb_ref(t); - SET(t->tdb_flags, TDBF_PFSYNC); - mtx_leave(&t->tdb_mtx); - - mtx_leave(&sc->sc_tdb_mtx); - t->tdb_updates = 0; - } while (0); - } else { - if (++t->tdb_updates >= sc->sc_maxupdates) - schednetisr(NETISR_PFSYNC); - } - - mtx_enter(&t->tdb_mtx); - if (output) - SET(t->tdb_flags, TDBF_PFSYNC_RPL); - else - CLR(t->tdb_flags, TDBF_PFSYNC_RPL); - mtx_leave(&t->tdb_mtx); -} - -void -pfsync_delete_tdb(struct tdb *t) -{ - struct pfsync_softc *sc = pfsyncif; - size_t nlen; - - if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC)) - return; - - mtx_enter(&sc->sc_tdb_mtx); - - /* - * If the tdb entry is just being processed (found in a snapshot), - * then it cannot be deleted; we simply came too late. - */ - if (ISSET(t->tdb_flags, TDBF_PFSYNC_SNAPPED)) { - mtx_leave(&sc->sc_tdb_mtx); - return; - } - - TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry); - - mtx_enter(&t->tdb_mtx); - CLR(t->tdb_flags, TDBF_PFSYNC); - mtx_leave(&t->tdb_mtx); - - nlen = sizeof(struct pfsync_tdb); - if (TAILQ_EMPTY(&sc->sc_tdb_q)) - nlen += sizeof(struct pfsync_subheader); - atomic_sub_long(&sc->sc_len, nlen); - - mtx_leave(&sc->sc_tdb_mtx); - - tdb_unref(t); -} - -void -pfsync_out_tdb(struct tdb *t, void *buf) -{ - struct pfsync_tdb *ut = buf; - - bzero(ut, sizeof(*ut)); - ut->spi = t->tdb_spi; - bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst)); - /* - * When a failover happens, the master's rpl is probably above - * what we see here (we may be up to a second late), so - * increase it a bit for outbound tdbs to manage most such - * situations. - * - * For now, just add an offset that is likely to be larger - * than the number of packets we can see in one second. The RFC - * just says the next packet must have a higher seq value. - * - * XXX What is a good algorithm for this? We could use - * a rate-determined increase, but to know it, we would have - * to extend struct tdb. - * XXX ut->rpl can wrap over MAXINT, but if so the real tdb - * will soon be replaced anyway. For now, just don't handle - * this edge case. - */ -#define RPL_INCR 16384 - ut->rpl = htobe64(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
- RPL_INCR : 0)); - ut->cur_bytes = htobe64(t->tdb_cur_bytes); - ut->sproto = t->tdb_sproto; - ut->rdomain = htons(t->tdb_rdomain); -} - -void -pfsync_bulk_start(void) -{ - struct pfsync_softc *sc = pfsyncif; - - NET_ASSERT_LOCKED(); - - /* - * pf gc via pfsync_state_in_use reads sc_bulk_next and - * sc_bulk_last while exclusively holding the pf_state_list - * rwlock. make sure it can't race with us setting these - * pointers. they basically act as hazards, and borrow the - * list's state reference count. - */ - rw_enter_read(&pf_state_list.pfs_rwl); - - /* get a consistent view of the list pointers */ - mtx_enter(&pf_state_list.pfs_mtx); - if (sc->sc_bulk_next == NULL) - sc->sc_bulk_next = TAILQ_FIRST(&pf_state_list.pfs_list); - - sc->sc_bulk_last = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue); - mtx_leave(&pf_state_list.pfs_mtx); - - rw_exit_read(&pf_state_list.pfs_rwl); - - DPFPRINTF(LOG_INFO, "received bulk update request"); - - if (sc->sc_bulk_last == NULL) - pfsync_bulk_status(PFSYNC_BUS_END); - else { - sc->sc_ureq_received = getuptime(); - - pfsync_bulk_status(PFSYNC_BUS_START); - timeout_add(&sc->sc_bulk_tmo, 0); - } -} - -void -pfsync_bulk_update(void *arg) -{ - struct pfsync_softc *sc; - struct pf_state *st; - int i = 0; - - NET_LOCK(); - sc = pfsyncif; - if (sc == NULL) - goto out; - - rw_enter_read(&pf_state_list.pfs_rwl); - st = sc->sc_bulk_next; - sc->sc_bulk_next = NULL; - - for (;;) { - if (st->sync_state == PFSYNC_S_NONE && - st->timeout < PFTM_MAX && - st->pfsync_time <= sc->sc_ureq_received) { - pfsync_update_state_req(st); - i++; - } - - st = TAILQ_NEXT(st, entry_list); - if ((st == NULL) || (st == sc->sc_bulk_last)) { - /* we're done */ - sc->sc_bulk_last = NULL; - pfsync_bulk_status(PFSYNC_BUS_END); - break; - } - - if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) < - sizeof(struct pfsync_state)) { - /* we've filled a packet */ - sc->sc_bulk_next = st; - timeout_add(&sc->sc_bulk_tmo, 1); - break; - } - } - - rw_exit_read(&pf_state_list.pfs_rwl); - out: - NET_UNLOCK(); -} - -void -pfsync_bulk_status(u_int8_t status) -{ - struct { - struct pfsync_subheader subh; - struct pfsync_bus bus; - } __packed r; - - struct pfsync_softc *sc = pfsyncif; - - bzero(&r, sizeof(r)); - - r.subh.action = PFSYNC_ACT_BUS; - r.subh.len = sizeof(struct pfsync_bus) >> 2; - r.subh.count = htons(1); - - r.bus.creatorid = pf_status.hostid; - r.bus.endtime = htonl(getuptime() - sc->sc_ureq_received); - r.bus.status = status; - - pfsync_send_plus(&r, sizeof(r)); -} - -void -pfsync_bulk_fail(void *arg) -{ - struct pfsync_softc *sc; - - NET_LOCK(); - sc = pfsyncif; - if (sc == NULL) - goto out; - if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { - /* Try again */ - timeout_add_sec(&sc->sc_bulkfail_tmo, 5); - pfsync_request_update(0, 0); - } else { - /* Pretend the transfer was ok */ - sc->sc_ureq_sent = 0; - sc->sc_bulk_tries = 0; -#if NCARP > 0 - if (!pfsync_sync_ok) - carp_group_demote_adj(&sc->sc_if, -1, - sc->sc_link_demoted ?
- "pfsync link state up" : - "pfsync bulk fail"); - if (sc->sc_initial_bulk) { - carp_group_demote_adj(&sc->sc_if, -32, - "pfsync init"); - sc->sc_initial_bulk = 0; - } -#endif - pfsync_sync_ok = 1; - sc->sc_link_demoted = 0; - DPFPRINTF(LOG_ERR, "failed to receive bulk update"); - } - out: - NET_UNLOCK(); -} - -void -pfsync_send_plus(void *plus, size_t pluslen) -{ - struct pfsync_softc *sc = pfsyncif; - - if (sc->sc_len + pluslen > sc->sc_if.if_mtu) - pfsync_sendout(); - - sc->sc_plus = plus; - sc->sc_pluslen = pluslen; - atomic_add_long(&sc->sc_len, pluslen); - - pfsync_sendout(); -} - -int -pfsync_up(void) -{ - struct pfsync_softc *sc = pfsyncif; - - if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) - return (0); - - return (1); -} - -int -pfsync_state_in_use(struct pf_state *st) -{ - struct pfsync_softc *sc = pfsyncif; - - if (sc == NULL) - return (0); - - rw_assert_wrlock(&pf_state_list.pfs_rwl); - - if (st->sync_state != PFSYNC_S_NONE || - st == sc->sc_bulk_next || - st == sc->sc_bulk_last) - return (1); - - return (0); -} - -void -pfsync_timeout(void *arg) -{ - NET_LOCK(); - pfsync_sendout(); - NET_UNLOCK(); -} - -/* this is a softnet/netisr handler */ -void -pfsyncintr(void) -{ - pfsync_sendout(); + printf("%s: %s\n", sc->sc_if.if_xname, __func__); + pfsync_put(sc); } int Index: if_pfsync.h =================================================================== RCS file: /cvs/src/sys/net/if_pfsync.h,v retrieving revision 1.57 diff -u -p -r1.57 if_pfsync.h --- if_pfsync.h 7 Jul 2021 18:38:25 -0000 1.57 +++ if_pfsync.h 5 Nov 2022 10:37:51 -0000 @@ -113,7 +113,7 @@ struct pfsync_header { u_int8_t _pad; u_int16_t len; /* in bytes */ u_int8_t pfcksum[PF_MD5_DIGEST_LENGTH]; -} __packed; +} __packed __aligned(4); /* * Frame region subheader @@ -123,7 +123,7 @@ struct pfsync_subheader { u_int8_t action; u_int8_t len; /* in dwords */ u_int16_t count; -} __packed; +} __packed __aligned(4); /* * CLR @@ -132,7 +132,7 @@ struct pfsync_subheader { struct pfsync_clr { char ifname[IFNAMSIZ]; u_int32_t creatorid; -} __packed; +} __packed __aligned(4); /* * OINS, OUPD @@ -153,7 +153,7 @@ struct pfsync_clr { struct pfsync_ins_ack { u_int64_t id; u_int32_t creatorid; -} __packed; +} __packed __aligned(4); /* * UPD_C @@ -168,7 +168,7 @@ struct pfsync_upd_c { u_int8_t timeout; u_int8_t state_flags; u_int8_t _pad[2]; -} __packed; +} __packed __aligned(4); /* * UPD_REQ @@ -177,7 +177,7 @@ struct pfsync_upd_c { struct pfsync_upd_req { u_int64_t id; u_int32_t creatorid; -} __packed; +} __packed __aligned(4); /* * DEL_C @@ -186,7 +186,7 @@ struct pfsync_upd_req { struct pfsync_del_c { u_int64_t id; u_int32_t creatorid; -} __packed; +} __packed __aligned(4); /* * INS_F, DEL_F @@ -205,7 +205,7 @@ struct pfsync_bus { #define PFSYNC_BUS_START 1 #define PFSYNC_BUS_END 2 u_int8_t _pad[3]; -} __packed; +} __packed __aligned(4); /* * TDB @@ -218,7 +218,7 @@ struct pfsync_tdb { u_int8_t sproto; u_int8_t updates; u_int16_t rdomain; -} __packed; +} __packed __aligned(4); /* * EOF @@ -341,8 +341,19 @@ int pfsync_defer(struct pf_state *, st struct pfsync_deferral **); void pfsync_undefer(struct pfsync_deferral *, int); -int pfsync_up(void); int pfsync_state_in_use(struct pf_state *); + +void pfsync_iack(struct pf_state *); + +struct pfsync_softc; +extern struct pfsync_softc *pfsync_if; + +static inline int +pfsync_is_up(void) +{ + return (pfsync_if != NULL); +} + #endif /* _KERNEL */ #endif /* _NET_IF_PFSYNC_H_ */ Index: pf.c =================================================================== RCS file: 
/cvs/src/sys/net/pf.c,v retrieving revision 1.1136 diff -u -p -r1.1136 pf.c --- pf.c 20 Jul 2022 09:33:11 -0000 1.1136 +++ pf.c 5 Nov 2022 10:37:51 -0000 @@ -1259,6 +1259,223 @@ pf_state_export(struct pfsync_state *sp, sp->set_prio[1] = st->set_prio[1]; } +int +pf_state_alloc_scrub_memory(const struct pfsync_state_peer *s, + struct pf_state_peer *d) +{ + if (s->scrub.scrub_flag && d->scrub == NULL) + return (pf_normalize_tcp_alloc(d)); + + return (0); +} + +int +pf_state_import(const struct pfsync_state *sp, int flags) +{ + struct pf_state *st = NULL; + struct pf_state_key *skw = NULL, *sks = NULL; + struct pf_rule *r = NULL; + struct pfi_kif *kif; + int pool_flags; + int error = ENOMEM; + int n = 0; + + if (sp->creatorid == 0) { + DPFPRINTF(LOG_NOTICE, "%s: invalid creator id: %08x", __func__, + ntohl(sp->creatorid)); + return (EINVAL); + } + + if ((kif = pfi_kif_get(sp->ifname, NULL)) == NULL) { + DPFPRINTF(LOG_NOTICE, "%s: unknown interface: %s", __func__, + sp->ifname); + if (flags & PFSYNC_SI_IOCTL) + return (EINVAL); + return (0); /* skip this state */ + } + + if (sp->af == 0) + return (0); /* skip this state */ + + /* + * If the ruleset checksums match or the state is coming from the ioctl, + * it's safe to associate the state with the rule of that number. + */ + if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && + (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && + ntohl(sp->rule) < pf_main_ruleset.rules.active.rcount) { + TAILQ_FOREACH(r, pf_main_ruleset.rules.active.ptr, entries) + if (ntohl(sp->rule) == n++) + break; + } else + r = &pf_default_rule; + + if ((r->max_states && r->states_cur >= r->max_states)) + goto cleanup; + + if (flags & PFSYNC_SI_IOCTL) + pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO; + else + pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO; + + if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL) + goto cleanup; + + if ((skw = pf_alloc_state_key(pool_flags)) == NULL) + goto cleanup; + + if ((sp->key[PF_SK_WIRE].af && + (sp->key[PF_SK_WIRE].af != sp->key[PF_SK_STACK].af)) || + PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0], + &sp->key[PF_SK_STACK].addr[0], sp->af) || + PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1], + &sp->key[PF_SK_STACK].addr[1], sp->af) || + sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] || + sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] || + sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) { + if ((sks = pf_alloc_state_key(pool_flags)) == NULL) + goto cleanup; + } else + sks = skw; + + /* allocate memory for scrub info */ + if (pf_state_alloc_scrub_memory(&sp->src, &st->src) || + pf_state_alloc_scrub_memory(&sp->dst, &st->dst)) + goto cleanup; + + /* copy to state key(s) */ + skw->addr[0] = sp->key[PF_SK_WIRE].addr[0]; + skw->addr[1] = sp->key[PF_SK_WIRE].addr[1]; + skw->port[0] = sp->key[PF_SK_WIRE].port[0]; + skw->port[1] = sp->key[PF_SK_WIRE].port[1]; + skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain); + PF_REF_INIT(skw->refcnt); + skw->proto = sp->proto; + if (!(skw->af = sp->key[PF_SK_WIRE].af)) + skw->af = sp->af; + if (sks != skw) { + sks->addr[0] = sp->key[PF_SK_STACK].addr[0]; + sks->addr[1] = sp->key[PF_SK_STACK].addr[1]; + sks->port[0] = sp->key[PF_SK_STACK].port[0]; + sks->port[1] = sp->key[PF_SK_STACK].port[1]; + sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain); + PF_REF_INIT(sks->refcnt); + if (!(sks->af = sp->key[PF_SK_STACK].af)) + sks->af = sp->af; + if (sks->af != skw->af) { + switch (sp->proto) { + case IPPROTO_ICMP: + sks->proto = IPPROTO_ICMPV6; + break; + case IPPROTO_ICMPV6: + sks->proto 
= IPPROTO_ICMP; + break; + default: + sks->proto = sp->proto; + } + } else + sks->proto = sp->proto; + + if (((sks->af != AF_INET) && (sks->af != AF_INET6)) || + ((skw->af != AF_INET) && (skw->af != AF_INET6))) { + error = EINVAL; + goto cleanup; + } + + } else if ((sks->af != AF_INET) && (sks->af != AF_INET6)) { + error = EINVAL; + goto cleanup; + } + st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]); + st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]); + + /* copy to state */ + st->rt_addr = sp->rt_addr; + st->rt = sp->rt; + st->creation = getuptime() - ntohl(sp->creation); + st->expire = getuptime(); + if (ntohl(sp->expire)) { + u_int32_t timeout; + + timeout = r->timeout[sp->timeout]; + if (!timeout) + timeout = pf_default_rule.timeout[sp->timeout]; + + /* sp->expire may have been adaptively scaled by export. */ + st->expire -= timeout - ntohl(sp->expire); + } + + st->direction = sp->direction; + st->log = sp->log; + st->timeout = sp->timeout; + st->state_flags = ntohs(sp->state_flags); + st->max_mss = ntohs(sp->max_mss); + st->min_ttl = sp->min_ttl; + st->set_tos = sp->set_tos; + st->set_prio[0] = sp->set_prio[0]; + st->set_prio[1] = sp->set_prio[1]; + + st->id = sp->id; + st->creatorid = sp->creatorid; + pf_state_peer_ntoh(&sp->src, &st->src); + pf_state_peer_ntoh(&sp->dst, &st->dst); + + st->rule.ptr = r; + st->anchor.ptr = NULL; + + st->pfsync_time = getuptime(); + st->sync_state = PFSYNC_S_NONE; + + refcnt_init(&st->refcnt); + + /* XXX when we have anchors, use STATE_INC_COUNTERS */ + r->states_cur++; + r->states_tot++; + + if (!ISSET(flags, PFSYNC_SI_IOCTL)) + SET(st->state_flags, PFSTATE_NOSYNC); + + /* + * We just set the PFSTATE_NOSYNC bit, which prevents + * pfsync_insert_state() from inserting the state into pfsync. + */ + if (pf_state_insert(kif, &skw, &sks, st) != 0) { + /* XXX when we have anchors, use STATE_DEC_COUNTERS */ + r->states_cur--; + error = EEXIST; + goto cleanup_state; + } + + if (!ISSET(flags, PFSYNC_SI_IOCTL)) { + CLR(st->state_flags, PFSTATE_NOSYNC); +#if NPFSYNC > 0 + if (ISSET(st->state_flags, PFSTATE_ACK)) + pfsync_iack(st); +#endif + } + CLR(st->state_flags, PFSTATE_ACK); + + return (0); + + cleanup: + if (skw == sks) + sks = NULL; + if (skw != NULL) + pool_put(&pf_state_key_pl, skw); + if (sks != NULL) + pool_put(&pf_state_key_pl, sks); + + cleanup_state: /* pf_state_insert frees the state keys */ + if (st) { + if (st->dst.scrub) + pool_put(&pf_state_scrub_pl, st->dst.scrub); + if (st->src.scrub) + pool_put(&pf_state_scrub_pl, st->src.scrub); + pool_put(&pf_state_pl, st); + } + return (error); +} + /* END state table stuff */ void @@ -4038,7 +4255,7 @@ pf_test_rule(struct pf_pdesc *pd, struct #if NPFSYNC > 0 if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) && - pd->dir == PF_OUT && pfsync_up()) { + pd->dir == PF_OUT && pfsync_is_up()) { /* * We want the state created, but we don't * want to send this in case a partner Index: pf_norm.c =================================================================== RCS file: /cvs/src/sys/net/pf_norm.c,v retrieving revision 1.223 diff -u -p -r1.223 pf_norm.c --- pf_norm.c 10 Mar 2021 10:21:48 -0000 1.223 +++ pf_norm.c 5 Nov 2022 10:37:51 -0000 @@ -1160,6 +1160,16 @@ tcp_drop: } int +pf_normalize_tcp_alloc(struct pf_state_peer *src) +{ + src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO); + if (src->scrub == NULL) + return (ENOMEM); + + return (0); +} + +int pf_normalize_tcp_init(struct pf_pdesc *pd, struct pf_state_peer *src) { struct tcphdr *th = &pd->hdr.tcp; @@ -1170,10 +1180,8 @@
pf_normalize_tcp_init(struct pf_pdesc *p KASSERT(src->scrub == NULL); - src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); - if (src->scrub == NULL) + if (pf_normalize_tcp_alloc(src) != 0) return (1); - memset(src->scrub, 0, sizeof(*src->scrub)); switch (pd->af) { case AF_INET: { @@ -1206,7 +1214,7 @@ pf_normalize_tcp_init(struct pf_pdesc *p opt = opts; while ((opt = pf_find_tcpopt(opt, opts, olen, - TCPOPT_TIMESTAMP, TCPOLEN_TIMESTAMP)) != NULL) { + TCPOPT_TIMESTAMP, TCPOLEN_TIMESTAMP)) != NULL) { src->scrub->pfss_flags |= PFSS_TIMESTAMP; src->scrub->pfss_ts_mod = arc4random(); Index: pfvar.h =================================================================== RCS file: /cvs/src/sys/net/pfvar.h,v retrieving revision 1.509 diff -u -p -r1.509 pfvar.h --- pfvar.h 20 Jul 2022 09:33:11 -0000 1.509 +++ pfvar.h 5 Nov 2022 10:37:51 -0000 @@ -748,12 +748,14 @@ struct pf_state { u_int32_t creatorid; u_int8_t direction; u_int8_t pad[3]; + RB_ENTRY(pf_state) entry_id; TAILQ_ENTRY(pf_state) sync_list; - TAILQ_ENTRY(pf_state) sync_snap; + u_int8_t sync_state; /* PFSYNC_S_x */ + u_int8_t sync_updates; + TAILQ_ENTRY(pf_state) entry_list; SLIST_ENTRY(pf_state) gc_list; - RB_ENTRY(pf_state) entry_id; struct pf_state_peer src; struct pf_state_peer dst; struct pf_rule_slist match_rules; @@ -770,6 +772,7 @@ struct pf_state { int32_t expire; int32_t pfsync_time; int rtableid[2]; /* rtables stack and wire */ + pf_refcnt_t refcnt; u_int16_t qid; u_int16_t pqid; u_int16_t tag; @@ -786,20 +789,19 @@ struct pf_state { #define PFSTATE_SETPRIO 0x0200 #define PFSTATE_SCRUBMASK (PFSTATE_NODF|PFSTATE_RANDOMID|PFSTATE_SCRUB_TCP) #define PFSTATE_SETMASK (PFSTATE_SETTOS|PFSTATE_SETPRIO) + u_int8_t log; u_int8_t timeout; - u_int8_t sync_state; /* PFSYNC_S_x */ - u_int8_t sync_updates; + u_int8_t rt; + u_int8_t min_ttl; u_int8_t set_tos; u_int8_t set_prio[2]; - u_int16_t max_mss; + u_int16_t if_index_in; u_int16_t if_index_out; - pf_refcnt_t refcnt; + u_int16_t max_mss; u_int16_t delay; - u_int8_t rt; - u_int8_t snapped; }; /* @@ -1740,6 +1742,11 @@ extern struct pf_state *pf_find_state_a u_int, int *); extern void pf_state_export(struct pfsync_state *, struct pf_state *); +int pf_state_import(const struct pfsync_state *, + int); +int pf_state_alloc_scrub_memory( + const struct pfsync_state_peer *, + struct pf_state_peer *); extern void pf_print_state(struct pf_state *); extern void pf_print_flags(u_int8_t); extern void pf_addrcpy(struct pf_addr *, struct pf_addr *, @@ -1788,6 +1795,7 @@ int pf_normalize_ip6(struct pf_pdesc *, int pf_normalize_tcp(struct pf_pdesc *); void pf_normalize_tcp_cleanup(struct pf_state *); int pf_normalize_tcp_init(struct pf_pdesc *, struct pf_state_peer *); +int pf_normalize_tcp_alloc(struct pf_state_peer *); int pf_normalize_tcp_stateful(struct pf_pdesc *, u_short *, struct pf_state *, struct pf_state_peer *, struct pf_state_peer *, int *);
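A note for reviewers: the removed pfsync_request_update() and pfsync_q_ins() bodies above share an easy-to-miss pattern. sc_len is grown optimistically with an atomic add; if the new total would exceed the interface MTU, the reservation is backed out, the pending packet is flushed with pfsync_sendout(), and the insert is retried. Below is a minimal userspace sketch of just that pattern; PKT_MIN, PKT_MTU, reserve() and flush() are illustrative stand-ins, not names from the tree.

	#include <stdatomic.h>
	#include <stddef.h>

	#define PKT_MIN	28	/* stands in for PFSYNC_MINPKT */
	#define PKT_MTU	1500	/* stands in for sc->sc_if.if_mtu */

	static atomic_size_t pkt_len = PKT_MIN;	/* stands in for sc->sc_len */

	static void
	flush(void)
	{
		/* stands in for pfsync_sendout(): transmit, then reset */
		atomic_store(&pkt_len, PKT_MIN);
	}

	/*
	 * Reserve nlen bytes in the pending packet: grow the length
	 * optimistically, and if the new total would exceed the MTU,
	 * back the reservation out, flush what is queued, and retry.
	 */
	static void
	reserve(size_t nlen)
	{
		for (;;) {
			size_t newlen = atomic_fetch_add(&pkt_len, nlen) + nlen;
			if (newlen <= PKT_MTU)
				return;
			atomic_fetch_sub(&pkt_len, nlen);
			flush();
		}
	}

In the kernel code the successful reservation is additionally paired, under sc_st_mtx or sc_upd_req_mtx, with the TAILQ insert, so the length accounting and the queue contents stay consistent.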
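A second note, on the error handling in the new pf_state_import() above: skw and sks may alias the same state key (sks = skw when the wire and stack keys match), so the cleanup label first breaks the alias before freeing, ensuring the shared key is released exactly once. A hedged sketch of the idiom with placeholder names (struct key and malloc(3)/free(3) stand in for the pf state key pools):

	#include <stdlib.h>

	struct key { int af; };

	static int
	import(int two_keys)
	{
		struct key *skw = NULL, *sks = NULL;

		if ((skw = calloc(1, sizeof(*skw))) == NULL)
			goto cleanup;
		if (two_keys) {
			if ((sks = calloc(1, sizeof(*sks))) == NULL)
				goto cleanup;
		} else
			sks = skw;	/* wire and stack keys alias */

		/* ... further setup that may fail and goto cleanup ... */

		/* on success, ownership passes on (as to pf_state_insert()) */
		return (0);

	cleanup:
		if (skw == sks)		/* break the alias so the shared */
			sks = NULL;	/* key is freed exactly once */
		free(skw);
		free(sks);
		return (-1);
	}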