Index: regress/sys/net/pf_forward/Makefile =================================================================== RCS file: /cvs/src/regress/sys/net/pf_forward/Makefile,v diff -u -p -r1.35 Makefile --- regress/sys/net/pf_forward/Makefile 1 Feb 2021 12:52:07 -0000 1.35 +++ regress/sys/net/pf_forward/Makefile 21 Aug 2024 05:11:22 -0000 @@ -55,14 +55,14 @@ regress: # # Run make check-setup to see if you got the setup correct. -SRC_IF ?= tap0 +SRC_IF ?= etherip0 SRC_MAC ?= fe:e1:ba:d1:0a:dc -PF_IFIN ?= vio0 -PF_IFOUT ?= vio1 -PF_MAC ?= 52:54:00:12:34:50 -PF_SSH ?= -RT_SSH ?= -ECO_SSH ?= +PF_IFIN ?= etherip0 +PF_IFOUT ?= etherip1 +PF_MAC ?= fe:e1:ba:d2:2c:58 +PF_SSH ?= 130.102.96.41 +RT_SSH ?= 130.102.96.42 +ECO_SSH ?= 130.102.96.43 SRC_OUT ?= 10.188.210.10 PF_IN ?= 10.188.210.50 @@ -273,17 +273,18 @@ TRACEROUTE_CHECK = awk \ END{ if (n!=3) { print "hopcount is not 3: "n; exit 1 } } \ END{ if (x!=0) { print "unanswered probes: "x; exit 1 } }' -.for ip in ECO_IN ECO_OUT RDR_IN RDR_OUT AF_IN RTT_IN RTT_OUT RPT_IN RPT_OUT +#.for ip in ECO_IN ECO_OUT RDR_IN RDR_OUT AF_IN RTT_IN RTT_OUT RPT_IN RPT_OUT +.for ip in ECO_IN ECO_OUT RDR_IN RDR_OUT RTT_IN RTT_OUT RPT_IN RPT_OUT .for proto in icmp udp REGRESS_TARGETS += run-traceroute-${proto}-${inet}-${ip} run-traceroute-${proto}-${inet}-${ip}: stamp-pfctl @echo Check traceroute ${proto} ${ip${inet:S/inet//}}: .if "RPT_IN" == ${ip} || "RPT_OUT" == ${ip} - traceroute${inet:S/inet//} ${proto:S/icmp/-I/:S/udp//}\ + traceroute${inet:S/inet//} -m 8 ${proto:S/icmp/-I/:S/udp//}\ -s ${${ip}${inet:S/inet//}} ${ECO_IN${inet:S/inet//}} |\ ${TRACEROUTE_CHECK} .else - traceroute${inet:S/inet//} ${proto:S/icmp/-I/:S/udp//}\ + traceroute${inet:S/inet//} -m 8 ${proto:S/icmp/-I/:S/udp//}\ ${${ip}${inet:S/inet//}} | ${TRACEROUTE_CHECK} .endif .endfor # proto @@ -342,8 +343,8 @@ check-setup-pf: fgrep -q 'gateway: ${RT_IN}' # ${ip} RT_IN .endfor .for ip in RTT_IN RTT_OUT RPT_IN RPT_OUT - ssh ${PF_SSH} route -n get -inet ${${ip}} | grep -q 'flags: 
.*REJECT' \ - # ${ip} reject + ssh ${PF_SSH} route -n get -inet ${${ip}} |\ + grep -q 'interface: .*lo' # ${ip} reject .endfor ssh ${PF_SSH} ping6 -n -c 1 ${PF_IN6} # PF_IN6 ssh ${PF_SSH} route -n get -inet6 ${PF_IN6} | grep -q 'flags: .*LOCAL' \ @@ -359,7 +360,7 @@ check-setup-pf: .endfor .for ip in RTT_IN RTT_OUT RPT_IN RPT_OUT ssh ${PF_SSH} route -n get -inet6 ${${ip}6} |\ - grep -q 'flags: .*REJECT' # ${ip}6 reject + grep -q 'interface: .*lo' # ${ip}6 reject .endfor ssh ${PF_SSH} ${SUDO} pfctl -sr | grep '^anchor "regress" all$$' ssh ${PF_SSH} ${SUDO} pfctl -si | grep '^Status: Enabled ' Index: sys/net/if.c =================================================================== RCS file: /cvs/src/sys/net/if.c,v diff -u -p -r1.720 if.c --- sys/net/if.c 14 Jul 2024 18:53:39 -0000 1.720 +++ sys/net/if.c 21 Aug 2024 05:11:32 -0000 @@ -66,7 +66,6 @@ #include "carp.h" #include "ether.h" #include "pf.h" -#include "pfsync.h" #include "ppp.h" #include "pppoe.h" #include "if_wg.h" @@ -136,7 +135,13 @@ #if NPF > 0 #include -#endif + +#include "pfsync.h" +#if NPFSYNC > 0 +#include /* for union sockaddr_union */ +#include +#endif /* NPFSYNC > 0 */ +#endif /* NPF > 0 */ #include @@ -725,6 +730,14 @@ if_enqueue(struct ifnet *ifp, struct mbu CLR(m->m_pkthdr.csum_flags, M_TIMESTAMP); #if NPF > 0 +#if NPFSYNC > 0 + if (ISSET(m->m_pkthdr.ph_tagsset, PACKET_TAG_PF_DEFER)) { + m = pfsync_defer_out(ifp, m); + if (m == NULL) + return (0); + } +#endif + if (m->m_pkthdr.pf.delay > 0) return (pf_delay_pkt(m, ifp->if_index)); #endif Index: sys/net/if_loop.c =================================================================== RCS file: /cvs/src/sys/net/if_loop.c,v diff -u -p -r1.98 if_loop.c --- sys/net/if_loop.c 29 Dec 2023 11:43:04 -0000 1.98 +++ sys/net/if_loop.c 21 Aug 2024 05:11:32 -0000 @@ -261,10 +261,10 @@ looutput(struct ifnet *ifp, struct mbuf if ((m->m_flags & M_PKTHDR) == 0) panic("%s: no header mbuf", __func__); - if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { + if (rt != 
NULL && !ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) { m_freem(m); - return (rt->rt_flags & RTF_BLACKHOLE ? 0 : - rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); + return (ISSET(rt->rt_flags, RTF_HOST) ? + EHOSTUNREACH : ENETUNREACH); } /* Index: sys/net/if_pfsync.c =================================================================== RCS file: /cvs/src/sys/net/if_pfsync.c,v diff -u -p -r1.326 if_pfsync.c --- sys/net/if_pfsync.c 24 May 2024 06:38:41 -0000 1.326 +++ sys/net/if_pfsync.c 21 Aug 2024 05:11:32 -0000 @@ -109,8 +109,9 @@ struct pfsync_softc; struct pfsync_deferral { TAILQ_ENTRY(pfsync_deferral) pd_entry; struct pf_state *pd_st; - struct mbuf *pd_m; + struct mbuf_list pd_ml; uint64_t pd_deadline; + unsigned int pd_ifidx; }; TAILQ_HEAD(pfsync_deferrals, pfsync_deferral); @@ -428,7 +429,6 @@ pfsync_clone_create(struct if_clone *ifc TAILQ_INIT(&s->s_qs[q]); TAILQ_INIT(&s->s_tdb_q); - /* stupid NET_LOCK */ timeout_set(&s->s_deferrals_tmo, pfsync_deferrals_tmo, s); task_set(&s->s_deferrals_task, pfsync_deferrals_task, s); TAILQ_INIT(&s->s_deferrals); @@ -1928,10 +1928,9 @@ int pfsync_defer(struct pf_state *st, struct mbuf *m) { struct pfsync_softc *sc; - struct pfsync_slice *s; - struct pfsync_deferral *pd; - int sched = 0; - int rv = 0; + struct m_tag *mtag; + struct pf_state_cmp *cmp; + int defer; if (ISSET(st->state_flags, PFSTATE_NOSYNC) || ISSET(m->m_flags, M_BCAST|M_MCAST)) @@ -1939,26 +1938,83 @@ pfsync_defer(struct pf_state *st, struct smr_read_enter(); sc = SMR_PTR_GET(&pfsyncif); - if (sc == NULL || !sc->sc_defer) - goto leave; + defer = (sc != NULL && sc->sc_defer); + smr_read_leave(); - pd = pool_get(&pfsync_deferrals_pool, M_NOWAIT); - if (pd == NULL) { + if (!defer) + return (0); + + KASSERTMSG(m_tag_find(m, PACKET_TAG_PF_DEFER, NULL) == NULL, + "mbuf %p already has a PACKET_TAG_PF_DEFER mtag", m); + + mtag = m_tag_get(PACKET_TAG_PF_DEFER, sizeof(*cmp), M_NOWAIT); + if (mtag == NULL) + return (ENOMEM); + + cmp = (struct pf_state_cmp *)(mtag 
+ 1); + cmp->id = st->id; + cmp->creatorid = st->creatorid; + + m_tag_prepend(m, mtag); + return (0); +} + +struct mbuf * +pfsync_defer_out(struct ifnet *ifp, struct mbuf *m) +{ + struct pfsync_softc *sc; + struct pfsync_slice *s; + struct pf_state *st; + struct pfsync_deferral *pd; + struct m_tag *mtag; + struct pf_state_cmp *cmp; + int sched = 0; + + mtag = m_tag_find(m, PACKET_TAG_PF_DEFER, NULL); + KASSERTMSG(mtag != NULL, + "mbuf %p has PACKET_TAG_PF_DEFER set but no tag", m); + cmp = (struct pf_state_cmp *)(mtag + 1); + + PF_STATE_ENTER_READ(); + st = pf_find_state_byid(cmp); + pf_state_ref(st); + PF_STATE_EXIT_READ(); + + m_tag_delete(m, mtag); + + /* the state no longer exists: hand the packet straight back */ + if (st == NULL) + return (m); + + smr_read_enter(); + sc = SMR_PTR_GET(&pfsyncif); + if (sc == NULL || !sc->sc_defer) goto leave; - } s = pfsync_slice_enter(sc, st); s->s_stat_defer_add++; - pd->pd_st = pf_state_ref(st); - pd->pd_m = m; - pd->pd_deadline = getnsecuptime() + PFSYNC_DEFER_NSEC; + /* pd is protected by the slice mutex */ + pd = st->sync_defer; + if (pd == NULL) { + pd = pool_get(&pfsync_deferrals_pool, M_NOWAIT); + if (pd == NULL) { + pfsync_slice_leave(sc, s); /* the leave label is past the unlock */ + goto leave; + } + pd->pd_ifidx = ifp->if_index; + pd->pd_st = pf_state_ref(st); + ml_init(&pd->pd_ml); + pd->pd_deadline = getnsecuptime() + PFSYNC_DEFER_NSEC; - m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; - st->sync_defer = pd; + st->sync_defer = pd; - sched = s->s_deferred++; - TAILQ_INSERT_TAIL(&s->s_deferrals, pd, pd_entry); + sched = s->s_deferred++; + TAILQ_INSERT_TAIL(&s->s_deferrals, pd, pd_entry); + } + + ml_enqueue(&pd->pd_ml, m); + m = NULL; /* take the packet away from the caller */ if (sched == 0) timeout_add_nsec(&s->s_deferrals_tmo, PFSYNC_DEFER_NSEC); @@ -1970,11 +2026,11 @@ pfsync_defer(struct pf_state *st, struct pfsync_slice_sched(s); pfsync_slice_leave(sc, s); - rv = 1; leave: smr_read_leave(); + pf_state_unref(st); - return (rv); + return (m); } static void @@ 
-2056,57 +2112,31 @@ pfsync_deferrals_task(void *arg) if (TAILQ_EMPTY(&pds)) return; - NET_LOCK(); while ((pd = TAILQ_FIRST(&pds)) != NULL) { TAILQ_REMOVE(&pds, pd, pd_entry); pfsync_defer_output(pd); } - NET_UNLOCK(); } static void pfsync_defer_output(struct pfsync_deferral *pd) { - struct pf_pdesc pdesc; struct pf_state *st = pd->pd_st; + struct ifnet *ifp; + struct mbuf *m; - if (st->rt == PF_ROUTETO) { - if (pf_setup_pdesc(&pdesc, st->key[PF_SK_WIRE]->af, - st->direction, NULL, pd->pd_m, NULL) != PF_PASS) - return; - switch (st->key[PF_SK_WIRE]->af) { - case AF_INET: - pf_route(&pdesc, st); - break; -#ifdef INET6 - case AF_INET6: - pf_route6(&pdesc, st); - break; -#endif /* INET6 */ - default: - unhandled_af(st->key[PF_SK_WIRE]->af); - } - pd->pd_m = pdesc.m; - } else { - switch (st->key[PF_SK_WIRE]->af) { - case AF_INET: - ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL, 0); - break; -#ifdef INET6 - case AF_INET6: - ip6_output(pd->pd_m, NULL, NULL, 0, NULL, NULL); - break; -#endif /* INET6 */ - default: - unhandled_af(st->key[PF_SK_WIRE]->af); + ifp = if_get(pd->pd_ifidx); + if (ifp != NULL) { + while ((m = ml_dequeue(&pd->pd_ml)) != NULL) { + if (if_enqueue(ifp, m) != 0) + break; } - - pd->pd_m = NULL; } + if_put(ifp); pf_state_unref(st); - m_freem(pd->pd_m); + ml_purge(&pd->pd_ml); pool_put(&pfsync_deferrals_pool, pd); } Index: sys/net/if_pfsync.h =================================================================== RCS file: /cvs/src/sys/net/if_pfsync.h,v diff -u -p -r1.62 if_pfsync.h --- sys/net/if_pfsync.h 13 May 2024 01:15:53 -0000 1.62 +++ sys/net/if_pfsync.h 21 Aug 2024 05:11:32 -0000 @@ -333,6 +333,7 @@ void pfsync_update_tdb(struct tdb *, i void pfsync_delete_tdb(struct tdb *); int pfsync_defer(struct pf_state *, struct mbuf *); +struct mbuf *pfsync_defer_out(struct ifnet *, struct mbuf *); int pfsync_is_up(void); int pfsync_state_in_use(struct pf_state *); Index: sys/net/pf.c =================================================================== RCS file: 
/cvs/src/sys/net/pf.c,v diff -u -p -r1.1204 pf.c --- sys/net/pf.c 6 Aug 2024 16:56:09 -0000 1.1204 +++ sys/net/pf.c 21 Aug 2024 05:11:32 -0000 @@ -4573,8 +4573,7 @@ pf_test_rule(struct pf_pdesc *pd, struct * firewall has to know about it to allow * replies through it. */ - if (pfsync_defer(*sm, pd->m)) - return (PF_DEFER); + pfsync_defer(*sm, pd->m); } #endif /* NPFSYNC > 0 */ @@ -6563,17 +6562,12 @@ pf_rtlabel_match(struct pf_addr *addr, s return (ret); } -/* pf_route() may change pd->m, adjust local copies after calling */ -void -pf_route(struct pf_pdesc *pd, struct pf_state *st) +static void +pf_route_af(struct pf_pdesc *pd, struct pf_state *st, + void (*send)(struct mbuf *)) { struct mbuf *m0; - struct mbuf_list ml; - struct sockaddr_in *dst, sin; - struct rtentry *rt = NULL; - struct ip *ip; - struct ifnet *ifp = NULL; - unsigned int rtableid; + struct m_tag *mtag; if (pd->m->m_pkthdr.pf.routed++ > 3) { m_freem(pd->m); @@ -6582,220 +6576,49 @@ pf_route(struct pf_pdesc *pd, struct pf_ } if (st->rt == PF_DUPTO) { - if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL) - return; - } else { - if ((st->rt == PF_REPLYTO) == (st->direction == pd->dir)) + m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT); + if (m0 == NULL) return; + } else if ((st->rt == PF_REPLYTO) == (st->direction == pd->dir)) + return; + else m0 = pd->m; - pd->m = NULL; - } - - if (m0->m_len < sizeof(struct ip)) { - DPFPRINTF(LOG_ERR, - "%s: m0->m_len < sizeof(struct ip)", __func__); - goto bad; - } - - ip = mtod(m0, struct ip *); - - if (pd->dir == PF_IN) { - if (ip->ip_ttl <= IPTTLDEC) { - if (st->rt != PF_DUPTO) { - pf_send_icmp(m0, ICMP_TIMXCEED, - ICMP_TIMXCEED_INTRANS, 0, - pd->af, st->rule.ptr, pd->rdomain); - } - goto bad; - } - ip->ip_ttl -= IPTTLDEC; - } - - memset(&sin, 0, sizeof(sin)); - dst = &sin; - dst->sin_family = AF_INET; - dst->sin_len = sizeof(*dst); - dst->sin_addr = st->rt_addr.v4; - rtableid = m0->m_pkthdr.ph_rtableid; - - rt = rtalloc_mpath(sintosa(dst), 
&ip->ip_src.s_addr, rtableid); - if (!rtisvalid(rt)) { - if (st->rt != PF_DUPTO) { - pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_HOST, - 0, pd->af, st->rule.ptr, pd->rdomain); - } - ipstat_inc(ips_noroute); - goto bad; - } - - ifp = if_get(rt->rt_ifidx); - if (ifp == NULL) - goto bad; - - /* A locally generated packet may have invalid source address. */ - if ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET && - (ifp->if_flags & IFF_LOOPBACK) == 0) - ip->ip_src = ifatoia(rt->rt_ifa)->ia_addr.sin_addr; - if (st->rt != PF_DUPTO && pd->dir == PF_IN) { - if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS) - goto bad; - else if (m0 == NULL) - goto done; - if (m0->m_len < sizeof(struct ip)) { - DPFPRINTF(LOG_ERR, - "%s: m0->m_len < sizeof(struct ip)", __func__); - goto bad; + mtag = m_tag_find(m0, PACKET_TAG_PF_ROUTE, NULL); + if (mtag == NULL) { + mtag = m_tag_get(PACKET_TAG_PF_ROUTE, sizeof(st->rt_addr), + M_NOWAIT); + if (mtag == NULL) { + if (m0 == pd->m) + pd->m = NULL; + m_freem(m0); + return; } - ip = mtod(m0, struct ip *); - } - if (if_output_tso(ifp, &m0, sintosa(dst), rt, ifp->if_mtu) || - m0 == NULL) - goto done; - - /* - * Too large for interface; fragment if possible. - * Must be able to put at least 8 bytes per fragment. 
- */ - if (ip->ip_off & htons(IP_DF)) { - ipstat_inc(ips_cantfrag); - if (st->rt != PF_DUPTO) - pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, - ifp->if_mtu, pd->af, st->rule.ptr, pd->rdomain); - goto bad; + m_tag_prepend(m0, mtag); } - if (ip_fragment(m0, &ml, ifp, ifp->if_mtu) || - if_output_ml(ifp, &ml, sintosa(dst), rt)) - goto done; - ipstat_inc(ips_fragmented); + *(struct pf_addr *)(mtag + 1) = st->rt_addr; -done: - if_put(ifp); - rtfree(rt); - return; + if (st->rt == PF_DUPTO) { + SET(m0->m_pkthdr.pf.flags, PF_TAG_GENERATED); + (*send)(m0); + } else if (pd->dir == PF_OUT) + SET(m0->m_pkthdr.pf.flags, PF_TAG_REROUTE); +} -bad: - m_freem(m0); - goto done; +/* pf_route() may change pd->m, adjust local copies after calling */ +void +pf_route(struct pf_pdesc *pd, struct pf_state *st) +{ + pf_route_af(pd, st, ip_send); } #ifdef INET6 -/* pf_route6() may change pd->m, adjust local copies after calling */ void pf_route6(struct pf_pdesc *pd, struct pf_state *st) { - struct mbuf *m0; - struct sockaddr_in6 *dst, sin6; - struct rtentry *rt = NULL; - struct ip6_hdr *ip6; - struct ifnet *ifp = NULL; - struct m_tag *mtag; - unsigned int rtableid; - - if (pd->m->m_pkthdr.pf.routed++ > 3) { - m_freem(pd->m); - pd->m = NULL; - return; - } - - if (st->rt == PF_DUPTO) { - if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL) - return; - } else { - if ((st->rt == PF_REPLYTO) == (st->direction == pd->dir)) - return; - m0 = pd->m; - pd->m = NULL; - } - - if (m0->m_len < sizeof(struct ip6_hdr)) { - DPFPRINTF(LOG_ERR, - "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__); - goto bad; - } - ip6 = mtod(m0, struct ip6_hdr *); - - if (pd->dir == PF_IN) { - if (ip6->ip6_hlim <= IPV6_HLIMDEC) { - if (st->rt != PF_DUPTO) { - pf_send_icmp(m0, ICMP6_TIME_EXCEEDED, - ICMP6_TIME_EXCEED_TRANSIT, 0, - pd->af, st->rule.ptr, pd->rdomain); - } - goto bad; - } - ip6->ip6_hlim -= IPV6_HLIMDEC; - } - - memset(&sin6, 0, sizeof(sin6)); - dst = &sin6; - dst->sin6_family = AF_INET6; - 
dst->sin6_len = sizeof(*dst); - dst->sin6_addr = st->rt_addr.v6; - rtableid = m0->m_pkthdr.ph_rtableid; - - rt = rtalloc_mpath(sin6tosa(dst), &ip6->ip6_src.s6_addr32[0], - rtableid); - if (!rtisvalid(rt)) { - if (st->rt != PF_DUPTO) { - pf_send_icmp(m0, ICMP6_DST_UNREACH, - ICMP6_DST_UNREACH_NOROUTE, 0, - pd->af, st->rule.ptr, pd->rdomain); - } - ip6stat_inc(ip6s_noroute); - goto bad; - } - - ifp = if_get(rt->rt_ifidx); - if (ifp == NULL) - goto bad; - - /* A locally generated packet may have invalid source address. */ - if (IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) && - (ifp->if_flags & IFF_LOOPBACK) == 0) - ip6->ip6_src = ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr; - - if (st->rt != PF_DUPTO && pd->dir == PF_IN) { - if (pf_test(AF_INET6, PF_OUT, ifp, &m0) != PF_PASS) - goto bad; - else if (m0 == NULL) - goto done; - if (m0->m_len < sizeof(struct ip6_hdr)) { - DPFPRINTF(LOG_ERR, - "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__); - goto bad; - } - } - - /* - * If packet has been reassembled by PF earlier, we have to - * use pf_refragment6() here to turn it back to fragments. 
- */ - if ((mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL))) { - (void) pf_refragment6(&m0, mtag, dst, ifp, rt); - goto done; - } - - if (if_output_tso(ifp, &m0, sin6tosa(dst), rt, ifp->if_mtu) || - m0 == NULL) - goto done; - - ip6stat_inc(ip6s_cantfrag); - if (st->rt != PF_DUPTO) - pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0, - ifp->if_mtu, pd->af, st->rule.ptr, pd->rdomain); - goto bad; - -done: - if_put(ifp); - rtfree(rt); - return; - -bad: - m_freem(m0); - goto done; + pf_route_af(pd, st, ip6_send); } #endif /* INET6 */ @@ -7935,10 +7758,9 @@ done: case PF_SYNPROXY_DROP: m_freem(pd.m); - /* FALLTHROUGH */ - case PF_DEFER: + /* mbuf freed above: forget it and stop before PF_DIVERT */ pd.m = NULL; action = PF_PASS; break; case PF_DIVERT: switch (pd.af) { case AF_INET: @@ -8059,6 +7881,9 @@ pf_ouraddr(struct mbuf *m) { struct pf_state_key *sk; + if (ISSET(m->m_pkthdr.ph_tagsset, PACKET_TAG_PF_ROUTE)) + return (0); + if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) return (1); @@ -8078,6 +7903,16 @@ pf_ouraddr(struct mbuf *m) void pf_pkt_addr_changed(struct mbuf *m) { + struct m_tag *mtag; + + mtag = m_tag_find(m, PACKET_TAG_PF_ROUTE, NULL); + if (mtag != NULL) { + m_tag_delete(m, mtag); + + KASSERTMSG(m_tag_find(m, PACKET_TAG_PF_ROUTE, NULL) == NULL, + "mbuf %p had multiple PACKET_TAG_PF_ROUTE mbuf tags", m); + } + pf_mbuf_unlink_state_key(m); pf_mbuf_unlink_inpcb(m); } Index: sys/net/pfvar.h =================================================================== RCS file: /cvs/src/sys/net/pfvar.h,v diff -u -p -r1.538 pfvar.h --- sys/net/pfvar.h 13 May 2024 01:15:53 -0000 1.538 +++ sys/net/pfvar.h 21 Aug 2024 05:11:32 -0000 @@ -67,7 +67,7 @@ typedef struct refcnt pf_refcnt_t; enum { PF_INOUT, PF_IN, PF_OUT, PF_FWD }; enum { PF_PASS, PF_DROP, PF_SCRUB, PF_NOSCRUB, PF_NAT, PF_NONAT, - PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP, PF_DEFER, + PF_BINAT, PF_NOBINAT, PF_RDR, PF_NORDR, PF_SYNPROXY_DROP, PF_MATCH, PF_DIVERT, PF_RT, PF_AFRT }; enum { PF_TRANS_RULESET, PF_TRANS_TABLE }; enum { PF_OP_NONE, PF_OP_IRG, 
PF_OP_EQ, PF_OP_NE, PF_OP_LT, Index: sys/netinet/ip_icmp.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_icmp.c,v diff -u -p -r1.196 ip_icmp.c --- sys/netinet/ip_icmp.c 14 Jul 2024 18:53:39 -0000 1.196 +++ sys/netinet/ip_icmp.c 21 Aug 2024 05:11:32 -0000 @@ -684,7 +684,8 @@ icmp_reflect(struct mbuf *m, struct mbuf struct ip *ip = mtod(m, struct ip *); struct mbuf *opts = NULL; struct sockaddr_in sin; - struct rtentry *rt = NULL; + struct rtentry *rt; + struct in_addr ip_src = { INADDR_ANY }; int optlen = (ip->ip_hl << 2) - sizeof(struct ip); u_int rtableid; u_int8_t pfflags; @@ -701,10 +702,6 @@ icmp_reflect(struct mbuf *m, struct mbuf return (ELOOP); } rtableid = m->m_pkthdr.ph_rtableid; - pfflags = m->m_pkthdr.pf.flags; - m_resethdr(m); - m->m_pkthdr.ph_rtableid = rtableid; - m->m_pkthdr.pf.flags = pfflags & PF_TAG_GENERATED; /* * If the incoming packet was addressed directly to us, @@ -718,41 +715,80 @@ icmp_reflect(struct mbuf *m, struct mbuf sin.sin_addr = ip->ip_dst; rt = rtalloc(sintosa(&sin), 0, rtableid); - if (rtisvalid(rt) && - ISSET(rt->rt_flags, RTF_LOCAL|RTF_BROADCAST)) - ia = ifatoia(rt->rt_ifa); - } + if (rtisvalid(rt)) { + if (ISSET(rt->rt_flags, RTF_LOCAL)) + ip_src = ip->ip_dst; + else if (ISSET(rt->rt_flags, RTF_BROADCAST)) { + ia = ifatoia(rt->rt_ifa); + ip_src = ia->ia_addr.sin_addr; + } + } + rtfree(rt); + } else + ip_src = ia->ia_addr.sin_addr; /* * The following happens if the packet was not addressed to us. - * Use the new source address and do a route lookup. If it fails - * drop the packet as there is no path to the host. + * If we're directly connected use the closest address, otherwise + * try to use the sourceaddr from the routing table. 
*/ - if (ia == NULL) { - rtfree(rt); - + if (ip_src.s_addr == INADDR_ANY) { memset(&sin, 0, sizeof(sin)); sin.sin_len = sizeof(sin); sin.sin_family = AF_INET; sin.sin_addr = ip->ip_src; - /* keep packet in the original virtual instance */ - rt = rtalloc(sintosa(&sin), RT_RESOLVE, rtableid); - if (rt == NULL) { - ipstat_inc(ips_noroute); - m_freem(m); - return (EHOSTUNREACH); + rt = rtalloc_mpath(sintosa(&sin), &ip->ip_dst.s_addr, rtableid); + if (rtisvalid(rt) && + ISSET(rt->rt_flags, RTF_LLINFO|RTF_HOST)) { + ia = ifatoia(rt->rt_ifa); + ip_src = ia->ia_addr.sin_addr; + } else { + struct sockaddr *sourceaddr; + struct ifaddr *ifa; + + sourceaddr = rtable_getsource(rtableid, AF_INET); + if (sourceaddr != NULL) { + ifa = ifa_ifwithaddr(sourceaddr, rtableid); + if (ifa != NULL && + ISSET(ifa->ifa_ifp->if_flags, IFF_UP)) + ip_src = satosin(sourceaddr)->sin_addr; + } } + rtfree(rt); + } - ia = ifatoia(rt->rt_ifa); + /* + * If the above didn't find an ip_src, get the IP of the + * interface the original packet was received on. If all this + * comes up with nothing, ip_output() will try and fill it + * in for us. + */ + if (ip_src.s_addr == INADDR_ANY) { + struct ifnet *ifp; + struct ifaddr *ifa; + + ifp = if_get(m->m_pkthdr.ph_ifidx); + if (ifp != NULL) { + TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { + if (ifa->ifa_addr->sa_family != AF_INET) + continue; + + ip_src = satosin(ifa->ifa_addr)->sin_addr; + break; + } + } + if_put(ifp); } + pfflags = m->m_pkthdr.pf.flags; + + m_resethdr(m); + m->m_pkthdr.ph_rtableid = rtableid; + m->m_pkthdr.pf.flags = pfflags & PF_TAG_GENERATED; ip->ip_dst = ip->ip_src; + ip->ip_src = ip_src; ip->ip_ttl = MAXTTL; - - /* It is safe to dereference ``ia'' iff ``rt'' is valid. 
*/ - ip->ip_src = ia->ia_addr.sin_addr; - rtfree(rt); if (optlen > 0) { u_char *cp; Index: sys/netinet/ip_input.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_input.c,v diff -u -p -r1.401 ip_input.c --- sys/netinet/ip_input.c 6 Aug 2024 16:56:09 -0000 1.401 +++ sys/netinet/ip_input.c 21 Aug 2024 05:11:32 -0000 @@ -1560,6 +1560,10 @@ ip_forward(struct mbuf *m, struct ifnet struct mbuf *mcopy; int error = 0, type = 0, code = 0, destmtu = 0; u_int32_t dest; + struct in_addr *rt_dst; +#if NPF > 0 + struct m_tag *rt_mtag; +#endif dest = 0; if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) { @@ -1571,12 +1575,21 @@ ip_forward(struct mbuf *m, struct ifnet icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0); goto done; } + rt_dst = &ip->ip_dst; +#if NPF > 0 + rt_mtag = m_tag_find(m, PACKET_TAG_PF_ROUTE, NULL); + if (rt_mtag != NULL) { + struct pf_addr *rt_addr = (struct pf_addr *)(rt_mtag + 1); + rt_dst = &rt_addr->v4; + SET(flags, IP_REDIRECT); + } +#endif if (ro == NULL) { ro = &iproute; ro->ro_rt = NULL; } - rt = route_mpath(ro, &ip->ip_dst, &ip->ip_src, rtableid); + rt = route_mpath(ro, rt_dst, &ip->ip_src, rtableid); if (rt == NULL) { ipstat_inc(ips_noroute); icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0); Index: sys/netinet/ip_output.c =================================================================== RCS file: /cvs/src/sys/netinet/ip_output.c,v diff -u -p -r1.401 ip_output.c --- sys/netinet/ip_output.c 2 Jul 2024 18:33:47 -0000 1.401 +++ sys/netinet/ip_output.c 21 Aug 2024 05:11:32 -0000 @@ -110,7 +110,9 @@ ip_output(struct mbuf *m, struct mbuf *o struct sockaddr_in *dst; struct tdb *tdb = NULL; u_long mtu; + struct in_addr *rt_dst; #if NPF > 0 + struct m_tag *rt_mtag; u_int orig_rtableid; #endif @@ -128,7 +130,7 @@ ip_output(struct mbuf *m, struct mbuf *o /* * Fill in IP header. 
*/ - if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { + if (!ISSET(flags, IP_FORWARDING|IP_RAWOUTPUT)) { ip->ip_v = IPVERSION; ip->ip_off &= htons(IP_DF); ip->ip_id = htons(ip_randomid()); @@ -151,6 +153,7 @@ ip_output(struct mbuf *m, struct mbuf *o orig_rtableid = m->m_pkthdr.ph_rtableid; reroute: #endif + rt_dst = &ip->ip_dst; /* * Do a route lookup now in case we need the source address to @@ -163,11 +166,19 @@ reroute: ro->ro_rt = NULL; } +#if NPF > 0 + rt_mtag = m_tag_find(m, PACKET_TAG_PF_ROUTE, NULL); + if (rt_mtag != NULL) { + struct pf_addr *rt_addr = (struct pf_addr *)(rt_mtag + 1); + rt_dst = &rt_addr->v4; + } +#endif + /* * If there is a cached route, check that it is to the same * destination and is still up. If not, free it and try again. */ - route_cache(ro, &ip->ip_dst, &ip->ip_src, m->m_pkthdr.ph_rtableid); + route_cache(ro, rt_dst, &ip->ip_src, m->m_pkthdr.ph_rtableid); dst = &ro->ro_dstsin; if ((IN_MULTICAST(ip->ip_dst.s_addr) || @@ -398,6 +409,20 @@ sendit: } #endif /* IPSEC */ + if (ro != NULL && ro->ro_rt != NULL) { + struct rtentry *rt = ro->ro_rt; + + if (ISSET(rt->rt_flags, RTF_REJECT)) { + error = ISSET(rt->rt_flags, RTF_HOST) ? 
+ EHOSTUNREACH : ENETUNREACH; + goto bad; + } + if (ISSET(rt->rt_flags, RTF_BLACKHOLE)) { + error = 0; + goto bad; + } + } + /* * Packet filter */ @@ -418,9 +443,6 @@ sendit: else if (m->m_pkthdr.pf.flags & PF_TAG_REROUTE) { /* tag as generated to skip over pf_test on rerun */ m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; - if (ro == &iproute) - rtfree(ro->ro_rt); - ro = NULL; if_put(ifp); /* drop reference since target changed */ ifp = NULL; goto reroute; Index: sys/netinet6/ip6_forward.c =================================================================== RCS file: /cvs/src/sys/netinet6/ip6_forward.c,v diff -u -p -r1.124 ip6_forward.c --- sys/netinet6/ip6_forward.c 19 Jul 2024 16:58:32 -0000 1.124 +++ sys/netinet6/ip6_forward.c 21 Aug 2024 05:11:32 -0000 @@ -101,6 +101,10 @@ ip6_forward(struct mbuf *m, struct route struct tdb *tdb = NULL; #endif /* IPSEC */ char src6[INET6_ADDRSTRLEN], dst6[INET6_ADDRSTRLEN]; + struct in6_addr *rt_dst; +#if NPF > 0 + struct m_tag *rt_mtag; +#endif /* * Do not forward packets to multicast destination (should be handled @@ -202,11 +206,20 @@ reroute: } #endif /* IPSEC */ + rt_dst = &ip6->ip6_dst; +#if NPF > 0 + rt_mtag = m_tag_find(m, PACKET_TAG_PF_ROUTE, NULL); + if (rt_mtag != NULL) { + struct pf_addr *rt_addr = (struct pf_addr *)(rt_mtag + 1); + rt_dst = &rt_addr->v6; + } +#endif + if (ro == NULL) { ro = &iproute; ro->ro_rt = NULL; } - rt = route6_mpath(ro, &ip6->ip6_dst, &ip6->ip6_src, + rt = route6_mpath(ro, rt_dst, &ip6->ip6_src, m->m_pkthdr.ph_rtableid); if (rt == NULL) { ip6stat_inc(ip6s_noroute); Index: sys/netinet6/ip6_input.c =================================================================== RCS file: /cvs/src/sys/netinet6/ip6_input.c,v diff -u -p -r1.266 ip6_input.c --- sys/netinet6/ip6_input.c 19 Jul 2024 16:58:32 -0000 1.266 +++ sys/netinet6/ip6_input.c 21 Aug 2024 05:11:32 -0000 @@ -99,7 +99,6 @@ #include #include "gif.h" -#include "bpfilter.h" #ifdef MROUTING #include @@ -364,7 +363,9 @@ ip6_input_if(struct mbuf **mp, 
int *offp u_int16_t src_scope, dst_scope; #if NPF > 0 struct in6_addr odst; + struct m_tag *rt_mtag; #endif + struct in6_addr *rt_dst; int flags = 0; KASSERT(*offp == 0); @@ -523,11 +524,19 @@ ip6_input_if(struct mbuf **mp, int *offp goto out; } - /* * Unicast check */ - rt = route6_mpath(&ro, &ip6->ip6_dst, &ip6->ip6_src, + rt_dst = &ip6->ip6_dst; +#if NPF > 0 + rt_mtag = m_tag_find(m, PACKET_TAG_PF_ROUTE, NULL); + if (rt_mtag != NULL) { + struct pf_addr *rt_addr = (struct pf_addr *)(rt_mtag + 1); + rt_dst = &rt_addr->v6; + } +#endif + + rt = route6_mpath(&ro, rt_dst, &ip6->ip6_src, m->m_pkthdr.ph_rtableid); /* Index: sys/netinet6/ip6_output.c =================================================================== RCS file: /cvs/src/sys/netinet6/ip6_output.c,v diff -u -p -r1.292 ip6_output.c --- sys/netinet6/ip6_output.c 4 Jul 2024 12:50:08 -0000 1.292 +++ sys/netinet6/ip6_output.c 21 Aug 2024 05:11:32 -0000 @@ -177,6 +177,7 @@ ip6_output(struct mbuf *m, struct ip6_pk u_int32_t optlen = 0, plen = 0, unfragpartlen = 0; struct ip6_exthdrs exthdrs; struct in6_addr finaldst; + struct in6_addr *rt_dst; struct route *ro_pmtu = NULL; int hdrsplit = 0; u_int8_t sproto = 0; @@ -184,6 +185,9 @@ ip6_output(struct mbuf *m, struct ip6_pk #ifdef IPSEC struct tdb *tdb = NULL; #endif /* IPSEC */ +#if NPF > 0 + struct m_tag *rt_mtag; +#endif ip6 = mtod(m, struct ip6_hdr *); finaldst = ip6->ip6_dst; @@ -387,6 +391,7 @@ ip6_output(struct mbuf *m, struct ip6_pk #if NPF > 0 reroute: #endif + rt_dst = &ip6->ip6_dst; /* initialize cached route */ if (ro == NULL) { @@ -456,8 +461,16 @@ reroute: ifp = if_get(im6o->im6o_ifidx); } +#if NPF > 0 + rt_mtag = m_tag_find(m, PACKET_TAG_PF_ROUTE, NULL); + if (rt_mtag != NULL) { + struct pf_addr *rt_addr = (struct pf_addr *)(rt_mtag + 1); + rt_dst = &rt_addr->v6; + } +#endif + if (ifp == NULL) { - rt = in6_selectroute(&ip6->ip6_dst, opt, ro, + rt = in6_selectroute(rt_dst, opt, ro, m->m_pkthdr.ph_rtableid); if (rt == NULL) { ip6stat_inc(ip6s_noroute); 
@@ -480,7 +493,7 @@ reroute: goto bad; } } else { - route6_cache(ro, &ip6->ip6_dst, NULL, m->m_pkthdr.ph_rtableid); + route6_cache(ro, rt_dst, NULL, m->m_pkthdr.ph_rtableid); } if (rt && (rt->rt_flags & RTF_GATEWAY) && Index: sys/sys/mbuf.h =================================================================== RCS file: /cvs/src/sys/sys/mbuf.h,v diff -u -p -r1.263 mbuf.h --- sys/sys/mbuf.h 14 Apr 2024 20:46:27 -0000 1.263 +++ sys/sys/mbuf.h 21 Aug 2024 05:11:33 -0000 @@ -471,12 +470,14 @@ struct m_tag *m_tag_next(struct mbuf *, #define PACKET_TAG_IPSEC_IN_DONE 0x0001 /* IPsec applied, in */ #define PACKET_TAG_IPSEC_OUT_DONE 0x0002 /* IPsec applied, out */ #define PACKET_TAG_IPSEC_FLOWINFO 0x0004 /* IPsec flowinfo */ +#define PACKET_TAG_PF_DEFER 0x0008 /* pfsync deferred packet */ #define PACKET_TAG_IP_OFFNXT 0x0010 /* IPv4 offset and next proto */ #define PACKET_TAG_IP6_OFFNXT 0x0020 /* IPv6 offset and next proto */ #define PACKET_TAG_WIREGUARD 0x0040 /* WireGuard data */ #define PACKET_TAG_GRE 0x0080 /* GRE processing done */ #define PACKET_TAG_DLT 0x0100 /* data link layer type */ #define PACKET_TAG_PF_DIVERT 0x0200 /* pf(4) diverted packet */ +#define PACKET_TAG_PF_ROUTE 0x0400 /* pf(4) route-to */ #define PACKET_TAG_PF_REASSEMBLED 0x0800 /* pf reassembled ipv6 packet */ #define PACKET_TAG_SRCROUTE 0x1000 /* IPv4 source routing options */ #define PACKET_TAG_TUNNEL 0x2000 /* Tunnel endpoint address */