Index: pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.1141
diff -u -p -r1.1141 pf.c
--- pf.c	10 Oct 2022 16:43:12 -0000	1.1141
+++ pf.c	6 Nov 2022 13:08:15 -0000
@@ -120,10 +120,6 @@ u_char pf_tcp_secret[16];
 int pf_tcp_secret_init;
 int pf_tcp_iss_off;
 
-int pf_npurge;
-struct task pf_purge_task = TASK_INITIALIZER(pf_purge, &pf_npurge);
-struct timeout pf_purge_to = TIMEOUT_INITIALIZER(pf_purge_timeout, NULL);
-
 enum pf_test_status {
 	PF_TEST_FAIL = -1,
 	PF_TEST_OK,
@@ -1280,49 +1276,111 @@ pf_purge_expired_rules(void)
 	}
 }
 
-void
-pf_purge_timeout(void *unused)
-{
-	/* XXX move to systqmp to avoid KERNEL_LOCK */
-	task_add(systq, &pf_purge_task);
-}
+void		 pf_purge_states(void *);
+struct task	 pf_purge_states_task =
+		     TASK_INITIALIZER(pf_purge_states, NULL);
+
+void		 pf_purge_states_tick(void *);
+struct timeout	 pf_purge_states_to =
+		     TIMEOUT_INITIALIZER(pf_purge_states_tick, NULL);
+
+unsigned int	 pf_purge_expired_states(unsigned int, unsigned int);
+
+/*
+ * how many states to scan this interval.
+ *
+ * this is set when the timeout fires, and reduced by the task. the
+ * task will reschedule itself until the limit is reduced to zero,
+ * and then it adds the timeout again.
+ */
+unsigned int	 pf_purge_states_limit;
+
+/*
+ * limit how many states are processed with locks held per run of
+ * the state purge task.
+ */
+unsigned int	 pf_purge_states_collect = 64;
 
 void
-pf_purge(void *xnloops)
+pf_purge_states_tick(void *null)
 {
-	int *nloops = xnloops;
+	unsigned int limit = pf_status.states;
+	unsigned int interval = pf_default_rule.timeout[PFTM_INTERVAL];
+
+	if (limit == 0) {
+		timeout_add_sec(&pf_purge_states_to, 1);
+		return;
+	}
 
 	/*
 	 * process a fraction of the state table every second
-	 * Note:
-	 *     we no longer need PF_LOCK() here, because
-	 *     pf_purge_expired_states() uses pf_state_lock to maintain
-	 *     consistency.
 	 */
-	if (pf_default_rule.timeout[PFTM_INTERVAL] > 0)
-		pf_purge_expired_states(1 + (pf_status.states
-		    / pf_default_rule.timeout[PFTM_INTERVAL]));
+	if (interval > 1)
+		limit /= interval;
+
+	pf_purge_states_limit = limit;
+	task_add(systqmp, &pf_purge_states_task);
+}
+
+void
+pf_purge_states(void *null)
+{
+	unsigned int limit;
+	unsigned int scanned;
+
+	limit = pf_purge_states_limit;
+	if (limit < pf_purge_states_collect)
+		limit = pf_purge_states_collect;
+
+	scanned = pf_purge_expired_states(limit, pf_purge_states_collect);
+	if (scanned >= pf_purge_states_limit) {
+		/* we've run out of states to scan this "interval" */
+		timeout_add_sec(&pf_purge_states_to, 1);
+		return;
+	}
+
+	pf_purge_states_limit -= scanned;
+	task_add(systqmp, &pf_purge_states_task);
+}
+
+void		 pf_purge_tick(void *);
+struct timeout	 pf_purge_to =
+		     TIMEOUT_INITIALIZER(pf_purge_tick, NULL);
+
+void		 pf_purge(void *);
+struct task	 pf_purge_task =
+		     TASK_INITIALIZER(pf_purge, NULL);
+
+void
+pf_purge_tick(void *null)
+{
+	task_add(systqmp, &pf_purge_task);
+}
+
+void
+pf_purge(void *null)
+{
+	unsigned int interval = max(1, pf_default_rule.timeout[PFTM_INTERVAL]);
+
+	/* XXX is NET_LOCK necessary? */
 	NET_LOCK();
 	PF_LOCK();
 
-	/* purge other expired types every PFTM_INTERVAL seconds */
-	if (++(*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) {
-		pf_purge_expired_src_nodes();
-		pf_purge_expired_rules();
-	}
+
+	pf_purge_expired_src_nodes();
+	pf_purge_expired_rules();
+
 	PF_UNLOCK();
 
 	/*
 	 * Fragments don't require PF_LOCK(), they use their own lock.
 	 */
-	if ((*nloops) >= pf_default_rule.timeout[PFTM_INTERVAL]) {
-		pf_purge_expired_fragments();
-		*nloops = 0;
-	}
+	pf_purge_expired_fragments();
 	NET_UNLOCK();
 
-	timeout_add_sec(&pf_purge_to, 1);
+	/* interpret the interval as idle time between runs */
+	timeout_add_sec(&pf_purge_to, interval);
 }
 
 int32_t
@@ -1522,8 +1580,8 @@ pf_free_state(struct pf_state *cur)
 	pf_status.states--;
 }
 
-void
-pf_purge_expired_states(u_int32_t maxcheck)
+unsigned int
+pf_purge_expired_states(const unsigned int limit, const unsigned int collect)
 {
 	/*
 	 * this task/thread/context/whatever is the only thing that
@@ -1537,6 +1595,8 @@ pf_purge_expired_states(u_int32_t maxche
 	struct pf_state		*st;
 	SLIST_HEAD(pf_state_gcl, pf_state) gcl = SLIST_HEAD_INITIALIZER(gcl);
 	time_t			 now;
+	unsigned int		 scanned;
+	unsigned int		 collected = 0;
 
 	PF_ASSERT_UNLOCKED();
 
@@ -1550,7 +1610,7 @@ pf_purge_expired_states(u_int32_t maxche
 	if (head == NULL) {
 		/* the list is empty */
 		rw_exit_read(&pf_state_list.pfs_rwl);
-		return;
+		return (limit);
 	}
 
 	/* (re)start at the front of the list */
@@ -1559,28 +1619,38 @@ pf_purge_expired_states(u_int32_t maxche
 
 	now = getuptime();
 
-	do {
+	for (scanned = 0; scanned < limit; scanned++) {
 		uint8_t stimeout = cur->timeout;
+		unsigned int limited = 0;
 
 		if ((stimeout == PFTM_UNLINKED) ||
 		    (pf_state_expires(cur, stimeout) <= now)) {
 			st = pf_state_ref(cur);
 			SLIST_INSERT_HEAD(&gcl, st, gc_list);
+
+			if (++collected >= collect)
+				limited = 1;
 		}
 
 		/* don't iterate past the end of our view of the list */
 		if (cur == tail) {
+			scanned = limit;
 			cur = NULL;
 			break;
 		}
 
 		cur = TAILQ_NEXT(cur, entry_list);
-	} while (maxcheck--);
+
+		/* don't spend too much time here. */
+		if (ISSET(READ_ONCE(curcpu()->ci_schedstate.spc_schedflags),
+		    SPCF_SHOULDYIELD) || limited)
+			break;
+	}
 
 	rw_exit_read(&pf_state_list.pfs_rwl);
 
 	if (SLIST_EMPTY(&gcl))
-		return;
+		return (scanned);
 
 	NET_LOCK();
 	rw_enter_write(&pf_state_list.pfs_rwl);
@@ -1601,6 +1671,8 @@ pf_purge_expired_states(u_int32_t maxche
 		SLIST_REMOVE_HEAD(&gcl, gc_list);
 		pf_state_unref(st);
 	}
+
+	return (scanned);
 }
 
 int
Index: pf_ioctl.c
===================================================================
RCS file: /cvs/src/sys/net/pf_ioctl.c,v
retrieving revision 1.386
diff -u -p -r1.386 pf_ioctl.c
--- pf_ioctl.c	6 Nov 2022 13:03:52 -0000	1.386
+++ pf_ioctl.c	6 Nov 2022 13:08:15 -0000
@@ -1169,6 +1169,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
 			pf_status.stateid = gettime();
 			pf_status.stateid = pf_status.stateid << 32;
 		}
+		timeout_add_sec(&pf_purge_states_to, 1);
 		timeout_add_sec(&pf_purge_to, 1);
 		pf_create_queues();
 		DPFPRINTF(LOG_NOTICE, "pf: started");
@@ -2768,8 +2769,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
 			pf_default_rule.timeout[i] =
 			    pf_default_rule_new.timeout[i];
 			if (pf_default_rule.timeout[i] == PFTM_INTERVAL &&
-			    pf_default_rule.timeout[i] < old)
-				task_add(net_tq(0), &pf_purge_task);
+			    pf_default_rule.timeout[i] < old &&
+			    timeout_del(&pf_purge_to))
+				task_add(systqmp, &pf_purge_task);
 		}
 		pfi_xcommit();
 		pf_trans_set_commit();
Index: pfvar.h
===================================================================
RCS file: /cvs/src/sys/net/pfvar.h,v
retrieving revision 1.511
diff -u -p -r1.511 pfvar.h
--- pfvar.h	10 Oct 2022 16:43:12 -0000	1.511
+++ pfvar.h	6 Nov 2022 13:08:16 -0000
@@ -1716,7 +1716,6 @@ extern void			 pf_tbladdr_remove(struct
 extern void			 pf_tbladdr_copyout(struct pf_addr_wrap *);
 extern void			 pf_calc_skip_steps(struct pf_rulequeue *);
 extern void			 pf_purge_expired_src_nodes(void);
-extern void			 pf_purge_expired_states(u_int32_t);
 extern void			 pf_purge_expired_rules(void);
 extern void			 pf_remove_state(struct pf_state *);
 extern void			 pf_remove_divert_state(struct pf_state_key *);
Index: pfvar_priv.h
===================================================================
RCS file: /cvs/src/sys/net/pfvar_priv.h,v
retrieving revision 1.10
diff -u -p -r1.10 pfvar_priv.h
--- pfvar_priv.h	29 Apr 2022 08:58:49 -0000	1.10
+++ pfvar_priv.h	6 Nov 2022 13:08:16 -0000
@@ -209,6 +209,7 @@ struct pf_pdesc {
 	} hdr;
 };
 
+extern struct timeout	pf_purge_states_to;
 extern struct task	pf_purge_task;
 extern struct timeout	pf_purge_to;
 
@@ -262,8 +263,6 @@ extern struct rwlock	pf_state_lock;
 		rw_status(&pf_state_lock), __func__);\
 	} while (0)
 
-extern void			 pf_purge_timeout(void *);
-extern void			 pf_purge(void *);
 #endif /* _KERNEL */
 
 #endif /* _NET_PFVAR_PRIV_H_ */
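
For anyone who wants to poke at the scan/collect idea outside the kernel, below is a
minimal userland sketch of the same amortization pattern. It is not code from the
tree: every identifier in it (struct entry, purge_scan(), and so on) is invented for
illustration, and the pf-specific pieces (pf_state_list locking, reference counting,
the SPCF_SHOULDYIELD check) are deliberately left out. It only models the two budgets
the diff introduces: "limit" caps how much of the table one pass may scan, and
"collect" cuts a pass short so each batch of frees stays small.

/*
 * standalone sketch of the scan/collect amortization above.
 * all identifiers here are hypothetical, not from the kernel.
 */
#include <sys/queue.h>

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

struct entry {
	TAILQ_ENTRY(entry)	 link;
	time_t			 expires;
};

TAILQ_HEAD(entry_list, entry);

/*
 * scan up to "limit" entries from *cursor, freeing at most "collect"
 * expired ones.  returns how many entries were scanned so the caller
 * can charge it against a per-interval budget, the way the diff's
 * pf_purge_states() charges against pf_purge_states_limit.
 */
static unsigned int
purge_scan(struct entry_list *list, struct entry **cursor, time_t now,
    unsigned int limit, unsigned int collect)
{
	struct entry *cur = *cursor, *next;
	unsigned int scanned = 0, collected = 0;

	if (cur == NULL)
		cur = TAILQ_FIRST(list);	/* (re)start at the front */

	while (scanned < limit && cur != NULL) {
		next = TAILQ_NEXT(cur, link);
		scanned++;
		if (cur->expires <= now) {
			TAILQ_REMOVE(list, cur, link);
			free(cur);
			if (++collected >= collect)
				break;	/* keep each batch short */
		}
		cur = next;
	}

	*cursor = cur;
	return (scanned);
}

int
main(void)
{
	struct entry_list list = TAILQ_HEAD_INITIALIZER(list);
	struct entry *e;
	struct entry *cursor = NULL;
	unsigned int i, budget, scanned;
	time_t now = time(NULL);

	/* build a table where every third entry is already expired */
	for (i = 0; i < 1000; i++) {
		if ((e = malloc(sizeof(*e))) == NULL)
			return (1);
		e->expires = now + ((i % 3) ? 60 : -60);
		TAILQ_INSERT_TAIL(&list, e, link);
	}

	/* one "interval": budget a tenth of the table, rerun until spent */
	budget = 1000 / 10;
	while (budget > 0) {
		scanned = purge_scan(&list, &cursor, now, budget, 16);
		if (scanned == 0)
			break;		/* the list is empty */
		budget -= scanned;
		printf("pass scanned %u, budget now %u\n", scanned, budget);
	}

	return (0);
}

The while loop in main() plays the role of the task requeueing itself: each call to
purge_scan() returns early once 16 entries have been collected, the scanned count is
subtracted from the budget, and the worker runs again until the budget for this
interval is gone. The kernel version additionally bails out when the scheduler sets
SPCF_SHOULDYIELD, so a pass over a huge state table cannot hog a CPU; in this sketch
the collect limit alone bounds each pass.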