Index: sbin/pfctl/parse.y =================================================================== RCS file: /cvs/src/sbin/pfctl/parse.y,v retrieving revision 1.715 diff -u -p -r1.715 parse.y --- sbin/pfctl/parse.y 2 Nov 2023 20:47:31 -0000 1.715 +++ sbin/pfctl/parse.y 3 Mar 2024 05:50:01 -0000 @@ -57,6 +57,10 @@ #include "pfctl_parser.h" #include "pfctl.h" +#ifndef ISSET +#define ISSET(_v, _m) ((_v) & (_m)) +#endif + static struct pfctl *pf = NULL; static int debug = 0; static u_int16_t returnicmpdefault = @@ -146,7 +150,7 @@ enum { PF_STATE_OPT_MAX, PF_STATE_OPT_NO PF_STATE_OPT_MAX_SRC_CONN_RATE, PF_STATE_OPT_MAX_SRC_NODES, PF_STATE_OPT_OVERLOAD, PF_STATE_OPT_STATELOCK, PF_STATE_OPT_TIMEOUT, PF_STATE_OPT_SLOPPY, - PF_STATE_OPT_PFLOW }; + PF_STATE_OPT_PFLOW, PF_STATE_OPT_STATEPL, PF_STATE_OPT_SOURCEPL }; enum { PF_SRCTRACK_NONE, PF_SRCTRACK, PF_SRCTRACK_GLOBAL, PF_SRCTRACK_RULE }; @@ -171,6 +175,8 @@ struct node_state_opt { int number; u_int32_t seconds; } timeout; + uint32_t statepl; + uint32_t sourcepl; } data; struct node_state_opt *next; struct node_state_opt *tail; @@ -341,6 +347,50 @@ struct table_opts { struct node_tinithead init_nodes; } table_opts; +struct statepl_opts { + unsigned int marker; +#define STATEPL_M_LIMIT (1 << 0) +#define STATEPL_M_RATE (1 << 1) +#define STATEPL_M_DESCR (1 << 2) + + unsigned int limit; + struct { + unsigned int limit; + unsigned int seconds; + } rate; + char descr[PF_STATEPL_DESCR_LEN]; +}; + +static struct statepl_opts statepl_opts; + +struct sourcepl_opts { + unsigned int marker; +#define SOURCEPL_M_LIMIT (1 << 0) +#define SOURCEPL_M_STATES (1 << 1) +#define SOURCEPL_M_RATE (1 << 2) +#define SOURCEPL_M_TABLE (1 << 3) +#define SOURCEPL_M_INET_MASK (1 << 4) +#define SOURCEPL_M_INET6_MASK (1 << 5) +#define SOURCEPL_M_DESCR (1 << 6) + + unsigned int limit; + unsigned int states; + struct { + unsigned int limit; + unsigned int seconds; + } rate; + struct { + char *name; + unsigned int above; + unsigned int below; + } table; + unsigned int inet_mask; + unsigned int inet6_mask; + char descr[PF_SOURCEPL_DESCR_LEN]; +}; + +static struct sourcepl_opts sourcepl_opts; + struct node_hfsc_opts hfsc_opts; struct node_state_opt *keep_state_defaults = NULL; struct pfctl_watermarks syncookie_opts; @@ -452,6 +502,8 @@ typedef struct { struct table_opts table_opts; struct pool_opts pool_opts; struct node_hfsc_opts hfsc_opts; + struct statepl_opts *statepl_opts; + struct sourcepl_opts *sourcepl_opts; struct pfctl_watermarks *watermarks; } v; int lineno; @@ -484,12 +536,13 @@ int parseport(char *, struct range *r, i %token MAXSRCCONN MAXSRCCONNRATE OVERLOAD FLUSH SLOPPY PFLOW MAXPKTRATE %token TAGGED TAG IFBOUND FLOATING STATEPOLICY STATEDEFAULTS ROUTE %token DIVERTTO DIVERTREPLY DIVERTPACKET NATTO AFTO RDRTO RECEIVEDON NE LE GE +%token POOL RATE SOURCE STATES ABOVE BELOW MASK DESCRIPTION %token STRING %token NUMBER %token PORTBINARY %type interface if_list if_item_not if_item %type number icmptype icmp6type uid gid -%type tos not yesno optnodf +%type tos not yesno optnodf sourcepl_opt_below %type probability %type optweight %type dir af optimizer syncookie_val @@ -530,6 +583,9 @@ int parseport(char *, struct range *r, i %type scrub_opts scrub_opt scrub_opts_l %type table_opts table_opt table_opts_l %type pool_opts pool_opt pool_opts_l +%type statepl_id sourcepl_id +%type statepl_opts +%type sourcepl_opts %type syncookie_opts %% @@ -537,6 +593,8 @@ ruleset : /* empty */ | ruleset include '\n' | ruleset '\n' | ruleset option '\n' + | ruleset statepl '\n' + | ruleset sourcepl 
'\n' | ruleset pfrule '\n' | ruleset anchorrule '\n' | ruleset loadrule '\n' @@ -1548,6 +1606,340 @@ bandwidth : STRING { } ; +statepl : statepl_id statepl_opts { + struct pfctl_statepl *stpl; + + if (!ISSET($2->marker, STATEPL_M_LIMIT)) { + yyerror("limit not specified"); + YYERROR; + } + + stpl = calloc(1, sizeof(*stpl)); + if (stpl == NULL) + err(1, "state pool: malloc"); + + stpl->ioc.id = $1; + stpl->ioc.limit = $2->limit; + stpl->ioc.rate.limit = $2->rate.limit; + stpl->ioc.rate.seconds = $2->rate.seconds; + + if (ISSET($2->marker, STATEPL_M_DESCR)) { + if (strlcpy(stpl->ioc.description, $2->descr, + sizeof(stpl->ioc.description)) >= + sizeof(stpl->ioc.description)) { + free(stpl); + yyerror("state pool %lld" + " description is too long", $1); + YYERROR; + } + } + + if (pfctl_add_statepl(pf, stpl) != 0) { + free(stpl); + yyerror("state pool %lld already exists", $1); + YYERROR; + } + } + ; + +statepl_id : STATE POOL NUMBER { + if ($3 < PF_STATEPL_ID_MIN || + $3 > PF_STATEPL_ID_MAX) { + yyerror("state pool %lld: " + "invalid identifier", $3); + YYERROR; + } + $$ = $3; + } + ; + +statepl_opts : /* empty */ { + yyerror("state pool missing options"); + YYERROR; + } + | { + memset(&statepl_opts, 0, sizeof(statepl_opts)); + } statepl_opts_l { + $$ = &statepl_opts; + } + ; + +statepl_opts_l : statepl_opts_l statepl_opt + | statepl_opt + ; + +statepl_opt : LIMIT NUMBER { + if (ISSET(statepl_opts.marker, STATEPL_M_LIMIT)) { + yyerror("limit cannot be respecified"); + YYERROR; + } + + if ($2 < PF_STATEPL_LIMIT_MIN || + $2 > PF_STATEPL_LIMIT_MAX) { + yyerror("invalid state pool limit"); + YYERROR; + } + + statepl_opts.limit = $2; + + statepl_opts.marker |= STATEPL_M_LIMIT; + } + | RATE NUMBER '/' NUMBER { + if (ISSET(statepl_opts.marker, STATEPL_M_RATE)) { + yyerror("rate cannot be respecified"); + YYERROR; + } + if ($2 < 1) { + yyerror("invalid rate limit %lld", $2); + YYERROR; + } + if ($4 < 1) { + yyerror("invalid rate seconds %lld", $4); + YYERROR; + } + + statepl_opts.rate.limit = $2; + statepl_opts.rate.seconds = $4; + + statepl_opts.marker |= STATEPL_M_RATE; + } + | DESCRIPTION string { + if (ISSET(statepl_opts.marker, STATEPL_M_DESCR)) { + yyerror("description cannot be respecified"); + free($2); + YYERROR; + } + + if (strlcpy(statepl_opts.descr, $2, + sizeof(statepl_opts.descr)) >= + sizeof(statepl_opts.descr)) { + yyerror("description is too long"); + free($2); + YYERROR; + } + + free($2); + + statepl_opts.marker |= STATEPL_M_DESCR; + } + ; + +sourcepl : sourcepl_id sourcepl_opts { + struct pfctl_sourcepl *srpl; + + if (!ISSET($2->marker, SOURCEPL_M_LIMIT)) { + yyerror("limit not specified"); + YYERROR; + } + if (!ISSET($2->marker, SOURCEPL_M_STATES)) { + yyerror("states limit not specified"); + YYERROR; + } + + srpl = calloc(1, sizeof(*srpl)); + if (srpl == NULL) + err(1, "source pool: malloc"); + + srpl->ioc.id = $1; + srpl->ioc.limit = $2->limit; + srpl->ioc.states = $2->states; + srpl->ioc.rate.limit = $2->rate.limit; + srpl->ioc.rate.seconds = $2->rate.seconds; + + if (ISSET($2->marker, SOURCEPL_M_DESCR)) { + if (strlcpy(srpl->ioc.description, $2->descr, + sizeof(srpl->ioc.description)) >= + sizeof(srpl->ioc.description)) { + free(srpl); + yyerror("source pool %lld" + " description is too long", $1); + YYERROR; + } + } + + if (ISSET($2->marker, SOURCEPL_M_TABLE)) { + if (strlcpy(srpl->ioc.overload_tblname, + $2->table.name, + sizeof(srpl->ioc.overload_tblname)) >= + sizeof(srpl->ioc.overload_tblname)) { + abort(); + } + srpl->ioc.overload_hwm = $2->table.above; + 
srpl->ioc.overload_lwm = $2->table.below;
+
+			free($2->table.name);
+		}
+
+		srpl->ioc.inet_prefix = $2->inet_mask;
+		srpl->ioc.inet6_prefix = $2->inet6_mask;
+
+		if (pfctl_add_sourcepl(pf, srpl) != 0) {
+			free(srpl);
+			yyerror("source pool %lld already exists", $1);
+			YYERROR;
+		}
+	}
+	;
+
+sourcepl_id	: SOURCE POOL NUMBER {
+			if ($3 < PF_SOURCEPL_ID_MIN ||
+			    $3 > PF_SOURCEPL_ID_MAX) {
+				yyerror("source pool %lld: "
+				    "invalid identifier", $3);
+				YYERROR;
+			}
+			$$ = $3;
+		}
+		;
+
+sourcepl_opts	: /* empty */ {
+			yyerror("source pool missing options");
+			YYERROR;
+		}
+		| {
+			memset(&sourcepl_opts, 0, sizeof(sourcepl_opts));
+			sourcepl_opts.inet_mask = 32;
+			sourcepl_opts.inet6_mask = 128;
+		} sourcepl_opts_l {
+			$$ = &sourcepl_opts;
+		}
+		;
+
+sourcepl_opts_l	: sourcepl_opts_l sourcepl_opt
+		| sourcepl_opt
+		;
+
+sourcepl_opt	: LIMIT NUMBER {
+			if (ISSET(sourcepl_opts.marker, SOURCEPL_M_LIMIT)) {
+				yyerror("limit cannot be respecified");
+				YYERROR;
+			}
+
+			sourcepl_opts.limit = $2;
+
+			sourcepl_opts.marker |= SOURCEPL_M_LIMIT;
+		}
+		| STATES NUMBER {
+			if (ISSET(sourcepl_opts.marker, SOURCEPL_M_STATES)) {
+				yyerror("source state limit "
+				    "cannot be respecified");
+				YYERROR;
+			}
+
+			sourcepl_opts.states = $2;
+
+			sourcepl_opts.marker |= SOURCEPL_M_STATES;
+		}
+		| RATE NUMBER '/' NUMBER {
+			if (ISSET(sourcepl_opts.marker, SOURCEPL_M_RATE)) {
+				yyerror("rate cannot be respecified");
+				YYERROR;
+			}
+
+			sourcepl_opts.rate.limit = $2;
+			sourcepl_opts.rate.seconds = $4;
+
+			sourcepl_opts.marker |= SOURCEPL_M_RATE;
+		}
+		| TABLE '<' STRING '>' ABOVE NUMBER sourcepl_opt_below {
+			size_t stringlen;
+
+			if (ISSET(sourcepl_opts.marker, SOURCEPL_M_TABLE)) {
+				yyerror("table cannot be respecified");
+				YYERROR;
+			}
+
+			stringlen = strlen($3);
+			if (stringlen == 0 ||
+			    stringlen >= PF_TABLE_NAME_SIZE) {
+				free($3);
+				yyerror("invalid table name");
+				YYERROR;
+			}
+
+			if ($6 < 0) {
+				free($3);
+				yyerror("above limit is invalid");
+				YYERROR;
+			}
+			if ($7 > $6) {
+				free($3);
+				yyerror("below limit higher than above limit");
+				YYERROR;
+			}
+
+			sourcepl_opts.table.name = $3;
+			sourcepl_opts.table.above = $6;
+			sourcepl_opts.table.below = $7;
+
+			sourcepl_opts.marker |= SOURCEPL_M_TABLE;
+		}
+		| INET MASK NUMBER {
+			if (ISSET(sourcepl_opts.marker,
+			    SOURCEPL_M_INET_MASK)) {
+				yyerror("inet mask cannot be respecified");
+				YYERROR;
+			}
+
+			if ($3 < 1 || $3 > 32) {
+				yyerror("inet mask length out of range");
+				YYERROR;
+			}
+
+			sourcepl_opts.inet_mask = $3;
+
+			sourcepl_opts.marker |= SOURCEPL_M_INET_MASK;
+		}
+		| INET6 MASK NUMBER {
+			if (ISSET(sourcepl_opts.marker,
+			    SOURCEPL_M_INET6_MASK)) {
+				yyerror("inet6 mask cannot be respecified");
+				YYERROR;
+			}
+
+			if ($3 < 1 || $3 > 128) {
+				yyerror("inet6 mask length out of range");
+				YYERROR;
+			}
+
+			sourcepl_opts.inet6_mask = $3;
+
+			sourcepl_opts.marker |= SOURCEPL_M_INET6_MASK;
+		}
+		| DESCRIPTION string {
+			if (ISSET(sourcepl_opts.marker, SOURCEPL_M_DESCR)) {
+				yyerror("description cannot be respecified");
+				free($2);
+				YYERROR;
+			}
+
+			if (strlcpy(sourcepl_opts.descr, $2,
+			    sizeof(sourcepl_opts.descr)) >=
+			    sizeof(sourcepl_opts.descr)) {
+				yyerror("description is too long");
+				free($2);
+				YYERROR;
+			}
+
+			free($2);
+
+			sourcepl_opts.marker |= SOURCEPL_M_DESCR;
+		}
+		;
+
+sourcepl_opt_below
+		: /* empty */ {
+			$$ = 0;
+		}
+		| BELOW NUMBER {
+			if ($2 < 1) {
+				yyerror("below limit is invalid");
+				YYERROR;
+			}
+			$$ = $2;
+		}
+		;
+
 pfrule		: action dir logquick interface af proto fromto
 		  filter_opts
 		{
@@ -1763,6 +2155,22 @@ pfrule		: action dir logquick interface
 		}
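For reference, the grammar above accepts pf.conf input along these lines (a
sketch only; the ids, limits and names are invented, and attaching pools to
rules relies on the state_opt_item additions further down in this diff):

	state pool 1 limit 100000 rate 1000/10 description "web states"
	source pool 2 limit 1024 states 64 rate 100/60 \
	    table <abusers> above 48 below 32 \
	    inet mask 24 inet6 mask 56 description "web clients"

	pass in proto tcp to port 80 keep state (state pool 1, source pool 2)

inet mask and inet6 mask default to 32 and 128, i.e. per-host tracking,
unless coarser aggregation is configured.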
r.rule_flag |= PFRULE_PFLOW; break; + case PF_STATE_OPT_STATEPL: + if (r.statepl != PF_STATEPL_ID_NONE) { + yyerror("state pool: " + "multiple definitions"); + YYERROR; + } + r.statepl = o->data.statepl; + break; + case PF_STATE_OPT_SOURCEPL: + if (r.sourcepl != PF_SOURCEPL_ID_NONE) { + yyerror("source pool: " + "multiple definitions"); + YYERROR; + } + r.sourcepl = o->data.sourcepl; + break; case PF_STATE_OPT_TIMEOUT: if (o->data.timeout.number == PFTM_ADAPTIVE_START || @@ -1778,6 +2186,7 @@ pfrule : action dir logquick interface } r.timeout[o->data.timeout.number] = o->data.timeout.seconds; + break; } o = o->next; if (!defaults) @@ -3442,6 +3851,24 @@ state_opt_item : MAXIMUM NUMBER { $$->next = NULL; $$->tail = $$; } + | statepl_id { + $$ = calloc(1, sizeof(struct node_state_opt)); + if ($$ == NULL) + err(1, "state_opt_item: calloc"); + $$->type = PF_STATE_OPT_STATEPL; + $$->data.statepl = $1; + $$->next = NULL; + $$->tail = $$; + } + | sourcepl_id { + $$ = calloc(1, sizeof(struct node_state_opt)); + if ($$ == NULL) + err(1, "state_opt_item: calloc"); + $$->type = PF_STATE_OPT_SOURCEPL; + $$->data.sourcepl = $1; + $$->next = NULL; + $$->tail = $$; + } | STRING NUMBER { int i; @@ -4932,6 +5359,7 @@ lookup(char *s) { /* this has to be sorted always */ static const struct keywords keywords[] = { + { "above", ABOVE}, { "af-to", AFTO}, { "all", ALL}, { "allow-opts", ALLOWOPTS}, @@ -4939,6 +5367,7 @@ lookup(char *s) { "antispoof", ANTISPOOF}, { "any", ANY}, { "bandwidth", BANDWIDTH}, + { "below", BELOW}, { "binat-to", BINATTO}, { "bitmask", BITMASK}, { "block", BLOCK}, @@ -4948,6 +5377,7 @@ lookup(char *s) { "debug", DEBUG}, { "default", DEFAULT}, { "delay", DELAY}, + { "description", DESCRIPTION}, { "divert-packet", DIVERTPACKET}, { "divert-reply", DIVERTREPLY}, { "divert-to", DIVERTTO}, @@ -4979,6 +5409,7 @@ lookup(char *s) { "load", LOAD}, { "log", LOG}, { "loginterface", LOGINTERFACE}, + { "mask", MASK}, { "match", MATCH}, { "matches", MATCHES}, { "max", MAXIMUM}, @@ -5005,6 +5436,7 @@ lookup(char *s) { "parent", PARENT}, { "pass", PASS}, { "pflow", PFLOW}, + { "pool", POOL}, { "port", PORT}, { "prio", PRIO}, { "probability", PROBABILITY}, @@ -5015,6 +5447,7 @@ lookup(char *s) { "quick", QUICK}, { "random", RANDOM}, { "random-id", RANDOMID}, + { "rate", RATE}, { "rdomain", RDOMAIN}, { "rdr-to", RDRTO}, { "reassemble", REASSEMBLE}, @@ -5034,11 +5467,13 @@ lookup(char *s) { "set", SET}, { "skip", SKIP}, { "sloppy", SLOPPY}, + { "source", SOURCE}, { "source-hash", SOURCEHASH}, { "source-track", SOURCETRACK}, { "state", STATE}, { "state-defaults", STATEDEFAULTS}, { "state-policy", STATEPOLICY}, + { "states", STATES}, { "static-port", STATICPORT}, { "sticky-address", STICKYADDRESS}, { "syncookies", SYNCOOKIES}, Index: sbin/pfctl/pfctl.c =================================================================== RCS file: /cvs/src/sbin/pfctl/pfctl.c,v retrieving revision 1.394 diff -u -p -r1.394 pfctl.c --- sbin/pfctl/pfctl.c 2 Feb 2024 08:23:29 -0000 1.394 +++ sbin/pfctl/pfctl.c 3 Mar 2024 05:50:01 -0000 @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -53,11 +54,16 @@ #include #include #include +#include #include #include "pfctl_parser.h" #include "pfctl.h" +#ifndef nitems +#define nitems(_a) (sizeof((_a)) / sizeof((_a)[0])) +#endif + void usage(void); int pfctl_enable(int, int); int pfctl_disable(int, int); @@ -74,6 +80,7 @@ void pfctl_net_kill_states(int, const c void pfctl_label_kill_states(int, const char *, int, int); void pfctl_id_kill_states(int, int); void 
pfctl_key_kill_states(int, const char *, int, int); +void pfctl_kill_source(int, const char *, const char *, int); int pfctl_parse_host(char *, struct pf_rule_addr *); void pfctl_init_options(struct pfctl *); int pfctl_load_options(struct pfctl *); @@ -86,6 +93,8 @@ int pfctl_load_reassembly(struct pfctl int pfctl_load_syncookies(struct pfctl *, u_int8_t); int pfctl_set_synflwats(struct pfctl *, u_int32_t, u_int32_t); void pfctl_print_rule_counters(struct pf_rule *, int); +int pfctl_show_statepls(int, enum pfctl_show); +int pfctl_show_sourcepls(int, enum pfctl_show, int, const char *); int pfctl_show_rules(int, char *, int, enum pfctl_show, char *, int, int, long); int pfctl_show_src_nodes(int, int); @@ -101,6 +110,10 @@ void pfctl_load_queue(struct pfctl *, u int pfctl_load_queues(struct pfctl *); u_int pfctl_leafqueue_check(char *); u_int pfctl_check_qassignments(struct pf_ruleset *); +void pfctl_load_statepls(struct pfctl *); +void pfctl_load_statepl(struct pfctl *, struct pfctl_statepl *); +void pfctl_load_sourcepls(struct pfctl *); +void pfctl_load_sourcepl(struct pfctl *, struct pfctl_sourcepl *); int pfctl_load_ruleset(struct pfctl *, char *, struct pf_ruleset *, int); int pfctl_load_rule(struct pfctl *, char *, struct pf_rule *, int); const char *pfctl_lookup_option(char *, const char **); @@ -120,6 +133,9 @@ int pfctl_call_cleartables(int, int, str int pfctl_call_clearanchors(int, int, struct pfr_anchoritem *); int pfctl_call_showtables(int, int, struct pfr_anchoritem *); +RBT_PROTOTYPE(pfctl_statepls, pfctl_statepl, entry, pfctl_statepl_cmp); +RBT_PROTOTYPE(pfctl_sourcepls, pfctl_sourcepl, entry, pfctl_sourcepl_cmp); + const char *clearopt; char *rulesopt; const char *showopt; @@ -228,7 +244,7 @@ static const char *clearopt_list[] = { static const char *showopt_list[] = { "queue", "rules", "Anchors", "Sources", "states", "info", "Interfaces", "labels", "timeouts", "memory", "Tables", "osfp", - "all", NULL + "pools", "Pools", "all", NULL }; static const char *tblcmdopt_list[] = { @@ -834,6 +850,274 @@ pfctl_print_title(char *title) } int +pfctl_show_statepls(int dev, enum pfctl_show format) +{ + struct pfioc_statepl stpl; + uint32_t id = PF_STATEPL_ID_MIN; + + if (format == PFCTL_SHOW_LABELS) { + printf("%3s %8s/%-8s %5s/%-5s %8s %8s %8s\n", "ID", + "USE", "LIMIT", "RATE", "SECS", + "ADMIT", "HARDLIM", "RATELIM"); + } + + for (;;) { + memset(&stpl, 0, sizeof(stpl)); + stpl.id = id; + + if (ioctl(dev, DIOCGETNSTATEPL, &stpl) == -1) { + if (errno == ESRCH) { + /* we're done */ + return (0); + } + warn("DIOCGETNSTATEPL %u", stpl.id); + return (-1); + } + + switch (format) { + case PFCTL_SHOW_RULES: + print_statepl(&stpl); + break; + case PFCTL_SHOW_LABELS: + printf("%3u %8u/%-8u ", stpl.id, + stpl.inuse, stpl.limit); + if (stpl.rate.limit != 0) { + printf("%5u/%-5u ", + stpl.rate.limit, stpl.rate.seconds); + } else + printf("%5s/%-5s ", "nil", "nil"); + printf("%8llu %8llu %8llu\n", + stpl.admitted, stpl.hardlimited, stpl.ratelimited); + break; + default: + errx(1, "%s: unexpected format %d", __func__, format); + /* NOTREACHED */ + } + + id = stpl.id + 1; + } +} + +static inline int +pf_addr_inc(struct pf_addr *addr) +{ + int i; + uint32_t val, inc; + + for (i = 3; i >= 0; i--) { + val = ntohl(addr->addr32[i]); + inc = val + 1; + addr->addr32[i] = htonl(inc); + if (inc > val) + return (0); + } + + return (1); +} + +static int +pfctl_show_sources(int dev, const struct pfioc_sourcepl *srpl, + enum pfctl_show format, int opts) +{ + struct pfioc_source sr = { .id = srpl->id }; + struct 
pfioc_source_entry *entries, *e; + size_t len, used; + + if (format != PFCTL_SHOW_LABELS) + errx(1, "%s format is not PFCTL_SHOW_LABELS", __func__); + + if (srpl->nsources == 0) + return (0); + + entries = reallocarray(NULL, srpl->nsources, sizeof(*entries)); + if (entries == NULL) + err(1, "show %u source pool entries", srpl->nsources); + + len = srpl->nsources * sizeof(*entries); + + e = entries; + + /* start from af 0 address 0 */ + memset(e, 0, sizeof(*e)); + + sr.entry_size = sizeof(*e); + sr.key = e; + + for (;;) { + sr.entries = entries; + sr.entrieslen = len; + + if (ioctl(dev, DIOCGETNSOURCE, &sr) == -1) { + switch (errno) { + case ESRCH: /* can't find the sourcepl */ + case ENOENT: /* no more sources */ + return (0); /* we're done */ + } + warn("DIOCGETNSOURCE %u", sr.id); + return (-1); + } + + used = 0; + if (sr.entrieslen > len) + errx(1, "DIOCGETNSOURCE used too much buffer"); + + e = entries; + for (;;) { + if (used > sr.entrieslen) + errx(1, "DIOCGETNSOURCE weird entrieslen"); + + //printf("af %u ", e->af); + print_addr_str(e->af, &e->addr); + switch (e->af) { + case AF_INET: + printf("/%u ", sr.inet_prefix); + break; + case AF_INET6: + printf("/%u ", sr.inet6_prefix); + break; + default: + printf("/af? "); + break; + } + printf("rdomain %u ", e->rdomain); + + printf("inuse %u/%u ", e->inuse, sr.limit); + printf("admit %llu hardlim %llu ratelim %llu\n", + e->admitted, e->hardlimited, e->ratelimited); + + used += sizeof(*e); + if (used == sr.entrieslen) + break; + + e++; + } + + /* reuse the last entry as the next key */ + e->af += pf_addr_inc(&e->addr); + sr.key = e; + } + + return (0); +} + +int +pfctl_show_sourcepls(int dev, enum pfctl_show format, int opts, + const char *idopt) +{ + struct pfioc_sourcepl srpl; + uint32_t id = PF_SOURCEPL_ID_MIN; + unsigned long cmd = DIOCGETNSOURCEPL; + + if (idopt != NULL) { + const char *errstr; + + id = strtonum(idopt, PF_SOURCEPL_ID_MIN, PF_SOURCEPL_ID_MAX, + &errstr); + if (errstr != NULL) + errx(1, "source pool id: %s", errstr); + + cmd = DIOCGETSOURCEPL; + } + + if (format == PFCTL_SHOW_LABELS) { + printf("%3s %8s/%-8s %5s %5s/%-5s %8s %8s %8s %8s\n", "ID", + "USE", "ADDRS", "LIMIT", "RATE", "SECS", + "ADMIT", "ADDRLIM", "HARDLIM", "RATELIM"); + } + + for (;;) { + memset(&srpl, 0, sizeof(srpl)); + srpl.id = id; + + if (ioctl(dev, cmd, &srpl) == -1) { + if (errno == ESRCH) { + /* we're done */ + return (0); + } + warn("DIOCGETNSOURCEPL %u", srpl.id); + return (-1); + } + + switch (format) { + case PFCTL_SHOW_RULES: + print_sourcepl(&srpl); + break; + + case PFCTL_SHOW_LABELS: + printf("%3u %8u/%-8u %5u ", srpl.id, + srpl.nsources, srpl.limit, srpl.states); + if (srpl.rate.limit != 0) { + printf("%5u/%-5u ", + srpl.rate.limit, srpl.rate.seconds); + } else + printf("%5s/%-5s ", "nil", "nil"); + printf("%8llu %8llu %8llu %8llu\n", + srpl.admitted, srpl.addrlimited, + srpl.hardlimited, srpl.ratelimited); + + if (opts & PF_OPT_VERBOSE) + if (pfctl_show_sources(dev, &srpl, + format, opts) != 0) + return (-1); + break; + + default: + errx(1, "%s: unexpected format %d", __func__, format); + /* NOTREACHED */ + } + + id = srpl.id + 1; + } + + return (0); +} + +void +pfctl_kill_source(int dev, const char *idopt, const char *source, int opts) +{ + struct pfioc_source_kill ioc; + unsigned int id; + const char *errstr; + struct addrinfo hints, *res; + int error; + + if (idopt == NULL) + errx(1, "source pool id unspecified"); + if (source == NULL) + errx(1, "source pool address unspecified"); + + id = strtonum(idopt, PF_SOURCEPL_ID_MIN, 
    PF_SOURCEPL_ID_MAX, &errstr);
+	if (errstr != NULL)
+		errx(1, "source pool id: %s", errstr);
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_socktype = SOCK_DGRAM; /* dummy */
+	hints.ai_flags = AI_NUMERICHOST;
+
+	error = getaddrinfo(source, NULL, &hints, &res);
+	if (error != 0)
+		errx(1, "source pool address: %s", gai_strerror(error));
+
+	memset(&ioc, 0, sizeof(ioc));
+	ioc.id = id;
+	ioc.af = res->ai_family;
+	copy_satopfaddr(&ioc.addr, res->ai_addr);
+	ioc.rmstates = 0;
+
+	freeaddrinfo(res);
+
+	if (ioctl(dev, DIOCCLRSOURCE, &ioc) == -1) {
+		switch (errno) {
+		case ESRCH:
+			errx(1, "source pool %u not found", id);
+		case ENOENT:
+			errx(1, "source pool %u: %s not found", id, source);
+		default:
+			err(1, "kill source pool %u entry %s", id, source);
+		}
+	}
+}
+
+int
 pfctl_show_rules(int dev, char *path, int opts, enum pfctl_show format,
     char *anchorname, int depth, int wildcard, long shownr)
 {
@@ -847,6 +1131,15 @@ pfctl_show_rules(int dev, char *path, in
 		return (-1);
 	}
 
+	if (anchorname[0] == '\0') {
+		ret = pfctl_show_statepls(dev, format);
+		if (ret != 0)
+			goto error;
+		ret = pfctl_show_sourcepls(dev, format, opts, NULL);
+		if (ret != 0)
+			goto error;
+	}
+
 	/*
 	 * Truncate a trailing / and * on an anchorname before searching for
 	 * the ruleset, this is syntactic sugar that doesn't actually make it
@@ -1424,6 +1717,66 @@ pfctl_check_qassignments(struct pf_rules
 	return (errs);
 }
 
+void
+pfctl_load_statepl(struct pfctl *pf, struct pfctl_statepl *stpl)
+{
+	if (pf->opts & PF_OPT_VERBOSE)
+		print_statepl(&stpl->ioc);
+
+	if (pf->opts & PF_OPT_NOACTION)
+		return;
+
+	if (ioctl(pf->dev, DIOCADDSTATEPL, &stpl->ioc) == -1)
+		err(1, "DIOCADDSTATEPL %u", stpl->ioc.id);
+}
+
+void
+pfctl_load_statepls(struct pfctl *pf)
+{
+	struct pfctl_statepl *stpl;
+	u_int32_t ticket = 0;
+
+	if ((pf->opts & PF_OPT_NOACTION) == 0)
+		ticket = pfctl_get_ticket(pf->trans, PF_TRANS_RULESET, "");
+
+	RBT_FOREACH(stpl, pfctl_statepls, &pf->statepls) {
+		stpl->ioc.ticket = ticket;
+		pfctl_load_statepl(pf, stpl);
+	}
+
+	/* XXX should this free the statepls? */
+}
+
+void
+pfctl_load_sourcepl(struct pfctl *pf, struct pfctl_sourcepl *srpl)
+{
+	if (pf->opts & PF_OPT_VERBOSE)
+		print_sourcepl(&srpl->ioc);
+
+	if (pf->opts & PF_OPT_NOACTION)
+		return;
+
+	if (ioctl(pf->dev, DIOCADDSOURCEPL, &srpl->ioc) == -1)
+		err(1, "DIOCADDSOURCEPL %u", srpl->ioc.id);
+}
+
+void
+pfctl_load_sourcepls(struct pfctl *pf)
+{
+	struct pfctl_sourcepl *srpl;
+	u_int32_t ticket = 0;
+
+	if ((pf->opts & PF_OPT_NOACTION) == 0)
+		ticket = pfctl_get_ticket(pf->trans, PF_TRANS_RULESET, "");
+
+	RBT_FOREACH(srpl, pfctl_sourcepls, &pf->sourcepls) {
+		srpl->ioc.ticket = ticket;
+		pfctl_load_sourcepl(pf, srpl);
+	}
+
+	/* XXX should this free the sourcepls?
*/ +} + int pfctl_load_ruleset(struct pfctl *pf, char *path, struct pf_ruleset *rs, int depth) @@ -1570,6 +1923,9 @@ pfctl_rules(int dev, char *filename, int pf.opts = opts; pf.optimize = optimize; + RBT_INIT(pfctl_statepls, &pf.statepls); + RBT_INIT(pfctl_sourcepls, &pf.sourcepls); + /* non-brace anchor, create without resolving the path */ if ((pf.anchor = calloc(1, sizeof(*pf.anchor))) == NULL) ERRX("pfctl_rules: calloc"); @@ -1615,12 +1971,17 @@ pfctl_rules(int dev, char *filename, int goto _error; } - if (!anchorname[0] && (pfctl_check_qassignments(&pf.anchor->ruleset) || - pfctl_load_queues(&pf))) { - if ((opts & PF_OPT_NOACTION) == 0) - ERRX("Unable to load queues into kernel"); - else - goto _error; + if (anchorname[0] == '\0') { + if (pfctl_check_qassignments(&pf.anchor->ruleset) || + pfctl_load_queues(&pf)) { + if ((opts & PF_OPT_NOACTION) == 0) + ERRX("Unable to load queues into kernel"); + else + goto _error; + } + + pfctl_load_statepls(&pf); + pfctl_load_sourcepls(&pf); } if (pfctl_load_ruleset(&pf, path, rs, 0)) { @@ -2489,6 +2850,7 @@ main(int argc, char *argv[]) int anchor_wildcard = 0; char *path; char *lfile = NULL, *sfile = NULL; + const char *idopt = NULL; const char *errstr; long shownr = -1; @@ -2496,7 +2858,7 @@ main(int argc, char *argv[]) usage(); while ((ch = getopt(argc, argv, - "a:dD:eqf:F:ghi:k:K:L:Nno:Pp:R:rS:s:t:T:vV:x:z")) != -1) { + "a:dD:eqf:F:ghi:I:k:K:L:Nno:Pp:R:rS:s:t:T:vV:x:z")) != -1) { switch (ch) { case 'a': anchoropt = optarg; @@ -2528,6 +2890,9 @@ main(int argc, char *argv[]) case 'i': ifaceopt = optarg; break; + case 'I': + idopt = optarg; + break; case 'k': if (state_killers >= 2) { warnx("can only specify -k twice"); @@ -2771,6 +3136,13 @@ main(int argc, char *argv[]) case 'I': pfctl_show_ifaces(ifaceopt, opts); break; + case 'p': + pfctl_show_statepls(dev, PFCTL_SHOW_LABELS); + break; + case 'P': + pfctl_show_sourcepls(dev, PFCTL_SHOW_LABELS, opts, + idopt); + break; } } @@ -2840,6 +3212,8 @@ main(int argc, char *argv[]) pfctl_id_kill_states(dev, opts); else if (!strcmp(state_kill[0], "key")) pfctl_key_kill_states(dev, ifaceopt, opts, rdomain); + else if (!strcmp(state_kill[0], "source")) + pfctl_kill_source(dev, idopt, state_kill[1], opts); else pfctl_net_kill_states(dev, ifaceopt, opts, rdomain); } @@ -2925,4 +3299,50 @@ pf_strerror(int errnum) default: return strerror(errnum); } +} + +static inline int +pfctl_statepl_cmp(const struct pfctl_statepl *a, + const struct pfctl_statepl *b) +{ + uint32_t ida = a->ioc.id; + uint32_t idb = b->ioc.id; + + if (ida > idb) + return (1); + if (ida < idb) + return (-1); + + return (0); +} + +RBT_GENERATE(pfctl_statepls, pfctl_statepl, entry, pfctl_statepl_cmp); + +int +pfctl_add_statepl(struct pfctl *pf, struct pfctl_statepl *stpl) +{ + return (RBT_INSERT(pfctl_statepls, &pf->statepls, stpl) != NULL); +} + +static inline int +pfctl_sourcepl_cmp(const struct pfctl_sourcepl *a, + const struct pfctl_sourcepl *b) +{ + uint32_t ida = a->ioc.id; + uint32_t idb = b->ioc.id; + + if (ida > idb) + return (1); + if (ida < idb) + return (-1); + + return (0); +} + +RBT_GENERATE(pfctl_sourcepls, pfctl_sourcepl, entry, pfctl_sourcepl_cmp); + +int +pfctl_add_sourcepl(struct pfctl *pf, struct pfctl_sourcepl *srcpl) +{ + return (RBT_INSERT(pfctl_sourcepls, &pf->sourcepls, srcpl) != NULL); } Index: sbin/pfctl/pfctl_parser.c =================================================================== RCS file: /cvs/src/sbin/pfctl/pfctl_parser.c,v retrieving revision 1.350 diff -u -p -r1.350 pfctl_parser.c --- 
sbin/pfctl/pfctl_parser.c 7 Feb 2024 23:53:44 -0000 1.350 +++ sbin/pfctl/pfctl_parser.c 3 Mar 2024 05:50:01 -0000 @@ -702,6 +702,41 @@ print_src_node(struct pf_src_node *sn, i } void +print_statepl(const struct pfioc_statepl *ioc) +{ + printf("state pool %u limit %u", ioc->id, ioc->limit); + if (ioc->rate.limit != 0) + printf(" rate %u/%u", ioc->rate.limit, ioc->rate.seconds); + if (ioc->description[0] != '\0') + printf(" description \"%s\"", ioc->description); + + printf("\n"); +} + +void +print_sourcepl(const struct pfioc_sourcepl *ioc) +{ + printf("source pool %u limit %u states %u", ioc->id, + ioc->limit, ioc->states); + if (ioc->rate.limit != 0) + printf(" rate %u/%u", ioc->rate.limit, ioc->rate.seconds); + if (ioc->overload_tblname[0] != '\0') { + printf(" table <%s> above %u", + ioc->overload_tblname, ioc->overload_hwm); + if (ioc->overload_lwm) + printf(" below %u", ioc->overload_lwm); + } + if (ioc->inet_prefix < 32) + printf(" inet mask %u", ioc->inet_prefix); + if (ioc->inet6_prefix < 128) + printf(" inet6 mask %u", ioc->inet6_prefix); + if (ioc->description[0] != '\0') + printf(" description \"%s\"", ioc->description); + + printf("\n"); +} + +void print_rule(struct pf_rule *r, const char *anchor_call, int opts) { static const char *actiontypes[] = { "pass", "block", "scrub", @@ -926,6 +961,10 @@ print_rule(struct pf_rule *r, const char ropts = 1; if (r->rule_flag & PFRULE_PFLOW) ropts = 1; + if (r->statepl != PF_STATEPL_ID_NONE) + ropts = 1; + if (r->sourcepl != PF_SOURCEPL_ID_NONE) + ropts = 1; for (i = 0; !ropts && i < PFTM_MAX; ++i) if (r->timeout[i]) ropts = 1; @@ -1026,6 +1065,18 @@ print_rule(struct pf_rule *r, const char if (!ropts) printf(", "); printf("pflow"); + ropts = 0; + } + if (r->statepl != PF_STATEPL_ID_NONE) { + if (!ropts) + printf(", "); + printf("state pool %u", r->statepl); + ropts = 0; + } + if (r->sourcepl != PF_SOURCEPL_ID_NONE) { + if (!ropts) + printf(", "); + printf("source pool %u", r->sourcepl); ropts = 0; } for (i = 0; i < PFTM_MAX; ++i) Index: sbin/pfctl/pfctl_parser.h =================================================================== RCS file: /cvs/src/sbin/pfctl/pfctl_parser.h,v retrieving revision 1.119 diff -u -p -r1.119 pfctl_parser.h --- sbin/pfctl/pfctl_parser.h 15 Jan 2024 07:23:32 -0000 1.119 +++ sbin/pfctl/pfctl_parser.h 3 Mar 2024 05:50:01 -0000 @@ -71,6 +71,19 @@ struct pfr_buffer; /* forward definition */ +struct pfctl_statepl { + struct pfioc_statepl ioc; + RBT_ENTRY(pfctl_statepl) entry; +}; + +RBT_HEAD(pfctl_statepls, pfctl_statepl); + +struct pfctl_sourcepl { + struct pfioc_sourcepl ioc; + RBT_ENTRY(pfctl_sourcepl) entry; +}; + +RBT_HEAD(pfctl_sourcepls, pfctl_sourcepl); struct pfctl { int dev; @@ -82,11 +95,13 @@ struct pfctl { int tdirty; /* kernel dirty */ #define PFCTL_ANCHOR_STACK_DEPTH 64 struct pf_anchor *astack[PFCTL_ANCHOR_STACK_DEPTH]; - struct pfioc_queue *pqueue; struct pfr_buffer *trans; struct pf_anchor *anchor, *alast; const char *ruleset; + struct pfctl_statepls statepls; + struct pfctl_sourcepls sourcepls; + /* 'set foo' options */ u_int32_t timeout[PFTM_MAX]; u_int32_t limit[PF_LIMIT_MAX]; @@ -221,6 +236,9 @@ int add_opt_table(struct pfctl *, st void pfctl_add_rule(struct pfctl *, struct pf_rule *); +int pfctl_add_statepl(struct pfctl *, struct pfctl_statepl *); +int pfctl_add_sourcepl(struct pfctl *, struct pfctl_sourcepl *); + int pfctl_set_timeout(struct pfctl *, const char *, int, int); int pfctl_set_reassembly(struct pfctl *, int, int); int pfctl_set_syncookies(struct pfctl *, u_int8_t, @@ -242,6 +260,8 @@ 
struct pfctl_qsitem * pfctl_find_queue(c void print_pool(struct pf_pool *, u_int16_t, u_int16_t, sa_family_t, int, int); void print_src_node(struct pf_src_node *, int); +void print_statepl(const struct pfioc_statepl *); +void print_sourcepl(const struct pfioc_sourcepl *); void print_rule(struct pf_rule *, const char *, int); void print_tabledef(const char *, int, int, struct node_tinithead *); void print_status(struct pf_status *, struct pfctl_watermarks *, int); Index: sys/arch/amd64/include/cpu_full.h =================================================================== RCS file: /cvs/src/sys/arch/amd64/include/cpu_full.h,v retrieving revision 1.5 diff -u -p -r1.5 cpu_full.h --- sys/arch/amd64/include/cpu_full.h 17 May 2019 19:07:47 -0000 1.5 +++ sys/arch/amd64/include/cpu_full.h 3 Mar 2024 05:50:02 -0000 @@ -58,6 +58,7 @@ CTASSERT(_ALIGN(sizeof(struct x86_64_tss /* verify expected alignment */ CTASSERT(offsetof(struct cpu_info_full, cif_cpu.ci_PAGEALIGN) % PAGE_SIZE == 0); +CTASSERT(offsetof(struct cpu_info_full, cif_cpu.ci_mds_tmp) % 32 == 0); /* verify total size is multiple of page size */ CTASSERT(sizeof(struct cpu_info_full) % PAGE_SIZE == 0); Index: sys/dev/kstat.c =================================================================== RCS file: /cvs/src/sys/dev/kstat.c,v retrieving revision 1.2 diff -u -p -r1.2 kstat.c --- sys/dev/kstat.c 31 Jan 2022 05:09:17 -0000 1.2 +++ sys/dev/kstat.c 3 Mar 2024 05:50:05 -0000 @@ -22,6 +22,7 @@ #include #include #include +#include /* for kstat_set_cpu */ #include @@ -162,10 +163,16 @@ struct rwlock kstat_default_lock = RWLO int kstat_read(struct kstat *); int kstat_copy(struct kstat *, void *); +static void kstat_attach_late(void *); + +static struct task kstat_attach_task = + TASK_INITIALIZER(kstat_attach_late, NULL); + int kstatattach(int num) { /* XXX install system stats here */ + task_add(systq, &kstat_attach_task); return (0); } @@ -698,4 +705,77 @@ kstat_kv_unit_init(struct kstat_kv *kv, strlcpy(kv->kv_key, name, sizeof(kv->kv_key)); /* XXX truncated? 
*/
 	kv->kv_type = type;
 	kv->kv_unit = unit;
+}
+
+/*
+ * common system level kstats
+ */
+
+static const char *kstat_cpustate_names[CPUSTATES] = {
+	[CP_USER] = "user",
+	[CP_NICE] = "nice",
+	[CP_SYS] = "sys",
+	[CP_SPIN] = "spin",
+	[CP_INTR] = "intr",
+	[CP_IDLE] = "idle",
+};
+
+static int
+kstat_cpustates_read(struct kstat *ks)
+{
+	struct cpu_info *ci = ks->ks_softc;
+	struct kstat_kv *kvs = ks->ks_data;
+	struct schedstate_percpu *spc = &ci->ci_schedstate;
+	unsigned int gen;
+	size_t i;
+
+	pc_cons_enter(&spc->spc_cp_time_lock, &gen);
+	do {
+		for (i = 0; i < CPUSTATES; i++)
+			kstat_kv_u64(&kvs[i]) = spc->spc_cp_time[i];
+	} while (pc_cons_leave(&spc->spc_cp_time_lock, &gen) != 0);
+
+	getnanouptime(&ks->ks_updated);
+
+	return (0);
+}
+
+static void
+kstat_cpustates_attach(struct cpu_info *ci)
+{
+	struct kstat *ks;
+	struct kstat_kv *kvs;
+	size_t i;
+
+	ks = kstat_create(ci->ci_dev->dv_xname, 0, "cpustates", 0,
+	    KSTAT_T_KV, 0);
+	if (ks == NULL) {
+		/* printf oh well */
+		return;
+	}
+
+	kvs = mallocarray(CPUSTATES, sizeof(*kvs), M_DEVBUF,
+	    M_WAITOK | M_ZERO);
+
+	for (i = 0; i < CPUSTATES; i++) {
+		kstat_kv_init(&kvs[i], kstat_cpustate_names[i],
+		    KSTAT_KV_T_COUNTER64);
+	}
+
+	ks->ks_softc = ci;
+	ks->ks_data = kvs;
+	ks->ks_datalen = CPUSTATES * sizeof(struct kstat_kv);
+	ks->ks_read = kstat_cpustates_read;
+
+	kstat_install(ks);
+}
+
+static void
+kstat_attach_late(void *null)
+{
+	CPU_INFO_ITERATOR cii;
+	struct cpu_info *ci;
+
+	CPU_INFO_FOREACH(cii, ci)
+		kstat_cpustates_attach(ci);
 }
Index: sys/dev/ic/qwx.c
===================================================================
RCS file: /cvs/src/sys/dev/ic/qwx.c,v
retrieving revision 1.48
diff -u -p -r1.48 qwx.c
--- sys/dev/ic/qwx.c	20 Feb 2024 11:48:19 -0000	1.48
+++ sys/dev/ic/qwx.c	3 Mar 2024 05:50:05 -0000
@@ -23634,29 +23634,27 @@ qwx_dp_tx(struct qwx_softc *sc, struct q
 		goto fail_remove_idr;
 	}
 #endif
+
 	ret = bus_dmamap_load_mbuf(sc->sc_dmat, tx_data->map, m,
 	    BUS_DMA_WRITE | BUS_DMA_NOWAIT);
-	if (ret && ret != EFBIG) {
+	switch (ret) {
+	case 0:
+		break;
+	case EFBIG:
+		ret = m_defrag(m, M_DONTWAIT);
+		if (ret == 0) {
+			ret = bus_dmamap_load_mbuf(sc->sc_dmat, tx_data->map,
+			    m, BUS_DMA_WRITE | BUS_DMA_NOWAIT);
+			if (ret == 0)
+				break;
+		}
+		/* FALLTHROUGH */
+	default:
 		printf("%s: failed to map Tx buffer: %d\n",
 		    sc->sc_dev.dv_xname, ret);
 		m_freem(m);
-		return ret;
-	}
-	if (ret) {
-		/* Too many DMA segments, linearize mbuf.
*/ - if (m_defrag(m, M_DONTWAIT)) { - m_freem(m); - return ENOBUFS; - } - ret = bus_dmamap_load_mbuf(sc->sc_dmat, tx_data->map, m, - BUS_DMA_NOWAIT | BUS_DMA_WRITE); - if (ret) { - printf("%s: failed to map Tx buffer: %d\n", - sc->sc_dev.dv_xname, ret); - m_freem(m); - return ret; - } - } + return (ret); + } ti.paddr = tx_data->map->dm_segs[0].ds_addr; ti.data_len = m->m_pkthdr.len; Index: sys/dev/pv/vmt.c =================================================================== RCS file: /cvs/src/sys/dev/pv/vmt.c,v retrieving revision 1.31 diff -u -p -r1.31 vmt.c --- sys/dev/pv/vmt.c 26 Sep 2023 08:30:13 -0000 1.31 +++ sys/dev/pv/vmt.c 3 Mar 2024 05:50:08 -0000 @@ -892,6 +892,8 @@ vmt_tclo_capreg(struct vmt_softc *sc) " response\n", DEVNAME(sc)); sc->sc_rpc_error = 1; } + + printf("%s: %s\n", DEVNAME(sc), __func__); } void Index: sys/kern/kern_clock.c =================================================================== RCS file: /cvs/src/sys/kern/kern_clock.c,v retrieving revision 1.123 diff -u -p -r1.123 kern_clock.c --- sys/kern/kern_clock.c 12 Feb 2024 22:07:33 -0000 1.123 +++ sys/kern/kern_clock.c 3 Mar 2024 05:50:08 -0000 @@ -267,6 +267,8 @@ statclock(struct clockrequest *cr, void struct schedstate_percpu *spc = &ci->ci_schedstate; struct proc *p = curproc; struct process *pr; + int cp; + unsigned int gen; if (statclock_is_randomized) { count = clockrequest_advance_random(cr, statclock_min, @@ -282,10 +284,7 @@ statclock(struct clockrequest *cr, void * If this process is being profiled record the tick. */ p->p_uticks += count; - if (pr->ps_nice > NZERO) - spc->spc_cp_time[CP_NICE] += count; - else - spc->spc_cp_time[CP_USER] += count; + cp = (pr->ps_nice > NZERO) ? CP_NICE : CP_USER; } else { /* * Came from kernel mode, so we were: @@ -303,16 +302,20 @@ statclock(struct clockrequest *cr, void if (CLKF_INTR(frame)) { if (p != NULL) p->p_iticks += count; - spc->spc_cp_time[spc->spc_spinning ? - CP_SPIN : CP_INTR] += count; + cp = CP_INTR; } else if (p != NULL && p != spc->spc_idleproc) { p->p_sticks += count; - spc->spc_cp_time[spc->spc_spinning ? - CP_SPIN : CP_SYS] += count; + cp = CP_SYS; } else - spc->spc_cp_time[spc->spc_spinning ? 
- CP_SPIN : CP_IDLE] += count; + cp = CP_IDLE; + + if (spc->spc_spinning) + cp = CP_SPIN; } + + gen = pc_sprod_enter(&spc->spc_cp_time_lock); + spc->spc_cp_time[cp] += count; + pc_sprod_leave(&spc->spc_cp_time_lock, gen); if (p != NULL) { p->p_cpticks += count; Index: sys/kern/kern_lock.c =================================================================== RCS file: /cvs/src/sys/kern/kern_lock.c,v retrieving revision 1.72 diff -u -p -r1.72 kern_lock.c --- sys/kern/kern_lock.c 26 Apr 2022 15:31:14 -0000 1.72 +++ sys/kern/kern_lock.c 3 Mar 2024 05:50:08 -0000 @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -443,3 +444,95 @@ _mtx_init_flags(struct mutex *m, int ipl _mtx_init(m, ipl); } #endif /* WITNESS */ + +void +pc_lock_init(struct pc_lock *pcl) +{ + pcl->pcl_gen = 0; +} + +unsigned int +pc_mprod_enter(struct pc_lock *pcl) +{ + unsigned int gen, ngen, ogen; + + gen = pcl->pcl_gen; + for (;;) { + while (gen & 1) { + CPU_BUSY_CYCLE(); + gen = pcl->pcl_gen; + } + + ngen = 1 + gen; + ogen = atomic_cas_uint(&pcl->pcl_gen, gen, ngen); + if (gen == ogen) + break; + + CPU_BUSY_CYCLE(); + gen = ogen; + } + + membar_enter_after_atomic(); + return (ngen); +} + +void +pc_mprod_leave(struct pc_lock *pcl, unsigned int gen) +{ + membar_exit(); + pcl->pcl_gen = ++gen; +} + +unsigned int +pc_sprod_enter(struct pc_lock *pcl) +{ + unsigned int gen; + + gen = pcl->pcl_gen; + pcl->pcl_gen = ++gen; + membar_producer(); + + return (gen); +} + +void +pc_sprod_leave(struct pc_lock *pcl, unsigned int gen) +{ + membar_producer(); + pcl->pcl_gen = ++gen; +} + +void +pc_cons_enter(struct pc_lock *pcl, unsigned int *genp) +{ + unsigned int gen; + + gen = pcl->pcl_gen; + while (gen & 1) { + CPU_BUSY_CYCLE(); + gen = pcl->pcl_gen; + } + + membar_consumer(); + *genp = gen; +} + +int +pc_cons_leave(struct pc_lock *pcl, unsigned int *genp) +{ + unsigned int gen; + + membar_consumer(); + + gen = pcl->pcl_gen; + if (gen & 1) { + do { + CPU_BUSY_CYCLE(); + gen = pcl->pcl_gen; + } while (gen & 1); + } else if (gen == *genp) + return (0); + + *genp = gen; + return (EBUSY); +} Index: sys/kern/kern_sched.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sched.c,v retrieving revision 1.94 diff -u -p -r1.94 kern_sched.c --- sys/kern/kern_sched.c 24 Jan 2024 19:23:38 -0000 1.94 +++ sys/kern/kern_sched.c 3 Mar 2024 05:50:08 -0000 @@ -83,6 +83,8 @@ sched_init_cpu(struct cpu_info *ci) struct schedstate_percpu *spc = &ci->ci_schedstate; int i; + pc_lock_init(&spc->spc_cp_time_lock); + for (i = 0; i < SCHED_NQS; i++) TAILQ_INIT(&spc->spc_qs[i]); Index: sys/net/if.c =================================================================== RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.718 diff -u -p -r1.718 if.c --- sys/net/if.c 6 Feb 2024 00:18:53 -0000 1.718 +++ sys/net/if.c 3 Mar 2024 05:50:09 -0000 @@ -70,6 +70,7 @@ #include "ppp.h" #include "pppoe.h" #include "if_wg.h" +#include "kstat.h" #include #include @@ -138,6 +139,10 @@ #include #endif +#if NKSTAT > 0 +#include +#endif + #include void if_attachsetup(struct ifnet *); @@ -179,6 +184,11 @@ void ifa_print_all(void); void if_qstart_compat(struct ifqueue *); +#if NKSTAT > 0 +static void if_kstat_attach(struct ifnet *); +static void if_kstat_detach(struct ifnet *); +#endif + /* * interface index map * @@ -500,6 +510,10 @@ if_attachsetup(struct ifnet *ifp) /* Announce the interface. 
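The pc_lock functions added to kern_lock.c above are a seqlock-style
generation lock: a producer makes pcl_gen odd while an update is in flight
(pc_mprod_enter via atomic_cas_uint when multiple producers are possible,
the cheaper pc_sprod_enter when only one producer can exist, as in
statclock), and consumers never block, they just retry. A reader follows
this pattern (a sketch mirroring kstat_cpustates_read earlier in this diff):

	unsigned int gen;
	uint64_t snap[CPUSTATES];
	size_t i;

	pc_cons_enter(&spc->spc_cp_time_lock, &gen);
	do {
		/* copy out; pc_cons_leave returns EBUSY if a write intervened */
		for (i = 0; i < CPUSTATES; i++)
			snap[i] = spc->spc_cp_time[i];
	} while (pc_cons_leave(&spc->spc_cp_time_lock, &gen) != 0);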
*/ rtm_ifannounce(ifp, IFAN_ARRIVAL); + +#if NKSTAT > 0 + if_kstat_attach(ifp); +#endif } /* @@ -1193,6 +1207,10 @@ if_detach(struct ifnet *ifp) #endif NET_LOCK(); +#if NKSTAT > 0 + if_kstat_detach(ifp); +#endif + s = splnet(); ifp->if_ioctl = if_detached_ioctl; ifp->if_watchdog = NULL; @@ -1793,6 +1811,7 @@ if_linkstate(struct ifnet *ifp) rt_if_track(ifp); } + ifp->if_nlinkstatech++; if_hooks_run(&ifp->if_linkstatehooks); } @@ -2828,6 +2847,130 @@ if_getdata(struct ifnet *ifp, struct if_ ifiq_add_data(ifiq, data); } } + +#if NKSTAT +struct if_kstat_data { + struct kstat_kv kd_up; + struct kstat_kv kd_link; + struct kstat_kv kd_nlinkch; + struct kstat_kv kd_baudrate; + struct kstat_kv kd_ibytes; + struct kstat_kv kd_ipackets; + struct kstat_kv kd_ierrors; + struct kstat_kv kd_iqdrops; + struct kstat_kv kd_obytes; + struct kstat_kv kd_opackets; + struct kstat_kv kd_oerrors; + struct kstat_kv kd_oqdrops; +}; + +static const struct if_kstat_data if_kstat_data_template = { + KSTAT_KV_INITIALIZER("up", KSTAT_KV_T_BOOL), + KSTAT_KV_INITIALIZER("link", KSTAT_KV_T_BOOL), + KSTAT_KV_INITIALIZER("link-changes", KSTAT_KV_T_COUNTER32), + KSTAT_KV_UNIT_INITIALIZER("baudrate", + KSTAT_KV_T_UINT64, KSTAT_KV_U_NONE), + + KSTAT_KV_UNIT_INITIALIZER("ibytes", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES), + KSTAT_KV_UNIT_INITIALIZER("ipackets", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("ierrors", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("iqdrops", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + + KSTAT_KV_UNIT_INITIALIZER("obytes", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES), + KSTAT_KV_UNIT_INITIALIZER("opackets", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("oerrors", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), + KSTAT_KV_UNIT_INITIALIZER("oqdrops", + KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS), +}; + +static int if_kstat_read(struct kstat *); + +static void +if_kstat_attach(struct ifnet *ifp) +{ + struct kstat *ks; + struct if_kstat_data *kd; + + kd = malloc(sizeof(*kd), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO); + if (kd == NULL) + return; + + ks = kstat_create(ifp->if_xname, 0, "ifstat", 0, KSTAT_T_KV, 0); + if (ks == NULL) { + free(kd, M_DEVBUF, sizeof(*kd)); + return; + } + + *kd = if_kstat_data_template; + + /* which lock? 
*/ + ks->ks_softc = ifp; + ks->ks_data = kd; + ks->ks_datalen = sizeof(*kd); + ks->ks_read = if_kstat_read; + + ifp->if_kstat = ks; + + kstat_install(ks); +} + +static int +if_kstat_read(struct kstat *ks) +{ + struct ifnet *ifp = ks->ks_softc; + struct if_kstat_data *kd = ks->ks_data; + struct if_data data; + + memset(&data, 0, sizeof(data)); + + NET_LOCK_SHARED(); + if_getdata(ifp, &data); + + kstat_kv_bool(&kd->kd_up) = !!ISSET(ifp->if_flags, IFF_RUNNING); + kstat_kv_bool(&kd->kd_link) = LINK_STATE_IS_UP(ifp->if_link_state); + kstat_kv_u32(&kd->kd_nlinkch) = ifp->if_nlinkstatech; + NET_UNLOCK_SHARED(); + + nanouptime(&ks->ks_updated); + + kstat_kv_u64(&kd->kd_baudrate) = data.ifi_baudrate; + + kstat_kv_u64(&kd->kd_ibytes) = data.ifi_ibytes; + kstat_kv_u64(&kd->kd_ipackets) = data.ifi_ipackets; + kstat_kv_u64(&kd->kd_ierrors) = data.ifi_ierrors; + kstat_kv_u64(&kd->kd_iqdrops) = data.ifi_iqdrops; + + kstat_kv_u64(&kd->kd_obytes) = data.ifi_obytes; + kstat_kv_u64(&kd->kd_opackets) = data.ifi_opackets; + kstat_kv_u64(&kd->kd_oerrors) = data.ifi_oerrors; + kstat_kv_u64(&kd->kd_oqdrops) = data.ifi_oqdrops; + + return (0); +} + +static void +if_kstat_detach(struct ifnet *ifp) +{ + struct kstat *ks = ifp->if_kstat; + struct if_kstat_data *kd; + + if (ks == NULL) + return; + + kstat_remove(ks); + kd = ks->ks_data; + kstat_destroy(ks); + + free(kd, M_DEVBUF, sizeof(*kd)); +} +#endif /* * Dummy functions replaced in ifnet during detach (if protocols decide to Index: sys/net/if_var.h =================================================================== RCS file: /cvs/src/sys/net/if_var.h,v retrieving revision 1.132 diff -u -p -r1.132 if_var.h --- sys/net/if_var.h 23 Dec 2023 10:52:54 -0000 1.132 +++ sys/net/if_var.h 3 Mar 2024 05:50:09 -0000 @@ -89,6 +89,7 @@ struct rtentry; struct ifnet; struct task; struct cpumem; +struct kstat; /* * Structure describing a `cloning' interface. 
@@ -150,6 +151,7 @@ struct ifnet { /* and the entries */ int if_xflags; /* [N] extra softnet flags */ struct if_data if_data; /* stats and other data about if */ struct cpumem *if_counters; /* per cpu stats */ + uint32_t if_nlinkstatech; /* [N] number of link changes */ uint32_t if_hardmtu; /* [d] maximum MTU device supports */ char if_description[IFDESCRSIZE]; /* [c] interface description */ u_short if_rtlabelid; /* [c] next route label */ @@ -183,6 +185,7 @@ struct ifnet { /* and the entries */ struct ifiqueue if_rcv; /* rx/input queue */ struct ifiqueue **if_iqs; /* [I] pointer to the array of iqs */ unsigned int if_niqs; /* [I] number of input queues */ + struct kstat *if_kstat; struct sockaddr_dl *if_sadl; /* [N] pointer to our sockaddr_dl */ Index: sys/net/pf.c =================================================================== RCS file: /cvs/src/sys/net/pf.c,v retrieving revision 1.1193 diff -u -p -r1.1193 pf.c --- sys/net/pf.c 10 Jan 2024 16:44:30 -0000 1.1193 +++ sys/net/pf.c 3 Mar 2024 05:50:09 -0000 @@ -147,11 +147,15 @@ struct pf_test_ctx { struct pf_ruleset *arsm; struct pf_ruleset *aruleset; struct tcphdr *th; + struct pf_statepl *statepl; + struct pf_source *source; }; struct pool pf_src_tree_pl, pf_rule_pl, pf_queue_pl; struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl; struct pool pf_rule_item_pl, pf_sn_item_pl, pf_pktdelay_pl; +struct pool pf_statepl_pl, pf_sourcepl_pl, pf_source_pl; +struct pool pf_state_link_pl; void pf_add_threshold(struct pf_threshold *); int pf_check_threshold(struct pf_threshold *); @@ -192,7 +196,8 @@ static __inline int pf_create_state(str struct pf_state_key **, struct pf_state_key **, int *, struct pf_state **, int, struct pf_rule_slist *, struct pf_rule_actions *, - struct pf_src_node **); + struct pf_src_node **, + struct pf_statepl *, struct pf_source *); static __inline int pf_state_key_addr_setup(struct pf_pdesc *, void *, int, struct pf_addr *, int, struct pf_addr *, int, int); @@ -202,6 +207,8 @@ int pf_tcp_track_full(struct pf_pdesc struct pf_state **, u_short *, int *, int); int pf_tcp_track_sloppy(struct pf_pdesc *, struct pf_state **, u_short *); +static __inline int pf_synproxy_ack(struct pf_rule *, struct pf_pdesc *, + struct pf_state **, struct pf_rule_actions *); static __inline int pf_synproxy(struct pf_pdesc *, struct pf_state **, u_short *); int pf_test_state(struct pf_pdesc *, struct pf_state **, @@ -307,6 +314,255 @@ static __inline void pf_cksum_cover(u_in #endif /* INET6 */ static __inline void pf_set_protostate(struct pf_state *, int, u_int8_t); +static inline int +pf_statepl_cmp(const struct pf_statepl *a, const struct pf_statepl *b) +{ + if (a->pfstpl_id > b->pfstpl_id) + return (1); + if (a->pfstpl_id < b->pfstpl_id) + return (-1); + + return (0); +} + +RBT_GENERATE(pf_statepl_tree, pf_statepl, pfstpl_tree, pf_statepl_cmp); + +struct pf_statepl_tree pf_statepl_tree_active = + RBT_INITIALIZER(pf_statepl_tree_active); +struct pf_statepl_list pf_statepl_list_active = + TAILQ_HEAD_INITIALIZER(pf_statepl_list_active); + +struct pf_statepl_tree pf_statepl_tree_inactive = + RBT_INITIALIZER(pf_statepl_tree_inactive); +struct pf_statepl_list pf_statepl_list_inactive = + TAILQ_HEAD_INITIALIZER(pf_statepl_list_inactive); + +static inline int +pf_sourcepl_cmp(const struct pf_sourcepl *a, const struct pf_sourcepl *b) +{ + if (a->pfsrpl_id > b->pfsrpl_id) + return (1); + if (a->pfsrpl_id < b->pfsrpl_id) + return (-1); + + return (0); +} + +RBT_GENERATE(pf_sourcepl_tree, pf_sourcepl, pfsrpl_tree, pf_sourcepl_cmp); + +static 
inline int +pf_source_cmp(const struct pf_source *a, const struct pf_source *b) +{ + if (a->pfsr_af > b->pfsr_af) + return (1); + if (a->pfsr_af < b->pfsr_af) + return (-1); + if (a->pfsr_rdomain > b->pfsr_rdomain) + return (1); + if (a->pfsr_rdomain < b->pfsr_rdomain) + return (-1); + + return (pf_addr_compare(&a->pfsr_addr, &b->pfsr_addr, a->pfsr_af)); +} + +RBT_GENERATE(pf_source_tree, pf_source, pfsr_tree, pf_source_cmp); + +static inline int +pf_source_ioc_cmp(const struct pf_source *a, const struct pf_source *b) +{ + size_t i; + + if (a->pfsr_af > b->pfsr_af) + return (1); + if (a->pfsr_af < b->pfsr_af) + return (-1); + if (a->pfsr_rdomain > b->pfsr_rdomain) + return (1); + if (a->pfsr_rdomain < b->pfsr_rdomain) + return (-1); + + for (i = 0; i < nitems(a->pfsr_addr.addr32); i++) { + uint32_t wa = ntohl(a->pfsr_addr.addr32[i]); + uint32_t wb = ntohl(b->pfsr_addr.addr32[i]); + + if (wa > wb) + return (1); + if (wa < wb) + return (-1); + } + + return (0); +} + +RBT_GENERATE(pf_source_ioc_tree, pf_source, pfsr_ioc_tree, pf_source_ioc_cmp); + +struct pf_sourcepl_tree pf_sourcepl_tree_active = + RBT_INITIALIZER(pf_sourcepl_tree_active); +struct pf_sourcepl_list pf_sourcepl_list_active = + TAILQ_HEAD_INITIALIZER(pf_sourcepl_list_active); + +struct pf_sourcepl_tree pf_sourcepl_tree_inactive = + RBT_INITIALIZER(pf_sourcepl_tree_inactive); +struct pf_sourcepl_list pf_sourcepl_list_inactive = + TAILQ_HEAD_INITIALIZER(pf_sourcepl_list_inactive); + +static inline struct pf_statepl * +pf_statepl_find(uint32_t id) +{ + struct pf_statepl key; + + /* only the id is used in cmp, so don't have to zero all the things */ + key.pfstpl_id = id; + + return (RBT_FIND(pf_statepl_tree, &pf_statepl_tree_active, &key)); +} + +static inline struct pf_sourcepl * +pf_sourcepl_find(uint32_t id) +{ + struct pf_sourcepl key; + + /* only the id is used in cmp, so don't have to zero all the things */ + key.pfsrpl_id = id; + + return (RBT_FIND(pf_sourcepl_tree, &pf_sourcepl_tree_active, &key)); +} + +struct pf_source_list pf_source_gc = TAILQ_HEAD_INITIALIZER(pf_source_gc); + +static void +pf_source_purge(void) +{ + struct pf_source *sr, *nsr; + time_t now = getuptime(); + + TAILQ_FOREACH_SAFE(sr, &pf_source_gc, pfsr_empty_gc, nsr) { + struct pf_sourcepl *srpl; + + if (now <= sr->pfsr_empty_ts + 1) + continue; + + TAILQ_REMOVE(&pf_source_gc, sr, pfsr_empty_gc); + + srpl = sr->pfsr_parent; + RBT_REMOVE(pf_source_tree, &srpl->pfsrpl_sources, sr); + RBT_REMOVE(pf_source_ioc_tree, &srpl->pfsrpl_ioc_sources, sr); + srpl->pfsrpl_nsources--; + + pool_put(&pf_source_pl, sr); + } +} + +static void +pf_source_pfr_addr(struct pfr_addr *p, const struct pf_source *sr) +{ + struct pf_sourcepl *srpl = sr->pfsr_parent; + + memset(p, 0, sizeof(*p)); + + p->pfra_af = sr->pfsr_af; + switch (sr->pfsr_af) { + case AF_INET: + p->pfra_net = srpl->pfsrpl_ipv4_prefix; + p->pfra_ip4addr = sr->pfsr_addr.v4; + break; +#ifdef INET6 + case AF_INET6: + p->pfra_net = srpl->pfsrpl_ipv6_prefix; + p->pfra_ip6addr = sr->pfsr_addr.v6; + break; +#endif /* INET6 */ + } +} + +static void +pf_source_used(struct pf_source *sr) +{ + struct pf_sourcepl *srpl = sr->pfsr_parent; + struct pfr_ktable *t; + unsigned int used; + + used = sr->pfsr_inuse++; + sr->pfsr_rate_ts += srpl->pfsrpl_rate_token; + + if (used == 0) + TAILQ_REMOVE(&pf_source_gc, sr, pfsr_empty_gc); + else if ((t = srpl->pfsrpl_overload.table) != NULL && + used >= srpl->pfsrpl_overload.hwm && !sr->pfsr_intable) { + struct pfr_addr p; + + pf_source_pfr_addr(&p, sr); + + pfr_insert_kentry(t, &p, 
gettime()); + sr->pfsr_intable = 1; + } +} + +static void +pf_source_rele(struct pf_source *sr) +{ + struct pf_sourcepl *srpl = sr->pfsr_parent; + struct pfr_ktable *t; + unsigned int used; + + used = --sr->pfsr_inuse; + + t = srpl->pfsrpl_overload.table; + if (t != NULL && sr->pfsr_intable && + used < srpl->pfsrpl_overload.lwm) { + struct pfr_addr p; + + pf_source_pfr_addr(&p, sr); + + pfr_remove_kentry(t, &p); + sr->pfsr_intable = 0; + } + + if (used == 0) { + TAILQ_INSERT_TAIL(&pf_source_gc, sr, pfsr_empty_gc); + sr->pfsr_empty_ts = getuptime() + srpl->pfsrpl_rate.seconds; + } +} + +static void +pf_source_key(struct pf_sourcepl *srpl, struct pf_source *key, + sa_family_t af, unsigned int rdomain, const struct pf_addr *addr) +{ + size_t i; + + /* only af+addr is used for lookup. */ + key->pfsr_af = af; + key->pfsr_rdomain = rdomain; + switch (af) { + case AF_INET: + key->pfsr_addr.addr32[0] = + srpl->pfsrpl_ipv4_mask.v4.s_addr & + addr->v4.s_addr; + + for (i = 1; i < nitems(key->pfsr_addr.addr32); i++) + key->pfsr_addr.addr32[i] = htonl(0); + break; +#ifdef INET6 + case AF_INET6: + for (i = 0; i < nitems(key->pfsr_addr.addr32); i++) { + key->pfsr_addr.addr32[i] = + srpl->pfsrpl_ipv6_mask.addr32[i] & + addr->addr32[i]; + } + break; +#endif + default: + unhandled_af(af); + /* NOTREACHED */ + } +} + +static inline struct pf_source * +pf_source_find(struct pf_sourcepl *srpl, const struct pf_source *key) +{ + return (RBT_FIND(pf_source_tree, &srpl->pfsrpl_sources, key)); +} + struct pf_src_tree tree_src_tracking; struct pf_state_tree_id tree_id; @@ -1672,6 +1928,7 @@ pf_purge(void *null) PF_LOCK(); pf_purge_expired_src_nodes(); + pf_source_purge(); PF_UNLOCK(); @@ -1774,6 +2031,8 @@ pf_src_tree_remove_state(struct pf_state void pf_remove_state(struct pf_state *st) { + struct pf_state_link *pfl, *npfl; + PF_ASSERT_LOCKED(); mtx_enter(&st->mtx); @@ -1800,6 +2059,63 @@ pf_remove_state(struct pf_state *st) if (st->key[PF_SK_STACK]->proto == IPPROTO_TCP) pf_set_protostate(st, PF_PEER_BOTH, TCPS_CLOSED); + SLIST_FOREACH_SAFE(pfl, &st->linkage, pfl_linkage, npfl) { + struct pf_state_link_list *list; + unsigned int gen; + + switch (pfl->pfl_type) { + case PF_STATE_LINK_TYPE_STATEPL: { + struct pf_statepl *stpl; + + stpl = pf_statepl_find(st->statepl); + KASSERTMSG(stpl != NULL, + "pf_state %p pfl %p cannot find statepl %u", + st, pfl, st->statepl); + + gen = pf_statepl_enter(stpl); + stpl->pfstpl_inuse--; + pf_statepl_leave(stpl, gen); + + list = &stpl->pfstpl_states; + break; + } + case PF_STATE_LINK_TYPE_SOURCEPL: { + struct pf_sourcepl *srpl; + struct pf_source key, *sr; + + srpl = pf_sourcepl_find(st->sourcepl); + KASSERTMSG(srpl != NULL, + "pf_state %p pfl %p cannot find sourcepl %u", + st, pfl, st->sourcepl); + + pf_source_key(srpl, &key, + st->key[PF_SK_WIRE]->af, + st->key[PF_SK_WIRE]->rdomain, + &st->key[PF_SK_WIRE]->addr[0 /* XXX or 1? 
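pf_source_key above is what makes a "source" coarser than a single host: the
packet's source address is masked down to the pool's configured prefix before
lookup, so with "inet mask 24" both 192.0.2.57 and 192.0.2.99 map to the same
pf_source keyed on 192.0.2.0 (pfsrpl_ipv4_mask being htonl(0xffffff00) in
that case). How pfsrpl_ipv4_mask/pfsrpl_ipv6_mask are derived from the prefix
lengths is not part of this excerpt; presumably something along the lines of

	mask.v4.s_addr = htonl(0xffffffffU << (32 - prefix));

when DIOCADDSOURCEPL installs the pool.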
*/]); + + sr = pf_source_find(srpl, &key); + KASSERTMSG(sr != NULL, + "pf_state %p pfl %p cannot find source in %u", + st, pfl, st->sourcepl); + + gen = pf_sourcepl_enter(srpl); + srpl->pfsrpl_counters.inuse--; + pf_sourcepl_leave(srpl, gen); + pf_source_rele(sr); + + list = &sr->pfsr_states; + break; + } + default: + panic("%s: unexpected link type on pfl %p", + __func__, pfl); + } + + TAILQ_REMOVE(list, pfl, pfl_link); + pool_put(&pf_state_link_pl, pfl); + } + SLIST_INIT(&st->linkage); + RBT_REMOVE(pf_state_tree_id, &tree_id, st); #if NPFLOW > 0 if (st->state_flags & PFSTATE_PFLOW) @@ -4095,6 +4411,9 @@ pf_match_rule(struct pf_test_ctx *ctx, s enter_ruleset: r = TAILQ_FIRST(ruleset->rules.active.ptr); while (r != NULL) { + struct pf_statepl *stpl = NULL; + struct pf_source *sr = NULL; + PF_TEST_ATTRIB(r->rule_flag & PFRULE_EXPIRED, TAILQ_NEXT(r, entries)); r->evaluations++; @@ -4226,6 +4545,164 @@ enter_ruleset: ctx->pd->m->m_pkthdr.pf.prio), TAILQ_NEXT(r, entries)); + if (r->statepl != PF_STATEPL_ID_NONE) { + unsigned int gen; + + stpl = pf_statepl_find(r->statepl); + + /* + * Treat a missing pool like an overcommitted pool. + * There is no "backend" to get a resource out of + * so the rule can't create state. + */ + PF_TEST_ATTRIB(stpl == NULL, + TAILQ_NEXT(r, entries)); + + /* + * An overcommitted pool means this rule + * can't create state. + */ + if (stpl->pfstpl_inuse >= stpl->pfstpl_limit) { + gen = pf_statepl_enter(stpl); + stpl->pfstpl_counters.hardlimited++; + pf_statepl_leave(stpl, gen); + r = TAILQ_NEXT(r, entries); + continue; + } + + /* + * Is access to the pool rate limited? + */ + if (stpl->pfstpl_rate.limit != 0) { + uint64_t ts = getnsecuptime(); + uint64_t diff = ts - stpl->pfstpl_rate_ts; + + if (diff < stpl->pfstpl_rate_token) { + gen = pf_statepl_enter(stpl); + stpl->pfstpl_counters.ratelimited++; + pf_statepl_leave(stpl, gen); + r = TAILQ_NEXT(r, entries); + continue; + } + + if (diff > stpl->pfstpl_rate_bucket) { + stpl->pfstpl_rate_ts = + ts - stpl->pfstpl_rate_bucket; + } + + /* + * stpl->pfstpl_rate_ts += + * stpl->pfstpl_rate_token; + */ + } + + /* + * stpl->pfstpl_inuse++; + */ + } + + if (r->sourcepl != PF_SOURCEPL_ID_NONE) { + struct pf_source key; + struct pf_sourcepl *srpl = + pf_sourcepl_find(r->sourcepl); + unsigned int gen; + + /* + * Treat a missing pool like an overcommitted pool. + * There is no "backend" to get a resource out of + * so the rule can't create state. 
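The rate checks in this function are a token bucket over getnsecuptime():
for "rate 100/10" one token is 10s/100 = 100ms worth of nanoseconds, a state
is admitted only once diff = now - pfstpl_rate_ts has accrued a full token,
pf_create_state advances pfstpl_rate_ts by pfstpl_rate_token per admission,
and the clamp against pfstpl_rate_bucket bounds how much burst credit can
build up while idle. The token/bucket initialisation is not in this excerpt;
the assumption is roughly

	/* presumed setup when the pool is added (not shown in this diff) */
	pfstpl_rate_token = SEC_TO_NSEC(rate.seconds) / rate.limit;
	pfstpl_rate_bucket = SEC_TO_NSEC(rate.seconds);

and the same scheme applies per source via pfsr_rate_ts and
pfsrpl_rate_token/pfsrpl_rate_bucket below.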
+ */ + PF_TEST_ATTRIB(srpl == NULL, + TAILQ_NEXT(r, entries)); + + pf_source_key(srpl, &key, + ctx->pd->af, ctx->pd->rdomain, ctx->pd->src); + + sr = pf_source_find(srpl, &key); + if (sr == NULL) { + if (srpl->pfsrpl_nsources >= + srpl->pfsrpl_limit) { + gen = pf_sourcepl_enter(srpl); + srpl->pfsrpl_counters.addrlimited++; + pf_sourcepl_leave(srpl, gen); + r = TAILQ_NEXT(r, entries); + continue; + } + + sr = pool_get(&pf_source_pl, + PR_NOWAIT|PR_ZERO); + if (sr == NULL) { + gen = pf_sourcepl_enter(srpl); + srpl->pfsrpl_counters.addrnomem++; + pf_sourcepl_leave(srpl, gen); + REASON_SET(&ctx->reason, PFRES_MEMORY); + return (PF_TEST_FAIL); + } + + sr->pfsr_parent = srpl; + sr->pfsr_af = key.pfsr_af; + sr->pfsr_rdomain = key.pfsr_rdomain; + sr->pfsr_addr = key.pfsr_addr; + TAILQ_INIT(&sr->pfsr_states); + + if (RBT_INSERT(pf_source_tree, + &srpl->pfsrpl_sources, sr) != NULL) { + panic("%s: insert collision?!", + __func__); + } + + if (RBT_INSERT(pf_source_ioc_tree, + &srpl->pfsrpl_ioc_sources, sr) != NULL) { + panic("%s: insert collision?!", + __func__); + } + + sr->pfsr_empty_ts = getuptime(); + TAILQ_INSERT_TAIL(&pf_source_gc, sr, + pfsr_empty_gc); + + gen = pf_sourcepl_enter(srpl); + srpl->pfsrpl_nsources++; + srpl->pfsrpl_counters.addrallocs++; + pf_sourcepl_leave(srpl, gen); + } + + /* + * An overcommitted pool means this rule + * can't create state. + */ + if (sr->pfsr_inuse >= srpl->pfsrpl_states) { + sr->pfsr_counters.hardlimited++; + gen = pf_sourcepl_enter(srpl); + srpl->pfsrpl_counters.hardlimited++; + pf_sourcepl_leave(srpl, gen); + r = TAILQ_NEXT(r, entries); + continue; + } + + /* + * Is access to the pool rate limited? + */ + if (srpl->pfsrpl_rate.limit != 0) { + uint64_t ts = getnsecuptime(); + uint64_t diff = ts - sr->pfsr_rate_ts; + + if (diff < srpl->pfsrpl_rate_token) { + sr->pfsr_counters.ratelimited++; + gen = pf_sourcepl_enter(srpl); + srpl->pfsrpl_counters.ratelimited++; + pf_sourcepl_leave(srpl, gen); + r = TAILQ_NEXT(r, entries); + continue; + } + + if (diff > srpl->pfsrpl_rate_bucket) { + sr->pfsr_rate_ts = + ts - srpl->pfsrpl_rate_bucket; + } + } + } + /* must be last! */ if (r->pktrate.limit) { pf_add_threshold(&r->pktrate); @@ -4295,6 +4772,12 @@ enter_ruleset: * ruleset, where anchor belongs to. 
*/ ctx->arsm = ctx->aruleset; + + /* + * state/source pools + */ + ctx->statepl = stpl; + ctx->source = sr; } #if NPFLOG > 0 @@ -4520,10 +5003,19 @@ pf_test_rule(struct pf_pdesc *pd, struct } action = pf_create_state(pd, r, a, ctx.nr, &skw, &sks, - &rewrite, sm, ctx.tag, &ctx.rules, &ctx.act, ctx.sns); + &rewrite, sm, ctx.tag, &ctx.rules, &ctx.act, ctx.sns, + ctx.statepl, ctx.source); if (action != PF_PASS) goto cleanup; + + if (pd->proto == IPPROTO_TCP && + r->keep_state == PF_STATE_SYNPROXY && pd->dir == PF_IN) { + action = pf_synproxy_ack(r, pd, sm, &ctx.act); + if (action != PF_PASS) + return (action); /* PF_SYNPROXY_DROP */ + } + if (sks != skw) { struct pf_state_key *sk; @@ -4587,11 +5079,12 @@ static __inline int pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, struct pf_rule *a, struct pf_rule *nr, struct pf_state_key **skw, struct pf_state_key **sks, int *rewrite, struct pf_state **sm, int tag, struct pf_rule_slist *rules, - struct pf_rule_actions *act, struct pf_src_node *sns[PF_SN_MAX]) + struct pf_rule_actions *act, struct pf_src_node *sns[PF_SN_MAX], + struct pf_statepl *stpl, struct pf_source *sr) { struct pf_state *st = NULL; + struct pf_state_link *pfl; struct tcphdr *th = &pd->hdr.tcp; - u_int16_t mss = tcp_mssdflt; u_short reason; u_int i; @@ -4690,8 +5183,7 @@ pf_create_state(struct pf_pdesc *pd, str st->timeout = PFTM_OTHER_FIRST_PACKET; } - st->creation = getuptime(); - st->expire = getuptime(); + st->creation = st->expire = getuptime(); if (pd->proto == IPPROTO_TCP) { if (st->state_flags & PFSTATE_SCRUB_TCP && @@ -4734,6 +5226,57 @@ pf_create_state(struct pf_pdesc *pd, str sni->sn->states++; } + if (stpl != NULL) { + unsigned int gen; + + pfl = pool_get(&pf_state_link_pl, PR_NOWAIT); + if (pfl == NULL) { + REASON_SET(&reason, PFRES_MEMORY); + goto csfailed; + } + + gen = pf_statepl_enter(stpl); + stpl->pfstpl_counters.admitted++; + stpl->pfstpl_inuse++; + pf_statepl_leave(stpl, gen); + + stpl->pfstpl_rate_ts += stpl->pfstpl_rate_token; + + st->statepl = stpl->pfstpl_id; + pfl->pfl_state = st; + pfl->pfl_type = PF_STATE_LINK_TYPE_STATEPL; + + TAILQ_INSERT_TAIL(&stpl->pfstpl_states, pfl, pfl_link); + SLIST_INSERT_HEAD(&st->linkage, pfl, pfl_linkage); + } + + if (sr != NULL) { + struct pf_sourcepl *srpl = sr->pfsr_parent; + unsigned int gen; + + pfl = pool_get(&pf_state_link_pl, PR_NOWAIT); + if (pfl == NULL) { + REASON_SET(&reason, PFRES_MEMORY); + goto csfailed; + } + + pf_source_used(sr); + + sr->pfsr_counters.admitted++; + + gen = pf_sourcepl_enter(srpl); + srpl->pfsrpl_counters.inuse++; + srpl->pfsrpl_counters.admitted++; + pf_sourcepl_leave(srpl, gen); + + st->sourcepl = srpl->pfsrpl_id; + pfl->pfl_state = st; + pfl->pfl_type = PF_STATE_LINK_TYPE_SOURCEPL; + + TAILQ_INSERT_TAIL(&sr->pfsr_states, pfl, pfl_link); + SLIST_INSERT_HEAD(&st->linkage, pfl, pfl_linkage); + } + #if NPFSYNC > 0 pfsync_init_state(st, *skw, *sks, 0); #endif @@ -4756,29 +5299,52 @@ pf_create_state(struct pf_pdesc *pd, str pf_tag_ref(tag); st->tag = tag; } - if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) == - TH_SYN && r->keep_state == PF_STATE_SYNPROXY && pd->dir == PF_IN) { - int rtid = pd->rdomain; - if (act->rtableid >= 0) - rtid = act->rtableid; - pf_set_protostate(st, PF_PEER_SRC, PF_TCPS_PROXY_SRC); - st->src.seqhi = arc4random(); - /* Find mss option */ - mss = pf_get_mss(pd); - mss = pf_calc_mss(pd->src, pd->af, rtid, mss); - mss = pf_calc_mss(pd->dst, pd->af, rtid, mss); - st->src.mss = mss; - pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, - 
th->th_sport, st->src.seqhi, ntohl(th->th_seq) + 1, - TH_SYN|TH_ACK, 0, st->src.mss, 0, 1, 0, pd->rdomain); - REASON_SET(&reason, PFRES_SYNPROXY); - return (PF_SYNPROXY_DROP); - } return (PF_PASS); csfailed: if (st) { + struct pf_state_link *npfl; + + SLIST_FOREACH_SAFE(pfl, &st->linkage, pfl_linkage, npfl) { + struct pf_state_link_list *list; + unsigned int gen; + + /* who needs KASSERTS when we have NULL derefs */ + + switch (pfl->pfl_type) { + case PF_STATE_LINK_TYPE_STATEPL: + gen = pf_statepl_enter(stpl); + stpl->pfstpl_inuse--; + pf_statepl_leave(stpl, gen); + + stpl->pfstpl_rate_ts -= + stpl->pfstpl_rate_token; + list = &stpl->pfstpl_states; + break; + case PF_STATE_LINK_TYPE_SOURCEPL: { + struct pf_sourcepl *srpl = sr->pfsr_parent; + + pf_source_rele(sr); + sr->pfsr_rate_ts -= + srpl->pfsrpl_rate_token; + + gen = pf_sourcepl_enter(srpl); + srpl->pfsrpl_counters.inuse--; + pf_sourcepl_leave(srpl, gen); + + list = &sr->pfsr_states; + break; + } + default: + panic("%s: unexpected link type on pfl %p", + __func__, pfl); + } + + TAILQ_REMOVE(list, pfl, pfl_link); + pool_put(&pf_state_link_pl, pfl); + } + pf_normalize_tcp_cleanup(st); /* safe even w/o init */ pf_src_tree_remove_state(st); pool_put(&pf_state_pl, st); @@ -5376,6 +5942,38 @@ pf_synproxy(struct pf_pdesc *pd, struct return (PF_PASS); } +static __inline int +pf_synproxy_ack(struct pf_rule *r, struct pf_pdesc *pd, struct pf_state **sm, + struct pf_rule_actions *act) +{ + struct tcphdr *th = &pd->hdr.tcp; + struct pf_state *s; + u_int16_t mss; + int rtid; + u_short reason; + + if ((th->th_flags & (TH_SYN|TH_ACK)) != TH_SYN) + return (PF_PASS); + + s = *sm; + rtid = (act->rtableid >= 0) ? act->rtableid : pd->rdomain; + + pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_SRC); + s->src.seqhi = arc4random(); + /* Find mss option */ + mss = pf_get_mss(pd); + mss = pf_calc_mss(pd->src, pd->af, rtid, mss); + mss = pf_calc_mss(pd->dst, pd->af, rtid, mss); + s->src.mss = mss; + + pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, + th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, + TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, pd->rdomain); + + REASON_SET(&reason, PFRES_SYNPROXY); + return (PF_SYNPROXY_DROP); +} + int pf_test_state(struct pf_pdesc *pd, struct pf_state **stp, u_short *reason) { @@ -8302,6 +8900,8 @@ pf_state_unref(struct pf_state *st) pf_state_key_unref(st->key[PF_SK_WIRE]); pf_state_key_unref(st->key[PF_SK_STACK]); + + KASSERT(SLIST_EMPTY(&st->linkage)); pool_put(&pf_state_pl, st); } Index: sys/net/pf_ioctl.c =================================================================== RCS file: /cvs/src/sys/net/pf_ioctl.c,v retrieving revision 1.415 diff -u -p -r1.415 pf_ioctl.c --- sys/net/pf_ioctl.c 6 Jul 2023 04:55:05 -0000 1.415 +++ sys/net/pf_ioctl.c 3 Mar 2024 05:50:09 -0000 @@ -37,6 +37,7 @@ #include "pfsync.h" #include "pflog.h" +#include "kstat.h" #include #include @@ -87,6 +88,8 @@ #endif /* NPFSYNC > 0 */ struct pool pf_tag_pl; +extern struct pool pf_statepl_pl, pf_sourcepl_pl, pf_source_pl; +extern struct pool pf_state_link_pl; void pfattach(int); void pf_thread_create(void *); @@ -128,6 +131,14 @@ void pf_cleanup_tgetrule(struct pf_tr struct pf_rule pf_default_rule, pf_default_rule_new; +#if NKSTAT > 0 +static void pf_kstat_attach(void); +static void pf_statepl_kstat_attach(struct pf_statepl *); +static void pf_statepl_kstat_detach(struct pf_statepl *); +static void pf_sourcepl_kstat_attach(struct pf_sourcepl *); +static void pf_sourcepl_kstat_detach(struct pf_sourcepl *); +#endif /* NKSTAT > 0 */ + struct { char 
statusif[IFNAMSIZ]; u_int32_t debug; @@ -196,7 +207,7 @@ pfattach(int num) IPL_SOFTNET, 0, "pfsrctr", NULL); pool_init(&pf_sn_item_pl, sizeof(struct pf_sn_item), 0, IPL_SOFTNET, 0, "pfsnitem", NULL); - pool_init(&pf_state_pl, sizeof(struct pf_state), 0, + pool_init(&pf_state_pl, sizeof(struct pf_state), CACHELINESIZE, IPL_SOFTNET, 0, "pfstate", NULL); pool_init(&pf_state_key_pl, sizeof(struct pf_state_key), 0, IPL_SOFTNET, 0, "pfstkey", NULL); @@ -213,6 +224,15 @@ pfattach(int num) pool_init(&pf_anchor_pl, sizeof(struct pf_anchor), 0, IPL_SOFTNET, 0, "pfanchor", NULL); + pool_init(&pf_statepl_pl, sizeof(struct pf_statepl), 0, + IPL_SOFTNET, 0, "pfstpl", NULL); + pool_init(&pf_sourcepl_pl, sizeof(struct pf_sourcepl), 0, + IPL_SOFTNET, 0, "pfsrcpl", NULL); + pool_init(&pf_source_pl, sizeof(struct pf_source), 0, + IPL_SOFTNET, 0, "pfsrc", NULL); + pool_init(&pf_state_link_pl, sizeof(struct pf_state_link), 0, + IPL_SOFTNET, 0, "pfslink", NULL); + hfsc_initialize(); pfr_initialize(); pfi_initialize(); @@ -291,6 +311,10 @@ pfattach(int num) M_WAITOK|M_ZERO); CPUMEM_FOREACH(sf, &cmi, pf_anchor_stack) sf[PF_ANCHOR_STACK_MAX].sf_stack_top = &sf[0]; + +#if NKSTAT > 0 + pf_kstat_attach(); +#endif } int @@ -904,54 +928,947 @@ pf_calc_chksum(struct pf_ruleset *rs) struct pf_rule *rule; u_int8_t digest[PF_MD5_DIGEST_LENGTH]; - MD5Init(&ctx); + MD5Init(&ctx); + + if (rs->rules.inactive.rcount) { + TAILQ_FOREACH(rule, rs->rules.inactive.ptr, entries) { + pf_hash_rule(&ctx, rule); + } + } + + MD5Final(digest, &ctx); + memcpy(pf_status.pf_chksum, digest, sizeof(pf_status.pf_chksum)); +} + +int +pf_addr_setup(struct pf_ruleset *ruleset, struct pf_addr_wrap *addr, + sa_family_t af) +{ + if (pfi_dynaddr_setup(addr, af, PR_WAITOK) || + pf_tbladdr_setup(ruleset, addr, PR_WAITOK) || + pf_rtlabel_add(addr)) + return (EINVAL); + + return (0); +} + +struct pfi_kif * +pf_kif_setup(struct pfi_kif *kif_buf) +{ + struct pfi_kif *kif; + + if (kif_buf == NULL) + return (NULL); + + KASSERT(kif_buf->pfik_name[0] != '\0'); + + kif = pfi_kif_get(kif_buf->pfik_name, &kif_buf); + if (kif_buf != NULL) + pfi_kif_free(kif_buf); + pfi_kif_ref(kif, PFI_KIF_REF_RULE); + + return (kif); +} + +void +pf_addr_copyout(struct pf_addr_wrap *addr) +{ + pfi_dynaddr_copyout(addr); + pf_tbladdr_copyout(addr); + pf_rtlabel_copyout(addr); +} + +int +pf_statepl_add(const struct pfioc_statepl *ioc) +{ + struct pf_statepl *pfstpl; + int error; + size_t descrlen; + + if (ioc->id < PF_STATEPL_ID_MIN || + ioc->id > PF_STATEPL_ID_MAX) + return (EINVAL); + + if (ioc->limit < PF_STATEPL_LIMIT_MIN || + ioc->limit > PF_STATEPL_LIMIT_MAX) + return (EINVAL); + + if ((ioc->rate.limit == 0) != (ioc->rate.seconds == 0)) + return (EINVAL); + + /* XXX check rate */ + + descrlen = strnlen(ioc->description, sizeof(ioc->description)); + if (descrlen == sizeof(ioc->description)) + return (EINVAL); + + pfstpl = pool_get(&pf_statepl_pl, PR_WAITOK|PR_ZERO); + if (pfstpl == NULL) + return (ENOMEM); + + pfstpl->pfstpl_id = ioc->id; + pfstpl->pfstpl_limit = ioc->limit; + pfstpl->pfstpl_rate.limit = ioc->rate.limit; + pfstpl->pfstpl_rate.seconds = ioc->rate.seconds; + memcpy(pfstpl->pfstpl_descr, ioc->description, descrlen); + + if (pfstpl->pfstpl_rate.limit) { + uint64_t bucket = + pfstpl->pfstpl_rate.seconds * 1000000000ULL; + + pfstpl->pfstpl_rate_ts = getnsecuptime() - bucket; + pfstpl->pfstpl_rate_token = bucket / pfstpl->pfstpl_rate.limit; + pfstpl->pfstpl_rate_bucket = bucket; + } + + TAILQ_INIT(&pfstpl->pfstpl_states); + pc_lock_init(&pfstpl->pfstpl_lock); + + 
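+	/*
+	 * pfstpl_rate_ts/_token/_bucket implement a token bucket
+	 * in nanoseconds: on average one state may be admitted
+	 * every rate_token nsec, with bursts of up to rate.limit
+	 * states. A sketch of how they are used (the check lives
+	 * in pf_match_rule(), the charge in pf_create_state()):
+	 *
+	 *	uint64_t diff = getnsecuptime() - rate_ts;
+	 *
+	 *	if (diff < rate_token)
+	 *		reject and count ratelimited;
+	 *	if (diff > rate_bucket)
+	 *		rate_ts = getnsecuptime() - rate_bucket;
+	 *	admit, then rate_ts += rate_token;
+	 *
+	 * Seeding rate_ts a whole bucket in the past above lets a
+	 * new pool absorb an initial burst of rate.limit states.
+	 */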
NET_LOCK(); + PF_LOCK(); + if (ioc->ticket != pf_main_ruleset.rules.inactive.version) { + error = EBUSY; + goto unlock; + } + + if (RBT_INSERT(pf_statepl_tree, + &pf_statepl_tree_inactive, pfstpl) != NULL) { + error = EBUSY; + goto unlock; + } + + TAILQ_INSERT_HEAD(&pf_statepl_list_inactive, pfstpl, pfstpl_list); + + PF_UNLOCK(); + NET_UNLOCK(); + + return (0); + +unlock: + PF_UNLOCK(); + NET_UNLOCK(); +/* free: */ + pool_put(&pf_statepl_pl, pfstpl); + + return (error); +} + +static void +pf_statepl_unlink(struct pf_statepl *pfstpl, + struct pf_state_link_list *garbage) +{ + struct pf_state_link *pfl; + + PF_STATE_ENTER_WRITE(); + + /* unwire the links */ + TAILQ_FOREACH(pfl, &pfstpl->pfstpl_states, pfl_link) { + struct pf_state *s = pfl->pfl_state; + + /* if !rmst */ + s->statepl = 0; + SLIST_REMOVE(&s->linkage, pfl, pf_state_link, pfl_linkage); + } + + /* take the list away */ + TAILQ_CONCAT(garbage, &pfstpl->pfstpl_states, pfl_link); + pfstpl->pfstpl_inuse = 0; + + PF_STATE_EXIT_WRITE(); +} + +int +pf_statepl_clr(uint32_t id, int rmst) +{ + struct pf_statepl key = { .pfstpl_id = id }; + struct pf_statepl *pfstpl; + int error = ESRCH; /* is this right? */ + struct pf_state_link_list garbage = TAILQ_HEAD_INITIALIZER(garbage); + struct pf_state_link *pfl, *npfl; + + if (rmst) + return (EOPNOTSUPP); + + NET_LOCK(); + PF_LOCK(); + pfstpl = RBT_FIND(pf_statepl_tree, &pf_statepl_tree_active, &key); + if (pfstpl != NULL) { + pf_statepl_unlink(pfstpl, &garbage); + error = 0; + } + PF_UNLOCK(); + NET_UNLOCK(); + + TAILQ_FOREACH_SAFE(pfl, &garbage, pfl_link, npfl) + pool_put(&pf_state_link_pl, pfl); + + return (error); +} + +void +pf_statepl_commit(void) +{ + struct pf_statepl *pfstpl, *npfstpl, *opfstpl; + struct pf_statepl_list l = TAILQ_HEAD_INITIALIZER(l); + struct pf_state_link_list garbage = TAILQ_HEAD_INITIALIZER(garbage); + struct pf_state_link *pfl, *npfl; + + PF_ASSERT_LOCKED(); + NET_ASSERT_LOCKED(); + + /* merge the new statepls into the current set */ + + /* start with an empty active list */ + TAILQ_CONCAT(&l, &pf_statepl_list_active, pfstpl_list); + + /* beware, the inactive bits gets messed up here */ + + /* try putting pending statepls into the active tree */ + TAILQ_FOREACH_SAFE(pfstpl, &pf_statepl_list_inactive, + pfstpl_list, npfstpl) { + opfstpl = RBT_INSERT(pf_statepl_tree, + &pf_statepl_tree_active, pfstpl); + if (opfstpl != NULL) { + /* this statepl already exists, merge */ + opfstpl->pfstpl_limit = + pfstpl->pfstpl_limit; + opfstpl->pfstpl_rate.limit = + pfstpl->pfstpl_rate.limit; + opfstpl->pfstpl_rate.seconds = + pfstpl->pfstpl_rate.seconds; + + opfstpl->pfstpl_rate_ts = + pfstpl->pfstpl_rate_ts; + opfstpl->pfstpl_rate_token = + pfstpl->pfstpl_rate_token; + opfstpl->pfstpl_rate_bucket = + pfstpl->pfstpl_rate_bucket; + + memcpy(opfstpl->pfstpl_descr, pfstpl->pfstpl_descr, + sizeof(opfstpl->pfstpl_descr)); + + /* use the existing statepl instead */ + pool_put(&pf_statepl_pl, pfstpl); + TAILQ_REMOVE(&l, opfstpl, pfstpl_list); + pfstpl = opfstpl; + } + + TAILQ_INSERT_TAIL(&pf_statepl_list_active, + pfstpl, pfstpl_list); + +#if NKSTAT > 0 + pf_statepl_kstat_attach(pfstpl); +#endif + } + + /* clean up the now unused statepls from the old set */ + TAILQ_FOREACH_SAFE(pfstpl, &l, pfstpl_list, npfstpl) { + pf_statepl_unlink(pfstpl, &garbage); + + RBT_REMOVE(pf_statepl_tree, + &pf_statepl_tree_active, pfstpl); + +#if NKSTAT > 0 + pf_statepl_kstat_detach(pfstpl); +#endif + pool_put(&pf_statepl_pl, pfstpl); + } + + /* fix up the inactive tree */ + RBT_INIT(pf_statepl_tree, 
&pf_statepl_tree_inactive); + TAILQ_INIT(&pf_statepl_list_inactive); + + TAILQ_FOREACH_SAFE(pfl, &garbage, pfl_link, npfl) + pool_put(&pf_state_link_pl, pfl); +} + +static void +pf_sourcepl_unlink(struct pf_sourcepl *pfsrpl, + struct pf_state_link_list *garbage) +{ + extern struct pf_source_list pf_source_gc; + struct pf_source *pfsr; + struct pf_state_link *pfl; + + PF_STATE_ENTER_WRITE(); + + while ((pfsr = RBT_ROOT(pf_source_tree, + &pfsrpl->pfsrpl_sources)) != NULL) { + RBT_REMOVE(pf_source_tree, + &pfsrpl->pfsrpl_sources, pfsr); + RBT_REMOVE(pf_source_ioc_tree, + &pfsrpl->pfsrpl_ioc_sources, pfsr); + if (pfsr->pfsr_inuse == 0) + TAILQ_REMOVE(&pf_source_gc, pfsr, pfsr_empty_gc); + + /* unwire the links */ + TAILQ_FOREACH(pfl, &pfsr->pfsr_states, pfl_link) { + struct pf_state *s = pfl->pfl_state; + + /* if !rmst */ + s->sourcepl = 0; + SLIST_REMOVE(&s->linkage, pfl, + pf_state_link, pfl_linkage); + } + + /* take the list away */ + TAILQ_CONCAT(garbage, &pfsr->pfsr_states, pfl_link); + + pool_put(&pf_source_pl, pfsr); + } + + PF_STATE_EXIT_WRITE(); +} + +int +pf_sourcepl_check(void) +{ + struct pf_sourcepl *pfsrpl, *npfsrpl; + + PF_ASSERT_LOCKED(); + NET_ASSERT_LOCKED(); + + /* check if we can merge */ + + TAILQ_FOREACH(pfsrpl, &pf_sourcepl_list_inactive, pfsrpl_list) { + npfsrpl = RBT_FIND(pf_sourcepl_tree, + &pf_sourcepl_tree_active, pfsrpl); + + /* new config, no conflict */ + if (npfsrpl == NULL) + continue; + + /* nothing is tracked at the moment, no conflict */ + if (RBT_EMPTY(pf_source_tree, &npfsrpl->pfsrpl_sources)) + continue; + + if (strcmp(npfsrpl->pfsrpl_overload.name, + pfsrpl->pfsrpl_overload.name) != 0) + return (EBUSY); + + /* + * we should allow the prefixlens to get shorter + * and merge pf_source entries. + */ + + if ((npfsrpl->pfsrpl_ipv4_prefix != + pfsrpl->pfsrpl_ipv4_prefix) || + (npfsrpl->pfsrpl_ipv6_prefix != + pfsrpl->pfsrpl_ipv6_prefix)) + return (EBUSY); + } + + return (0); +} + +void +pf_sourcepl_commit(void) +{ + struct pf_sourcepl *pfsrpl, *npfsrpl, *opfsrpl; + struct pf_sourcepl_list l = TAILQ_HEAD_INITIALIZER(l); + struct pf_state_link_list garbage = TAILQ_HEAD_INITIALIZER(garbage); + struct pf_state_link *pfl, *npfl; + + PF_ASSERT_LOCKED(); + NET_ASSERT_LOCKED(); + + /* merge the new sourcepls into the current set */ + + /* start with an empty active list */ + TAILQ_CONCAT(&l, &pf_sourcepl_list_active, pfsrpl_list); + + /* beware, the inactive bits gets messed up here */ + + /* try putting pending sourcepls into the active tree */ + TAILQ_FOREACH_SAFE(pfsrpl, &pf_sourcepl_list_inactive, + pfsrpl_list, npfsrpl) { + opfsrpl = RBT_INSERT(pf_sourcepl_tree, + &pf_sourcepl_tree_active, pfsrpl); + if (opfsrpl != NULL) { + /* this sourcepl already exists, merge */ + opfsrpl->pfsrpl_limit = + pfsrpl->pfsrpl_limit; + opfsrpl->pfsrpl_states = + pfsrpl->pfsrpl_states; + opfsrpl->pfsrpl_ipv4_prefix = + pfsrpl->pfsrpl_ipv4_prefix; + opfsrpl->pfsrpl_ipv6_prefix = + pfsrpl->pfsrpl_ipv6_prefix; + opfsrpl->pfsrpl_rate.limit = + pfsrpl->pfsrpl_rate.limit; + opfsrpl->pfsrpl_rate.seconds = + pfsrpl->pfsrpl_rate.seconds; + + opfsrpl->pfsrpl_ipv4_mask = + pfsrpl->pfsrpl_ipv4_mask; + opfsrpl->pfsrpl_ipv6_mask = + pfsrpl->pfsrpl_ipv6_mask; + +#if 0 + opfstpl->pfstpl_rate_ts = + pfstpl->pfstpl_rate_ts; +#endif + opfsrpl->pfsrpl_rate_token = + pfsrpl->pfsrpl_rate_token; + opfsrpl->pfsrpl_rate_bucket = + pfsrpl->pfsrpl_rate_bucket; + + if (opfsrpl->pfsrpl_overload.table != NULL) { + pfr_detach_table( + opfsrpl->pfsrpl_overload.table); + } + + strlcpy(opfsrpl->pfsrpl_overload.name, 
+			    pfsrpl->pfsrpl_overload.name,
+			    sizeof(opfsrpl->pfsrpl_overload.name));
+			opfsrpl->pfsrpl_overload.hwm =
+			    pfsrpl->pfsrpl_overload.hwm;
+			opfsrpl->pfsrpl_overload.lwm =
+			    pfsrpl->pfsrpl_overload.lwm;
+			opfsrpl->pfsrpl_overload.table =
+			    pfsrpl->pfsrpl_overload.table;
+
+			memcpy(opfsrpl->pfsrpl_descr,
+			    pfsrpl->pfsrpl_descr,
+			    sizeof(opfsrpl->pfsrpl_descr));
+
+			/* use the existing sourcepl instead */
+			pool_put(&pf_sourcepl_pl, pfsrpl);
+			TAILQ_REMOVE(&l, opfsrpl, pfsrpl_list);
+			pfsrpl = opfsrpl;
+		}
+
+		TAILQ_INSERT_TAIL(&pf_sourcepl_list_active,
+		    pfsrpl, pfsrpl_list);
+
+#if NKSTAT > 0
+		pf_sourcepl_kstat_attach(pfsrpl);
+#endif
+	}
+
+	/* clean up the now unused sourcepls from the old set */
+	TAILQ_FOREACH_SAFE(pfsrpl, &l, pfsrpl_list, npfsrpl) {
+		pf_sourcepl_unlink(pfsrpl, &garbage);
+
+		RBT_REMOVE(pf_sourcepl_tree,
+		    &pf_sourcepl_tree_active, pfsrpl);
+
+		if (pfsrpl->pfsrpl_overload.table != NULL)
+			pfr_detach_table(pfsrpl->pfsrpl_overload.table);
+
+#if NKSTAT > 0
+		pf_sourcepl_kstat_detach(pfsrpl);
+#endif
+
+		pool_put(&pf_sourcepl_pl, pfsrpl);
+	}
+
+	/* fix up the inactive tree */
+	RBT_INIT(pf_sourcepl_tree, &pf_sourcepl_tree_inactive);
+	TAILQ_INIT(&pf_sourcepl_list_inactive);
+
+	TAILQ_FOREACH_SAFE(pfl, &garbage, pfl_link, npfl)
+		pool_put(&pf_state_link_pl, pfl);
+}
+
+void
+pf_statepl_rollback(void)
+{
+	struct pf_statepl *pfstpl, *npfstpl;
+
+	PF_ASSERT_LOCKED();
+	NET_ASSERT_LOCKED();
+
+	TAILQ_FOREACH_SAFE(pfstpl, &pf_statepl_list_inactive,
+	    pfstpl_list, npfstpl)
+		pool_put(&pf_statepl_pl, pfstpl);
+
+	TAILQ_INIT(&pf_statepl_list_inactive);
+	RBT_INIT(pf_statepl_tree, &pf_statepl_tree_inactive);
+}
+
+static struct pf_statepl *
+pf_statepl_rb_find(struct pf_statepl_tree *tree, const struct pf_statepl *key)
+{
+	return (RBT_FIND(pf_statepl_tree, tree, key));
+}
+
+static struct pf_statepl *
+pf_statepl_rb_nfind(struct pf_statepl_tree *tree, const struct pf_statepl *key)
+{
+	return (RBT_NFIND(pf_statepl_tree, tree, key));
+}
+
+int
+pf_statepl_get(struct pfioc_statepl *ioc,
+    struct pf_statepl *(*rbt_op)(struct pf_statepl_tree *,
+    const struct pf_statepl *))
+{
+	struct pf_statepl key = { .pfstpl_id = ioc->id };
+	struct pf_statepl *pfstpl;
+	int error = 0;
+
+	NET_LOCK();
+	PF_LOCK();
+#if 0
+	if (ioc->ticket != pf_main_ruleset.rules.active.ticket) {
+		error = EBUSY;
+		goto unlock;
+	}
+#endif
+
+	pfstpl = (*rbt_op)(&pf_statepl_tree_active, &key);
+	if (pfstpl == NULL) {
+		error = ENOENT;
+		goto unlock;
+	}
+
+	ioc->id = pfstpl->pfstpl_id;
+	ioc->limit = pfstpl->pfstpl_limit;
+	ioc->rate.limit = pfstpl->pfstpl_rate.limit;
+	ioc->rate.seconds = pfstpl->pfstpl_rate.seconds;
+	CTASSERT(sizeof(ioc->description) ==
+	    sizeof(pfstpl->pfstpl_descr));
+	memcpy(ioc->description, pfstpl->pfstpl_descr,
+	    sizeof(ioc->description));
+
+	ioc->inuse = pfstpl->pfstpl_inuse;
+	ioc->admitted = pfstpl->pfstpl_counters.admitted;
+	ioc->hardlimited = pfstpl->pfstpl_counters.hardlimited;
+	ioc->ratelimited = pfstpl->pfstpl_counters.ratelimited;
+
+unlock:
+	PF_UNLOCK();
+	NET_UNLOCK();
+
+	return (error);
+}
+
+int
+pf_sourcepl_add(const struct pfioc_sourcepl *ioc)
+{
+	struct pf_sourcepl *pfsrpl;
+	int error;
+	size_t descrlen, tablelen;
+	uint64_t product;
+	unsigned int prefix;
+	size_t i;
+
+	if (ioc->id < PF_SOURCEPL_ID_MIN ||
+	    ioc->id > PF_SOURCEPL_ID_MAX)
+		return (EINVAL);
+
+	if (ioc->limit < 1)
+		return (EINVAL);
+
+	if (ioc->states < 1)
+		return (EINVAL);
+
+	/* XXX does this make sense?
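+	 * limit * states bounds the number of states a single
+	 * pool can pin, so capping the product at 1 << 24 keeps
+	 * it in line with PF_STATEPL_LIMIT_MAX.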
*/ + product = ioc->limit * ioc->states; + if (product > (1 << 24)) + return (EINVAL); + + if ((ioc->rate.limit == 0) != (ioc->rate.seconds == 0)) + return (EINVAL); + + if (ioc->inet_prefix > 32) + return (EINVAL); + if (ioc->inet6_prefix > 128) + return (EINVAL); + + /* XXX check rate */ + + descrlen = strnlen(ioc->description, sizeof(ioc->description)); + if (descrlen == sizeof(ioc->description)) + return (EINVAL); + + tablelen = strnlen(ioc->overload_tblname, + sizeof(ioc->overload_tblname)); + if (tablelen == sizeof(ioc->overload_tblname)) + return (EINVAL); + if (tablelen != 0) { + if (ioc->overload_hwm == 0) + return (EINVAL); + + /* + * this is stupid, but not harmful? + * + * if (ioc->states < ioc->overload_hwm) + * return (EINVAL); + */ + + if (ioc->overload_hwm < ioc->overload_lwm) + return (EINVAL); + } + + pfsrpl = pool_get(&pf_sourcepl_pl, PR_WAITOK|PR_ZERO); + if (pfsrpl == NULL) + return (ENOMEM); + + pfsrpl->pfsrpl_id = ioc->id; + pfsrpl->pfsrpl_limit = ioc->limit; + pfsrpl->pfsrpl_states = ioc->states; + pfsrpl->pfsrpl_ipv4_prefix = ioc->inet_prefix; + pfsrpl->pfsrpl_ipv6_prefix = ioc->inet6_prefix; + pfsrpl->pfsrpl_rate.limit = ioc->rate.limit; + pfsrpl->pfsrpl_rate.seconds = ioc->rate.seconds; + memcpy(pfsrpl->pfsrpl_overload.name, ioc->overload_tblname, tablelen); + pfsrpl->pfsrpl_overload.hwm = ioc->overload_hwm; + pfsrpl->pfsrpl_overload.lwm = ioc->overload_lwm; + memcpy(pfsrpl->pfsrpl_descr, ioc->description, descrlen); + + if (pfsrpl->pfsrpl_rate.limit) { + uint64_t bucket = pfsrpl->pfsrpl_rate.seconds * 1000000000ULL; + + pfsrpl->pfsrpl_rate_token = bucket / pfsrpl->pfsrpl_rate.limit; + pfsrpl->pfsrpl_rate_bucket = bucket; + } + + pfsrpl->pfsrpl_ipv4_mask.v4.s_addr = + htonl(0xffffffff << (32 - pfsrpl->pfsrpl_ipv4_prefix)); + + prefix = pfsrpl->pfsrpl_ipv6_prefix; + for (i = 0; i < nitems(pfsrpl->pfsrpl_ipv6_mask.addr32); i++) { + if (prefix == 0) { + /* the memory is already zeroed */ + break; + } + if (prefix < 32) { + pfsrpl->pfsrpl_ipv6_mask.addr32[i] = + htonl(0xffffffff << (32 - prefix)); + break; + } + + pfsrpl->pfsrpl_ipv6_mask.addr32[i] = htonl(0xffffffff); + prefix -= 32; + } + + RBT_INIT(pf_source_tree, &pfsrpl->pfsrpl_sources); + pc_lock_init(&pfsrpl->pfsrpl_lock); + + NET_LOCK(); + PF_LOCK(); + if (ioc->ticket != pf_main_ruleset.rules.inactive.version) { + error = EBUSY; + goto unlock; + } + + if (pfsrpl->pfsrpl_overload.name[0] != '\0') { + pfsrpl->pfsrpl_overload.table = pfr_attach_table( + &pf_main_ruleset, + pfsrpl->pfsrpl_overload.name, 0); + if (pfsrpl->pfsrpl_overload.table == NULL) { + error = EINVAL; + goto unlock; + } + } + + if (RBT_INSERT(pf_sourcepl_tree, + &pf_sourcepl_tree_inactive, pfsrpl) != NULL) { + error = EBUSY; + goto unlock; + } + + TAILQ_INSERT_HEAD(&pf_sourcepl_list_inactive, pfsrpl, pfsrpl_list); + + PF_UNLOCK(); + NET_UNLOCK(); + + return (0); + +unlock: + PF_UNLOCK(); + NET_UNLOCK(); +/* free: */ + pool_put(&pf_sourcepl_pl, pfsrpl); + + return (error); +} + +void +pf_sourcepl_rollback(void) +{ + struct pf_sourcepl *pfsrpl, *npfsrpl; + + PF_ASSERT_LOCKED(); + NET_ASSERT_LOCKED(); + + TAILQ_FOREACH_SAFE(pfsrpl, &pf_sourcepl_list_inactive, + pfsrpl_list, npfsrpl) { + if (pfsrpl->pfsrpl_overload.table != NULL) + pfr_detach_table(pfsrpl->pfsrpl_overload.table); + + pool_put(&pf_sourcepl_pl, pfsrpl); + } + + TAILQ_INIT(&pf_sourcepl_list_inactive); + RBT_INIT(pf_sourcepl_tree, &pf_sourcepl_tree_inactive); +} + +static struct pf_sourcepl * +pf_sourcepl_rb_find(struct pf_sourcepl_tree *tree, + const struct pf_sourcepl *key) +{ + 
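+	/*
+	 * Exact-match lookup by pool id; the nfind variant below
+	 * returns the first pool with an id >= the key, which is
+	 * what lets userland walk all pools with DIOCGETNSOURCEPL.
+	 * A hypothetical consumer (not part of this diff):
+	 *
+	 *	ioc.id = 0;
+	 *	while (ioctl(dev, DIOCGETNSOURCEPL, &ioc) != -1) {
+	 *		... use ioc ...
+	 *		ioc.id++;
+	 *	}
+	 */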
return (RBT_FIND(pf_sourcepl_tree, tree, key)); +} + +static struct pf_sourcepl * +pf_sourcepl_rb_nfind(struct pf_sourcepl_tree *tree, + const struct pf_sourcepl *key) +{ + return (RBT_NFIND(pf_sourcepl_tree, tree, key)); +} + +int +pf_sourcepl_get(struct pfioc_sourcepl *ioc, + struct pf_sourcepl *(*rbt_op)(struct pf_sourcepl_tree *, + const struct pf_sourcepl *)) +{ + struct pf_sourcepl key = { .pfsrpl_id = ioc->id }; + struct pf_sourcepl *pfsrpl; + int error = 0; + + NET_LOCK(); + PF_LOCK(); +#if 0 + if (ioc->ticket != pf_main_ruleset.rules.active.ticket) { + error = EBUSY; + goto unlock; + } +#endif + + pfsrpl = (*rbt_op)(&pf_sourcepl_tree_active, &key); + if (pfsrpl == NULL) { + error = ESRCH; + goto unlock; + } + + ioc->id = pfsrpl->pfsrpl_id; + ioc->limit = pfsrpl->pfsrpl_limit; + ioc->states = pfsrpl->pfsrpl_states; + ioc->inet_prefix = pfsrpl->pfsrpl_ipv4_prefix; + ioc->inet6_prefix = pfsrpl->pfsrpl_ipv6_prefix; + ioc->rate.limit = pfsrpl->pfsrpl_rate.limit; + ioc->rate.seconds = pfsrpl->pfsrpl_rate.seconds; + + CTASSERT(sizeof(ioc->overload_tblname) == + sizeof(pfsrpl->pfsrpl_overload.name)); + memcpy(ioc->overload_tblname, pfsrpl->pfsrpl_overload.name, + sizeof(pfsrpl->pfsrpl_overload.name)); + ioc->overload_hwm = pfsrpl->pfsrpl_overload.hwm; + ioc->overload_lwm = pfsrpl->pfsrpl_overload.lwm; + + CTASSERT(sizeof(ioc->description) == + sizeof(pfsrpl->pfsrpl_descr)); + memcpy(ioc->description, pfsrpl->pfsrpl_descr, + sizeof(ioc->description)); + + /* XXX overload table thing */ + + ioc->nsources = pfsrpl->pfsrpl_nsources; + + ioc->inuse = pfsrpl->pfsrpl_counters.inuse; + ioc->addrallocs = pfsrpl->pfsrpl_counters.addrallocs; + ioc->addrnomem = pfsrpl->pfsrpl_counters.addrnomem; + ioc->admitted = pfsrpl->pfsrpl_counters.admitted; + ioc->addrlimited = pfsrpl->pfsrpl_counters.addrlimited; + ioc->hardlimited = pfsrpl->pfsrpl_counters.hardlimited; + ioc->ratelimited = pfsrpl->pfsrpl_counters.ratelimited; + +unlock: + PF_UNLOCK(); + NET_UNLOCK(); + + return (error); +} + +static struct pf_source * +pf_source_rb_find(struct pf_source_ioc_tree *tree, + const struct pf_source *key) +{ + return (RBT_FIND(pf_source_ioc_tree, tree, key)); +} + +static struct pf_source * +pf_source_rb_nfind(struct pf_source_ioc_tree *tree, + const struct pf_source *key) +{ + return (RBT_NFIND(pf_source_ioc_tree, tree, key)); +} + +int +pf_source_get(struct pfioc_source *ioc, + struct pf_source *(*rbt_op)(struct pf_source_ioc_tree *, + const struct pf_source *)) +{ + struct pf_sourcepl plkey = { .pfsrpl_id = ioc->id }; + struct pfioc_source_entry e, *uentry; + struct pf_source key; + struct pf_sourcepl *pfsrpl; + struct pf_source *pfsr; + size_t used = 0, len = ioc->entrieslen; + int error = 0; + + if (ioc->entry_size != sizeof(e)) + return (EINVAL); + if (len < sizeof(e)) + return (EMSGSIZE); + + error = copyin(ioc->key, &e, sizeof(e)); + if (error != 0) + return (error); + + NET_LOCK(); + PF_LOCK(); +#if 0 + if (ioc->ticket != pf_main_ruleset.rules.active.ticket) { + error = EBUSY; + goto unlock; + } +#endif + + pfsrpl = pf_sourcepl_rb_find(&pf_sourcepl_tree_active, &plkey); + if (pfsrpl == NULL) { + error = ESRCH; + goto unlock; + } + + key.pfsr_af = e.af; + key.pfsr_rdomain = e.rdomain; + key.pfsr_addr = e.addr; + pfsr = (*rbt_op)(&pfsrpl->pfsrpl_ioc_sources, &key); + if (pfsr == NULL) { + error = ENOENT; + goto unlock; + } + + memset(&e, 0, sizeof(e)); + + uentry = ioc->entries; + for (;;) { + e.af = pfsr->pfsr_af; + e.rdomain = pfsr->pfsr_rdomain; + e.addr = pfsr->pfsr_addr; + + e.inuse = pfsr->pfsr_inuse; 
+ e.admitted = pfsr->pfsr_counters.admitted; + e.hardlimited = pfsr->pfsr_counters.hardlimited; + e.ratelimited = pfsr->pfsr_counters.ratelimited; - if (rs->rules.inactive.rcount) { - TAILQ_FOREACH(rule, rs->rules.inactive.ptr, entries) { - pf_hash_rule(&ctx, rule); + error = copyout(&e, uentry, sizeof(e)); + if (error != 0) + goto unlock; + + used += sizeof(e); + if (used == len) + break; + + pfsr = RBT_NEXT(pf_source_ioc_tree, pfsr); + if (pfsr == NULL) + break; + + if ((len - used) < sizeof(e)) { + error = EMSGSIZE; + goto unlock; } + + uentry++; } + KASSERT(error == 0); - MD5Final(digest, &ctx); - memcpy(pf_status.pf_chksum, digest, sizeof(pf_status.pf_chksum)); + ioc->inet_prefix = pfsrpl->pfsrpl_ipv4_prefix; + ioc->inet6_prefix = pfsrpl->pfsrpl_ipv6_prefix; + ioc->limit = pfsrpl->pfsrpl_states; + + ioc->entrieslen = used; + +unlock: + PF_UNLOCK(); + NET_UNLOCK(); + + return (error); } int -pf_addr_setup(struct pf_ruleset *ruleset, struct pf_addr_wrap *addr, - sa_family_t af) +pf_source_clr(struct pfioc_source_kill *ioc) { - if (pfi_dynaddr_setup(addr, af, PR_WAITOK) || - pf_tbladdr_setup(ruleset, addr, PR_WAITOK) || - pf_rtlabel_add(addr)) - return (EINVAL); + extern struct pf_source_list pf_source_gc; + struct pf_sourcepl plkey = { + .pfsrpl_id = ioc->id, + }; + struct pf_source skey = { + .pfsr_af = ioc->af, + .pfsr_rdomain = ioc->rdomain, + .pfsr_addr = ioc->addr, + }; + struct pf_sourcepl *pfsrpl; + struct pf_source *pfsr; + struct pf_state_link *pfl, *npfl; + int error = 0; + unsigned int gen; + + if (ioc->rmstates) { + /* XXX userland wants the states removed too */ + return (EOPNOTSUPP); + } - return (0); -} + NET_LOCK(); + PF_LOCK(); +#if 0 + if (ioc->ticket != pf_main_ruleset.rules.active.ticket) { + error = EBUSY; + goto unlock; + } +#endif -struct pfi_kif * -pf_kif_setup(struct pfi_kif *kif_buf) -{ - struct pfi_kif *kif; + pfsrpl = pf_sourcepl_rb_find(&pf_sourcepl_tree_active, &plkey); + if (pfsrpl == NULL) { + error = ESRCH; + goto unlock; + } - if (kif_buf == NULL) - return (NULL); + pfsr = pf_source_rb_find(&pfsrpl->pfsrpl_ioc_sources, &skey); + if (pfsr == NULL) { + error = ENOENT; + goto unlock; + } - KASSERT(kif_buf->pfik_name[0] != '\0'); + RBT_REMOVE(pf_source_tree, &pfsrpl->pfsrpl_sources, pfsr); + RBT_REMOVE(pf_source_ioc_tree, &pfsrpl->pfsrpl_ioc_sources, pfsr); + if (pfsr->pfsr_inuse == 0) + TAILQ_REMOVE(&pf_source_gc, pfsr, pfsr_empty_gc); + + gen = pf_sourcepl_enter(pfsrpl); + pfsrpl->pfsrpl_nsources--; + pfsrpl->pfsrpl_counters.inuse -= pfsr->pfsr_inuse; + pf_sourcepl_leave(pfsrpl, gen); + + /* unwire the links */ + TAILQ_FOREACH(pfl, &pfsr->pfsr_states, pfl_link) { + struct pf_state *st = pfl->pfl_state; + + /* if !rmst */ + st->sourcepl = 0; + SLIST_REMOVE(&st->linkage, pfl, + pf_state_link, pfl_linkage); + } - kif = pfi_kif_get(kif_buf->pfik_name, &kif_buf); - if (kif_buf != NULL) - pfi_kif_free(kif_buf); - pfi_kif_ref(kif, PFI_KIF_REF_RULE); + PF_UNLOCK(); + NET_UNLOCK(); - return (kif); -} + TAILQ_FOREACH_SAFE(pfl, &pfsr->pfsr_states, pfl_link, npfl) + pool_put(&pf_state_link_pl, pfl); -void -pf_addr_copyout(struct pf_addr_wrap *addr) -{ - pfi_dynaddr_copyout(addr); - pf_tbladdr_copyout(addr); - pf_rtlabel_copyout(addr); + pool_put(&pf_source_pl, pfsr); + + return (0); + +unlock: + PF_UNLOCK(); + NET_UNLOCK(); + + return (error); } int @@ -1090,6 +2007,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a case DIOCGETSTATES: case DIOCGETTIMEOUT: case DIOCGETLIMIT: + case DIOCGETSTATEPL: + case DIOCGETNSTATEPL: + case DIOCGETSOURCEPL: + case DIOCGETNSOURCEPL: + 
case DIOCGETSOURCE: + case DIOCGETNSOURCE: case DIOCGETRULESETS: case DIOCGETRULESET: case DIOCGETQUEUES: @@ -1134,6 +2057,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a case DIOCGETSTATES: case DIOCGETTIMEOUT: case DIOCGETLIMIT: + case DIOCGETSTATEPL: + case DIOCGETNSTATEPL: + case DIOCGETSOURCEPL: + case DIOCGETNSOURCEPL: + case DIOCGETSOURCE: + case DIOCGETNSOURCE: case DIOCGETRULESETS: case DIOCGETRULESET: case DIOCGETQUEUES: @@ -1357,6 +2286,42 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a break; } + case DIOCADDSTATEPL: + error = pf_statepl_add((struct pfioc_statepl *)addr); + break; + case DIOCGETSTATEPL: + error = pf_statepl_get((struct pfioc_statepl *)addr, + pf_statepl_rb_find); + break; + case DIOCGETNSTATEPL: + error = pf_statepl_get((struct pfioc_statepl *)addr, + pf_statepl_rb_nfind); + break; + + case DIOCADDSOURCEPL: + error = pf_sourcepl_add((struct pfioc_sourcepl *)addr); + break; + case DIOCGETSOURCEPL: + error = pf_sourcepl_get((struct pfioc_sourcepl *)addr, + pf_sourcepl_rb_find); + break; + case DIOCGETNSOURCEPL: + error = pf_sourcepl_get((struct pfioc_sourcepl *)addr, + pf_sourcepl_rb_nfind); + break; + + case DIOCGETSOURCE: + error = pf_source_get((struct pfioc_source *)addr, + pf_source_rb_find); + break; + case DIOCGETNSOURCE: + error = pf_source_get((struct pfioc_source *)addr, + pf_source_rb_nfind); + break; + case DIOCCLRSOURCE: + error = pf_source_clr((struct pfioc_source_kill *)addr); + break; + case DIOCADDRULE: { struct pfioc_rule *pr = (struct pfioc_rule *)addr; struct pf_ruleset *ruleset; @@ -2683,6 +3648,14 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a error = EBUSY; goto fail; } + error = pf_sourcepl_check(); + if (error != 0) { + PF_UNLOCK(); + NET_UNLOCK(); + free(table, M_TEMP, sizeof(*table)); + free(ioe, M_TEMP, sizeof(*ioe)); + goto fail; + } break; default: PF_UNLOCK(); @@ -2789,6 +3762,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a timeout_del(&pf_purge_to)) task_add(systqmp, &pf_purge_task); } + pf_statepl_commit(); + pf_sourcepl_commit(); pfi_xcommit(); pf_trans_set_commit(); PF_UNLOCK(); @@ -3108,6 +4083,18 @@ pf_rule_copyin(struct pf_rule *from, str from->set_prio[1] > IFQ_MAXPRIO)) return (EINVAL); + if (from->statepl != PF_STATEPL_ID_NONE) { + if (from->statepl < PF_STATEPL_ID_MIN || + from->statepl > PF_STATEPL_ID_MAX) + return (EINVAL); + } + + if (from->sourcepl != PF_SOURCEPL_ID_NONE) { + if (from->sourcepl < PF_SOURCEPL_ID_MIN || + from->sourcepl > PF_SOURCEPL_ID_MAX) + return (EINVAL); + } + to->src = from->src; to->src.addr.p.tbl = NULL; to->dst = from->dst; @@ -3232,6 +4219,8 @@ pf_rule_copyin(struct pf_rule *from, str to->prio = from->prio; to->set_prio[0] = from->set_prio[0]; to->set_prio[1] = from->set_prio[1]; + to->statepl = from->statepl; + to->sourcepl = from->sourcepl; return (0); } @@ -3365,3 +4354,486 @@ pf_rollback_trans(struct pf_trans *t) pf_free_trans(t); } } + +#if NKSTAT > 0 +#include + +struct pf_kstat_counters { + struct kstat_kv counters[PFRES_MAX]; +}; +static const char *pf_kstat_counters_names[] = PFRES_NAMES; + +struct pf_kstat_lcounters { + struct kstat_kv counters[LCNT_MAX]; +}; + +static const char *pf_kstat_lcounters_names[LCNT_MAX] = { + [LCNT_STATES] = "rule-state", + [LCNT_SRCSTATES] = "src-node-state", + [LCNT_SRCNODES] = "src-node", + [LCNT_SRCCONN] = "src-conn", + [LCNT_SRCCONNRATE] = "src-conn-rate", + [LCNT_OVERLOAD_TABLE] = "overload-table", + [LCNT_OVERLOAD_FLUSH] = "overload-flush", + [LCNT_SYNFLOODS] = "synflood", + [LCNT_SYNCOOKIES_SENT] = "syncookie-sent", + [LCNT_SYNCOOKIES_VALID] = 
"syncookie-valid", +}; + +struct pf_kstat_fcounters { + struct kstat_kv count; + struct kstat_kv counters[FCNT_MAX]; +}; + +static const char *pf_kstat_fcounters_names[FCNT_MAX] = { + [FCNT_STATE_SEARCH] = "search", + [FCNT_STATE_INSERT] = "insert", + [FCNT_STATE_REMOVALS] = "removal", +}; + +struct pf_kstat_scounters { + struct kstat_kv count; + struct kstat_kv counters[SCNT_MAX]; +}; + +static const char *pf_kstat_scounters_names[SCNT_MAX] = { + [SCNT_SRC_NODE_SEARCH] = "search", + [SCNT_SRC_NODE_INSERT] = "insert", + [SCNT_SRC_NODE_REMOVALS] = "removal", +}; + +static void +pf_kstat_u64s_read(struct kstat_kv *kvs, uint64_t *c, size_t n) +{ + size_t i; + + NET_LOCK_SHARED(); + PF_LOCK(); + + for (i = 0; i < n; i++) + kstat_kv_u64(&kvs[i]) = c[i]; + + PF_UNLOCK(); + NET_UNLOCK_SHARED(); +} + +static int +pf_kstat_counters_read(struct kstat *ks) +{ + struct pf_kstat_counters *pkc = ks->ks_data; + + pf_kstat_u64s_read(pkc->counters, + pf_status.counters, nitems(pkc->counters)); + + nanouptime(&ks->ks_updated); + + return (0); +} + +static void +pf_kstat_counters_attach(void) +{ + struct kstat *ks; + struct pf_kstat_counters *pkc; + size_t i; + + pkc = malloc(sizeof(*pkc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO); + if (pkc == NULL) { + printf("pf: unable to allocate pf-counters kstat\n"); + return; + } + + ks = kstat_create("pf", 0, "pf-counters", 0, KSTAT_T_KV, 0); + if (ks == NULL) { + printf("pf: unable to create pf-counters kstat\n"); + free(pkc, M_DEVBUF, sizeof(*pkc)); + return; + } + + for (i = 0; i < nitems(pkc->counters); i++) { + struct kstat_kv *kv = &pkc->counters[i]; + + kstat_kv_init(kv, pf_kstat_counters_names[i], + KSTAT_KV_T_COUNTER64); + } + + ks->ks_data = pkc; + ks->ks_datalen = sizeof(*pkc); + ks->ks_read = pf_kstat_counters_read; + + kstat_install(ks); +} + +static int +pf_kstat_lcounters_read(struct kstat *ks) +{ + struct pf_kstat_lcounters *lkc = ks->ks_data; + + pf_kstat_u64s_read(lkc->counters, + pf_status.lcounters, nitems(lkc->counters)); + + nanouptime(&ks->ks_updated); + + return (0); +} + +static void +pf_kstat_lcounters_attach(void) +{ + struct kstat *ks; + struct pf_kstat_lcounters *lkc; + size_t i; + + lkc = malloc(sizeof(*lkc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO); + if (lkc == NULL) { + printf("pf: unable to allocate pf-limits kstat\n"); + return; + } + + ks = kstat_create("pf", 0, "pf-limits", 0, KSTAT_T_KV, 0); + if (ks == NULL) { + printf("pf: unable to create pf-limits kstat\n"); + free(lkc, M_DEVBUF, sizeof(*lkc)); + return; + } + + for (i = 0; i < nitems(lkc->counters); i++) { + struct kstat_kv *kv = &lkc->counters[i]; + + kstat_kv_init(kv, pf_kstat_lcounters_names[i], + KSTAT_KV_T_COUNTER64); + } + + ks->ks_data = lkc; + ks->ks_datalen = sizeof(*lkc); + ks->ks_read = pf_kstat_lcounters_read; + + kstat_install(ks); +} + +static int +pf_kstat_fcounters_read(struct kstat *ks) +{ + struct pf_kstat_fcounters *fkc = ks->ks_data; + + pf_kstat_u64s_read(fkc->counters, + pf_status.fcounters, nitems(fkc->counters)); + kstat_kv_u64(&fkc->count) = + kstat_kv_u64(&fkc->counters[FCNT_STATE_INSERT]) - + kstat_kv_u64(&fkc->counters[FCNT_STATE_REMOVALS]); + + nanouptime(&ks->ks_updated); + + return (0); +} + +static void +pf_kstat_fcounters_attach(void) +{ + struct kstat *ks; + struct pf_kstat_fcounters *fkc; + size_t i; + + fkc = malloc(sizeof(*fkc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO); + if (fkc == NULL) { + printf("pf: unable to allocate pf-states kstat\n"); + return; + } + + ks = kstat_create("pf", 0, "pf-states", 0, KSTAT_T_KV, 0); + if (ks == NULL) { + 
printf("pf: unable to create pf-states kstat\n"); + free(fkc, M_DEVBUF, sizeof(*fkc)); + return; + } + + kstat_kv_init(&fkc->count, "count", KSTAT_KV_T_UINT64); + for (i = 0; i < nitems(fkc->counters); i++) { + struct kstat_kv *kv = &fkc->counters[i]; + + kstat_kv_init(kv, pf_kstat_fcounters_names[i], + KSTAT_KV_T_COUNTER64); + } + + ks->ks_data = fkc; + ks->ks_datalen = sizeof(*fkc); + ks->ks_read = pf_kstat_fcounters_read; + + kstat_install(ks); +} + +static int +pf_kstat_scounters_read(struct kstat *ks) +{ + struct pf_kstat_scounters *skc = ks->ks_data; + + pf_kstat_u64s_read(skc->counters, + pf_status.scounters, nitems(skc->counters)); + kstat_kv_u64(&skc->count) = + kstat_kv_u64(&skc->counters[SCNT_SRC_NODE_INSERT]) - + kstat_kv_u64(&skc->counters[SCNT_SRC_NODE_REMOVALS]); + + nanouptime(&ks->ks_updated); + + return (0); +} + +static void +pf_kstat_scounters_attach(void) +{ + struct kstat *ks; + struct pf_kstat_scounters *skc; + size_t i; + + skc = malloc(sizeof(*skc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO); + if (skc == NULL) { + printf("pf: unable to allocate pf-src-nodes kstat\n"); + return; + } + + ks = kstat_create("pf", 0, "pf-src-nodes", 0, KSTAT_T_KV, 0); + if (ks == NULL) { + printf("pf: unable to create pf-src-nodes kstat\n"); + free(skc, M_DEVBUF, sizeof(*skc)); + return; + } + + kstat_kv_init(&skc->count, "count", KSTAT_KV_T_UINT64); + for (i = 0; i < nitems(skc->counters); i++) { + struct kstat_kv *kv = &skc->counters[i]; + + kstat_kv_init(kv, pf_kstat_scounters_names[i], + KSTAT_KV_T_COUNTER64); + } + + ks->ks_data = skc; + ks->ks_datalen = sizeof(*skc); + ks->ks_read = pf_kstat_scounters_read; + + kstat_install(ks); +} + +static void +pf_kstat_attach(void) +{ + /* these are just hanging out in the breeze */ + pf_kstat_counters_attach(); + pf_kstat_lcounters_attach(); + pf_kstat_fcounters_attach(); + pf_kstat_scounters_attach(); +} + +struct pf_statepl_kstat { + struct kstat_kv descr; + struct kstat_kv inuse; + struct kstat_kv limit; + struct kstat_kv admitted; + struct kstat_kv hardlimited; + struct kstat_kv ratelimited; +}; + +static int +pf_statepl_kstat_read(struct kstat *ks) +{ + struct pf_statepl *pfstpl = ks->ks_softc; + struct pf_statepl_kstat *d = ks->ks_data; + unsigned int gen; + + strlcpy(kstat_kv_istr(&d->descr), pfstpl->pfstpl_descr, + sizeof(kstat_kv_istr(&d->descr))); + kstat_kv_u32(&d->limit) = pfstpl->pfstpl_limit; + + pc_cons_enter(&pfstpl->pfstpl_lock, &gen); + do { + kstat_kv_u32(&d->inuse) = pfstpl->pfstpl_inuse; + kstat_kv_u64(&d->admitted) = + pfstpl->pfstpl_counters.admitted; + kstat_kv_u64(&d->hardlimited) = + pfstpl->pfstpl_counters.hardlimited; + kstat_kv_u64(&d->ratelimited) = + pfstpl->pfstpl_counters.ratelimited; + } while (pc_cons_leave(&pfstpl->pfstpl_lock, &gen) != 0); + + nanouptime(&ks->ks_updated); + + return (0); +}; + +static void +pf_statepl_kstat_attach(struct pf_statepl *pfstpl) +{ + struct kstat *ks = pfstpl->pfstpl_ks; + struct pf_statepl_kstat *d; + + if (ks != NULL) + return; + + d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO); + if (d == NULL) { + printf("pf: unable to allocate state-pool kstat\n"); + return; + } + + ks = kstat_create("pf", 0, "state-pool", pfstpl->pfstpl_id, + KSTAT_T_KV, 0); + if (ks == NULL) { + printf("pf: unable to create state-pool kstat\n"); + free(d, M_DEVBUF, sizeof(*d)); + return; + } + + kstat_kv_init(&d->descr, "description", KSTAT_KV_T_ISTR); + kstat_kv_init(&d->inuse, "in-use", KSTAT_KV_T_UINT32); + kstat_kv_init(&d->limit, "limit", KSTAT_KV_T_UINT32); + kstat_kv_init(&d->admitted, 
"admitted", KSTAT_KV_T_COUNTER64); + kstat_kv_init(&d->hardlimited, "hard-limited", KSTAT_KV_T_COUNTER64); + kstat_kv_init(&d->ratelimited, "rate-limited", KSTAT_KV_T_COUNTER64); + + ks->ks_softc = pfstpl; + ks->ks_data = d; + ks->ks_datalen = sizeof(*d); + ks->ks_read = pf_statepl_kstat_read; + + kstat_install(ks); + + pfstpl->pfstpl_ks = ks; +} + +static void +pf_statepl_kstat_detach(struct pf_statepl *pfstpl) +{ + struct kstat *ks = pfstpl->pfstpl_ks; + struct pf_statepl_kstat *d; + + if (ks == NULL) + return; + + kstat_remove(ks); + d = ks->ks_data; + kstat_destroy(ks); + + free(d, M_DEVBUF, sizeof(*d)); +} + +struct pf_sourcepl_kstat { + struct kstat_kv descr; + + /* pf_source/address/prefix counters */ + struct kstat_kv sourceinuse; + struct kstat_kv sourcelimit; + struct kstat_kv sourceallocs; + struct kstat_kv sourcelimited; + struct kstat_kv sourcenomem; + + /* state counters */ + struct kstat_kv inuse; + struct kstat_kv limit; + struct kstat_kv admitted; + struct kstat_kv hardlimited; + struct kstat_kv ratelimited; +}; + +static int +pf_sourcepl_kstat_read(struct kstat *ks) +{ + struct pf_sourcepl *pfsrpl = ks->ks_softc; + struct pf_sourcepl_kstat *d = ks->ks_data; + unsigned int gen; + + strlcpy(kstat_kv_istr(&d->descr), pfsrpl->pfsrpl_descr, + sizeof(kstat_kv_istr(&d->descr))); + kstat_kv_u32(&d->sourcelimit) = pfsrpl->pfsrpl_limit; + kstat_kv_u32(&d->limit) = pfsrpl->pfsrpl_states; + + pc_cons_enter(&pfsrpl->pfsrpl_lock, &gen); + do { + kstat_kv_u32(&d->sourceinuse) = + pfsrpl->pfsrpl_nsources; + kstat_kv_u64(&d->sourceallocs) = + pfsrpl->pfsrpl_counters.addrallocs; + kstat_kv_u64(&d->sourcelimited) = + pfsrpl->pfsrpl_counters.addrlimited; + kstat_kv_u64(&d->sourcenomem) = + pfsrpl->pfsrpl_counters.addrnomem; + + kstat_kv_u32(&d->inuse) = + pfsrpl->pfsrpl_counters.inuse; + kstat_kv_u64(&d->admitted) = + pfsrpl->pfsrpl_counters.admitted; + kstat_kv_u64(&d->hardlimited) = + pfsrpl->pfsrpl_counters.hardlimited; + kstat_kv_u64(&d->ratelimited) = + pfsrpl->pfsrpl_counters.ratelimited; + } while (pc_cons_leave(&pfsrpl->pfsrpl_lock, &gen) != 0); + + nanouptime(&ks->ks_updated); + + return (0); +}; + +static void +pf_sourcepl_kstat_attach(struct pf_sourcepl *pfsrpl) +{ + struct kstat *ks = pfsrpl->pfsrpl_ks; + struct pf_sourcepl_kstat *d; + + if (ks != NULL) + return; + + d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO); + if (d == NULL) { + printf("pf: unable to allocate source-pool kstat\n"); + return; + } + + ks = kstat_create("pf", 0, "source-pool", pfsrpl->pfsrpl_id, + KSTAT_T_KV, 0); + if (ks == NULL) { + printf("pf: unable to create source-pool kstat\n"); + free(d, M_DEVBUF, sizeof(*d)); + return; + } + + kstat_kv_init(&d->descr, "description", KSTAT_KV_T_ISTR); + + kstat_kv_init(&d->sourceinuse, "source-in-use", KSTAT_KV_T_UINT32); + kstat_kv_init(&d->sourcelimit, "source-limit", KSTAT_KV_T_UINT32); + kstat_kv_init(&d->sourceallocs, "source-allocs", KSTAT_KV_T_UINT64); + kstat_kv_init(&d->sourcelimited, "source-limited", + KSTAT_KV_T_COUNTER64); + kstat_kv_init(&d->sourcenomem, "source-nomem", + KSTAT_KV_T_COUNTER64); + + kstat_kv_init(&d->inuse, "in-use", KSTAT_KV_T_UINT32); + kstat_kv_init(&d->limit, "limit", KSTAT_KV_T_UINT32); + kstat_kv_init(&d->admitted, "admitted", KSTAT_KV_T_COUNTER64); + kstat_kv_init(&d->hardlimited, "hard-limited", KSTAT_KV_T_COUNTER64); + kstat_kv_init(&d->ratelimited, "rate-limited", KSTAT_KV_T_COUNTER64); + + ks->ks_softc = pfsrpl; + ks->ks_data = d; + ks->ks_datalen = sizeof(*d); + ks->ks_read = pf_sourcepl_kstat_read; + + 
kstat_install(ks); + + pfsrpl->pfsrpl_ks = ks; +} + +static void +pf_sourcepl_kstat_detach(struct pf_sourcepl *pfsrpl) +{ + struct kstat *ks = pfsrpl->pfsrpl_ks; + struct pf_sourcepl_kstat *d; + + if (ks == NULL) + return; + + kstat_remove(ks); + d = ks->ks_data; + kstat_destroy(ks); + + free(d, M_DEVBUF, sizeof(*d)); +} + +#endif /* NKSTAT > 0 */ Index: sys/net/pf_table.c =================================================================== RCS file: /cvs/src/sys/net/pf_table.c,v retrieving revision 1.145 diff -u -p -r1.145 pf_table.c --- sys/net/pf_table.c 10 Aug 2023 16:44:04 -0000 1.145 +++ sys/net/pf_table.c 3 Mar 2024 05:50:09 -0000 @@ -1157,6 +1157,26 @@ pfr_insert_kentry(struct pfr_ktable *kt, return (0); } +int +pfr_remove_kentry(struct pfr_ktable *kt, struct pfr_addr *ad) +{ + struct pfr_kentryworkq workq = SLIST_HEAD_INITIALIZER(workq); + struct pfr_kentry *p; + + p = pfr_lookup_addr(kt, ad, 1); + if (p == NULL || ISSET(p->pfrke_flags, PFRKE_FLAG_NOT)) + return (ESRCH); + + if (ISSET(p->pfrke_flags, PFRKE_FLAG_MARK)) + return (0); + + SET(p->pfrke_flags, PFRKE_FLAG_MARK); + SLIST_INSERT_HEAD(&workq, p, pfrke_workq); + pfr_remove_kentries(kt, &workq); + + return (0); +} + void pfr_remove_kentries(struct pfr_ktable *kt, struct pfr_kentryworkq *workq) Index: sys/net/pfvar.h =================================================================== RCS file: /cvs/src/sys/net/pfvar.h,v retrieving revision 1.535 diff -u -p -r1.535 pfvar.h --- sys/net/pfvar.h 1 Jan 2024 22:16:51 -0000 1.535 +++ sys/net/pfvar.h 3 Mar 2024 05:50:09 -0000 @@ -592,6 +592,8 @@ struct pf_rule { u_int8_t set_prio[2]; sa_family_t naf; u_int8_t rcvifnot; + uint8_t statepl; + uint8_t sourcepl; struct { struct pf_addr addr; @@ -1500,6 +1502,127 @@ struct pfioc_synflwats { u_int32_t lowat; }; +#define PF_STATEPL_DESCR_LEN 16 /* kstat istr */ + +struct pfioc_statepl { + u_int32_t ticket; + + uint32_t id; +#define PF_STATEPL_ID_NONE 0 +#define PF_STATEPL_ID_MIN 1 +#define PF_STATEPL_ID_MAX 255 /* fits in pf_state uint8_t */ + + /* limit on the total number of states */ + unsigned int limit; +#define PF_STATEPL_LIMIT_MIN 1 +#define PF_STATEPL_LIMIT_MAX (1 << 24) /* pf is pretty scalable */ + + /* rate limit on the admission of states to the pool */ + struct { + unsigned int limit; + unsigned int seconds; + } rate; + + char description[PF_STATEPL_DESCR_LEN]; + + /* kernel state for GET ioctls */ + unsigned int inuse; /* gauge */ + uint64_t admitted; /* counter */ + uint64_t hardlimited; /* counter */ + uint64_t ratelimited; /* counter */ +}; + +#define PF_SOURCEPL_DESCR_LEN 16 /* kstat istr */ + +struct pfioc_sourcepl { + u_int32_t ticket; + + uint32_t id; +#define PF_SOURCEPL_ID_NONE 0 +#define PF_SOURCEPL_ID_MIN 1 +#define PF_SOURCEPL_ID_MAX 255 /* fits in pf_state uint8_t */ + + /* limit on the total number of addresses in the source pool */ + unsigned int limit; + + /* limit on the number of states per address entry */ + unsigned int states; + + /* rate limit on the addition of states to an address entry */ + struct { + unsigned int limit; + unsigned int seconds; + } rate; + + /* + * when the number of states on an entry exceeds hwm, add + * the address to the specified table. when the number of + * states goes below lwm, remove it from the table. + */ + char overload_tblname[PF_TABLE_NAME_SIZE]; + unsigned int overload_hwm; + unsigned int overload_lwm; + + /* + * mask addresses before they're used for entries. /64s + * everywhere for inet6 makes it easy to use too much memory. 
+ */ + unsigned int inet_prefix; + unsigned int inet6_prefix; + + char description[PF_SOURCEPL_DESCR_LEN]; + + /* kernel state for GET ioctls */ + unsigned int nsources; /* gauge */ + unsigned int inuse; /* gauge */ + + uint64_t addrallocs; /* counter */ + uint64_t addrnomem; /* counter */ + uint64_t admitted; /* counter */ + uint64_t addrlimited; /* counter */ + uint64_t hardlimited; /* counter */ + uint64_t ratelimited; /* counter */ +}; + +struct pfioc_source_entry { + sa_family_t af; + unsigned int rdomain; + struct pf_addr addr; + + /* stats */ + + unsigned int inuse; /* gauge */ + uint64_t admitted; /* counter */ + uint64_t hardlimited; /* counter */ + uint64_t ratelimited; /* counter */ +}; + +struct pfioc_source { + uint32_t id; + + /* copied from the parent source pool */ + + unsigned int inet_prefix; + unsigned int inet6_prefix; + unsigned int limit; + + /* source entries */ + size_t entry_size; + + struct pfioc_source_entry *key; + struct pfioc_source_entry *entries; + size_t entrieslen; +}; + +struct pfioc_source_kill { + uint32_t id; + unsigned int rdomain; + sa_family_t af; + struct pf_addr addr; + + unsigned int rmstates; /* kill the states too? */ +}; + /* * ioctl operations */ @@ -1568,6 +1691,15 @@ struct pfioc_synflwats { #define DIOCSETSYNCOOKIES _IOWR('D', 98, u_int8_t) #define DIOCGETSYNFLWATS _IOWR('D', 99, struct pfioc_synflwats) #define DIOCXEND _IOWR('D', 100, u_int32_t) +#define DIOCADDSTATEPL _IOW('D', 101, struct pfioc_statepl) +#define DIOCADDSOURCEPL _IOW('D', 102, struct pfioc_sourcepl) +#define DIOCGETSTATEPL _IOWR('D', 103, struct pfioc_statepl) +#define DIOCGETSOURCEPL _IOWR('D', 104, struct pfioc_sourcepl) +#define DIOCGETSOURCE _IOWR('D', 105, struct pfioc_source) +#define DIOCGETNSTATEPL _IOWR('D', 106, struct pfioc_statepl) +#define DIOCGETNSOURCEPL _IOWR('D', 107, struct pfioc_sourcepl) +#define DIOCGETNSOURCE _IOWR('D', 108, struct pfioc_source) +#define DIOCCLRSOURCE _IOWR('D', 109, struct pfioc_source_kill) #ifdef _KERNEL @@ -1726,6 +1858,8 @@ int pfr_clr_tstats(struct pfr_table *, i int pfr_set_tflags(struct pfr_table *, int, int, int, int *, int *, int); int pfr_clr_addrs(struct pfr_table *, int *, int); int pfr_insert_kentry(struct pfr_ktable *, struct pfr_addr *, time_t); +int pfr_remove_kentry(struct pfr_ktable *, struct pfr_addr *); + int pfr_add_addrs(struct pfr_table *, struct pfr_addr *, int, int *, int); int pfr_del_addrs(struct pfr_table *, struct pfr_addr *, int, int *, Index: sys/net/pfvar_priv.h =================================================================== RCS file: /cvs/src/sys/net/pfvar_priv.h,v retrieving revision 1.35 diff -u -p -r1.35 pfvar_priv.h --- sys/net/pfvar_priv.h 1 Jan 2024 22:16:51 -0000 1.35 +++ sys/net/pfvar_priv.h 3 Mar 2024 05:50:09 -0000 @@ -39,6 +39,7 @@ #include #include +#include #include /* @@ -47,6 +48,45 @@ */ struct pfsync_deferral; +struct kstat; + +/* + * PF state links + * + * This is used to augment a struct pf_state so it can be + * tracked/referenced by the state and source address "pool" things. + * Each pool maintains a list of the states they "own", and these + * state links are what the pools use to wire a state into their + * lists. + * + * Without PF state links, the pf_state struct would have to grow + * a lot to support a feature that may not be used. + * + * pfl_entry is used by the pools to add states to their list. + * pfl_state allows the pools to get from their list of states to + * the states themselves. 
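+ *
+ * For example, a pool walks its states through the links
+ * (a sketch; cf. pf_statepl_unlink() in pf_ioctl.c):
+ *
+ *	TAILQ_FOREACH(pfl, &pfstpl->pfstpl_states, pfl_link) {
+ *		struct pf_state *st = pfl->pfl_state;
+ *		...
+ *	}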
+ * + * pfl_link allows operations on states (well, delete) to be able + * to quickly locate the pf_state_link struct so they can be unwired + * from the pools. + */ + +#define PF_STATE_LINK_TYPE_STATEPL 1 +#define PF_STATE_LINK_TYPE_SOURCEPL 2 + +struct pf_state_link { + /* used by source/state pools to get to states */ + TAILQ_ENTRY(pf_state_link) pfl_link; + + /* used by pf_state to get to source/state pools */ + SLIST_ENTRY(pf_state_link) pfl_linkage; + + struct pf_state *pfl_state; + unsigned int pfl_type; +}; + +TAILQ_HEAD(pf_state_link_list, pf_state_link); +SLIST_HEAD(pf_state_linkage, pf_state_link); /* * pf state items - links from pf_state_key to pf_states @@ -144,6 +184,9 @@ struct pf_state { u_int16_t if_index_out; /* [I] */ u_int16_t delay; /* [I] */ u_int8_t rt; /* [I] */ + uint8_t statepl; + uint8_t sourcepl; + struct pf_state_linkage linkage; }; RBT_HEAD(pf_state_tree_id, pf_state); @@ -255,6 +298,204 @@ struct pf_state_list { .pfs_mtx = MUTEX_INITIALIZER(IPL_SOFTNET), \ .pfs_rwl = RWLOCK_INITIALIZER("pfstates"), \ } + +/* + * State pools + */ + +struct pf_statepl { + RBT_ENTRY(pf_statepl) pfstpl_tree; + TAILQ_ENTRY(pf_statepl) pfstpl_list; + struct kstat *pfstpl_ks; + + uint32_t pfstpl_id; + + /* config */ + + unsigned int pfstpl_limit; + struct { + unsigned int limit; + unsigned int seconds; + } pfstpl_rate; + + char pfstpl_descr[PF_STATEPL_DESCR_LEN]; + + /* run state */ + struct pc_lock pfstpl_lock; + + /* rate limiter */ + uint64_t pfstpl_rate_ts; + uint64_t pfstpl_rate_token; + uint64_t pfstpl_rate_bucket; + + unsigned int pfstpl_inuse; + struct pf_state_link_list pfstpl_states; + + /* counters */ + + struct { + uint64_t admitted; + uint64_t hardlimited; + uint64_t ratelimited; + } pfstpl_counters; + + struct { + time_t created; + time_t updated; + time_t cleared; + } pfstpl_timestamps; +}; + +RBT_HEAD(pf_statepl_tree, pf_statepl); +RBT_PROTOTYPE(pf_statepl_tree, pf_statepl, pfstpl_tree, cmp); + +TAILQ_HEAD(pf_statepl_list, pf_statepl); + +extern struct pf_statepl_tree pf_statepl_tree_active; +extern struct pf_statepl_list pf_statepl_list_active; + +extern struct pf_statepl_tree pf_statepl_tree_inactive; +extern struct pf_statepl_list pf_statepl_list_inactive; + +static inline unsigned int +pf_statepl_enter(struct pf_statepl *pfstpl) +{ + return (pc_sprod_enter(&pfstpl->pfstpl_lock)); +} + +static inline void +pf_statepl_leave(struct pf_statepl *pfstpl, unsigned int gen) +{ + pc_sprod_leave(&pfstpl->pfstpl_lock, gen); +} + +/* + * Source address pools + */ + +struct pf_sourcepl; + +struct pf_source { + RBT_ENTRY(pf_source) pfsr_tree; + RBT_ENTRY(pf_source) pfsr_ioc_tree; + struct pf_sourcepl *pfsr_parent; + + sa_family_t pfsr_af; + u_int16_t pfsr_rdomain; + struct pf_addr pfsr_addr; + + /* run state */ + + unsigned int pfsr_inuse; + unsigned int pfsr_intable; + struct pf_state_link_list pfsr_states; + time_t pfsr_empty_ts; + TAILQ_ENTRY(pf_source) pfsr_empty_gc; + + /* rate limiter */ + uint64_t pfsr_rate_ts; + + struct { + uint64_t admitted; + uint64_t hardlimited; + uint64_t ratelimited; + } pfsr_counters; +}; + +RBT_HEAD(pf_source_tree, pf_source); +RBT_PROTOTYPE(pf_source_tree, pf_source, pfsr_tree, cmp); + +RBT_HEAD(pf_source_ioc_tree, pf_source); +RBT_PROTOTYPE(pf_source_ioc_tree, pf_source, pfsr_ioc_tree, cmp); + +TAILQ_HEAD(pf_source_list, pf_source); + +struct pf_sourcepl { + RBT_ENTRY(pf_sourcepl) pfsrpl_tree; + TAILQ_ENTRY(pf_sourcepl) pfsrpl_list; + struct kstat *pfsrpl_ks; + + uint32_t pfsrpl_id; + unsigned int pfsrpl_disabled; + + /* config */ + + 
+ unsigned int pfsrpl_limit; /* nsources limit */
+ unsigned int pfsrpl_states;
+ unsigned int pfsrpl_ipv4_prefix;
+ unsigned int pfsrpl_ipv6_prefix;
+
+ struct {
+  unsigned int limit;
+  unsigned int seconds;
+ } pfsrpl_rate;
+
+ struct {
+  char name[PF_TABLE_NAME_SIZE];
+  unsigned int hwm;
+  unsigned int lwm;
+  struct pfr_ktable *table;
+ } pfsrpl_overload;
+
+ char pfsrpl_descr[PF_SOURCEPL_DESCR_LEN];
+
+ /* run state */
+ struct pc_lock pfsrpl_lock;
+
+ struct pf_addr pfsrpl_ipv4_mask;
+ struct pf_addr pfsrpl_ipv6_mask;
+
+ uint64_t pfsrpl_rate_token;
+ uint64_t pfsrpl_rate_bucket;
+
+ /* number of pf_sources */
+ unsigned int pfsrpl_nsources;
+ struct pf_source_tree pfsrpl_sources;
+ struct pf_source_ioc_tree pfsrpl_ioc_sources;
+
+ struct {
+  /* number of times a pf_source was allocated */
+  uint64_t addrallocs;
+  /* state was rejected because the address limit was hit */
+  uint64_t addrlimited;
+  /* no memory to allocate a pf_source */
+  uint64_t addrnomem;
+
+  /* sum of pf_source inuse gauges */
+  uint64_t inuse;
+  /* sum of pf_source admitted counters */
+  uint64_t admitted;
+  /* sum of pf_source hardlimited counters */
+  uint64_t hardlimited;
+  /* sum of pf_source ratelimited counters */
+  uint64_t ratelimited;
+ } pfsrpl_counters;
+};
+
+RBT_HEAD(pf_sourcepl_tree, pf_sourcepl);
+RBT_PROTOTYPE(pf_sourcepl_tree, pf_sourcepl, pfsrpl_tree, cmp);
+
+TAILQ_HEAD(pf_sourcepl_list, pf_sourcepl);
+
+extern struct pf_sourcepl_tree pf_sourcepl_tree_active;
+extern struct pf_sourcepl_list pf_sourcepl_list_active;
+
+extern struct pf_sourcepl_tree pf_sourcepl_tree_inactive;
+extern struct pf_sourcepl_list pf_sourcepl_list_inactive;
+
+static inline unsigned int
+pf_sourcepl_enter(struct pf_sourcepl *pfsrpl)
+{
+ return (pc_sprod_enter(&pfsrpl->pfsrpl_lock));
+}
+
+static inline void
+pf_sourcepl_leave(struct pf_sourcepl *pfsrpl, unsigned int gen)
+{
+ pc_sprod_leave(&pfsrpl->pfsrpl_lock, gen);
+}
+
+/* */
 
 extern struct rwlock pf_lock;
 
Index: sys/sys/pclock.h
===================================================================
RCS file: sys/sys/pclock.h
diff -N sys/sys/pclock.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ sys/sys/pclock.h	3 Mar 2024 05:50:09 -0000
@@ -0,0 +1,49 @@
+/* $OpenBSD$ */
+
+/*
+ * Copyright (c) 2023 David Gwynne <dlg@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _SYS_PCLOCK_H
+#define _SYS_PCLOCK_H
+
+#include <sys/cdefs.h>
+
+struct pc_lock {
+ volatile unsigned int pcl_gen;
+};
+
+#ifdef _KERNEL
+
+#define PC_LOCK_INITIALIZER() { .pcl_gen = 0 }
+
+void pc_lock_init(struct pc_lock *);
+
+/* single (non-interlocking) producer */
+unsigned int pc_sprod_enter(struct pc_lock *);
+void pc_sprod_leave(struct pc_lock *, unsigned int);
+
+/* multiple (interlocking) producers */
+unsigned int pc_mprod_enter(struct pc_lock *);
+void pc_mprod_leave(struct pc_lock *, unsigned int);
+
+/* consumer */
+void pc_cons_enter(struct pc_lock *, unsigned int *);
+__warn_unused_result int
+ pc_cons_leave(struct pc_lock *, unsigned int *);
+
+#endif /* _KERNEL */
+
+#endif /* _SYS_PCLOCK_H */
Index: sys/sys/sched.h
===================================================================
RCS file: /cvs/src/sys/sys/sched.h,v
retrieving revision 1.70
diff -u -p -r1.70 sched.h
--- sys/sys/sched.h	24 Jan 2024 19:23:38 -0000	1.70
+++ sys/sys/sched.h	3 Mar 2024 05:50:09 -0000
@@ -97,6 +97,7 @@ struct cpustats {
 
 #include 
 #include 
+#include <sys/pclock.h>
 
 #define SCHED_NQS 32 /* 32 run queues. */
 
@@ -112,6 +113,7 @@ struct schedstate_percpu {
 	struct timespec spc_runtime; /* time curproc started running */
 	volatile int spc_schedflags; /* flags; see below */
 	u_int spc_schedticks; /* ticks for schedclock() */
+	struct pc_lock spc_cp_time_lock;
 	u_int64_t spc_cp_time[CPUSTATES]; /* CPU state statistics */
 	u_char spc_curpriority; /* usrpri of curproc */
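
[Reviewer note, not part of the diff: a minimal sketch of how the pc_lock API
above is meant to pair a single producer with lock-free consumers, using the
new spc_cp_time_lock around spc_cp_time[] as the example. The helper names
cp_time_inc() and cp_time_read() are hypothetical, and the retry contract
assumed for pc_cons_leave() (nonzero return plus a refreshed generation when
the producer raced the copy) is my reading of the prototypes in pclock.h, not
something the diff spells out.]

#include <sys/param.h>
#include <sys/systm.h>		/* kernel memcpy() */
#include <sys/sched.h>
#include <sys/pclock.h>

/* producer side, e.g. scheduler tick accounting on this CPU */
void
cp_time_inc(struct schedstate_percpu *spc, int state)
{
	unsigned int gen;

	/* bump the generation so consumers can detect a torn read */
	gen = pc_sprod_enter(&spc->spc_cp_time_lock);
	spc->spc_cp_time[state]++;
	pc_sprod_leave(&spc->spc_cp_time_lock, gen);
}

/* consumer side, e.g. sysctl copying out a consistent snapshot */
void
cp_time_read(struct schedstate_percpu *spc, u_int64_t *snap)
{
	unsigned int gen;

	pc_cons_enter(&spc->spc_cp_time_lock, &gen);
	do {
		/* copy; retry if the producer moved the generation */
		memcpy(snap, spc->spc_cp_time, sizeof(spc->spc_cp_time));
	} while (pc_cons_leave(&spc->spc_cp_time_lock, &gen) != 0);
}

The point of the generation scheme over a mutex here is that readers never
write to the shared cache line and the producer pays only the ordered stores
around its update; pc_mprod_enter()/pc_mprod_leave() would be the variant to
reach for if updates could race each other.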