Index: conf/files
===================================================================
RCS file: /cvs/src/sys/conf/files,v
retrieving revision 1.622
diff -u -p -r1.622 files
--- conf/files	5 Aug 2016 19:00:25 -0000	1.622
+++ conf/files	17 Aug 2016 11:10:08 -0000
@@ -687,6 +687,7 @@ file kern/subr_evcount.c
 file kern/subr_extent.c
 file kern/subr_hibernate.c		hibernate
 file kern/subr_log.c
+file kern/subr_percpu.c
 file kern/subr_poison.c		diagnostic
 file kern/subr_pool.c
 file kern/dma_alloc.c
Index: kern/init_main.c
===================================================================
RCS file: /cvs/src/sys/kern/init_main.c,v
retrieving revision 1.253
diff -u -p -r1.253 init_main.c
--- kern/init_main.c	17 May 2016 23:28:03 -0000	1.253
+++ kern/init_main.c	17 Aug 2016 11:10:08 -0000
@@ -143,6 +143,7 @@ void	init_exec(void);
 void	kqueue_init(void);
 void	taskq_init(void);
 void	pool_gc_pages(void *);
+void	percpu_init(void);
 
 extern char sigcode[], esigcode[], sigcoderet[];
 #ifdef SYSCALL_DEBUG
@@ -354,6 +355,9 @@ main(void *framep)
 	/* Configure virtual memory system, set vm rlimits. */
 	uvm_init_limits(p);
 
+	/* Per CPU memory allocation */
+	percpu_init();
+
 	/* Initialize the file systems. */
 #if defined(NFSSERVER) || defined(NFSCLIENT)
 	nfs_init();			/* initialize server/shared data */
Index: kern/subr_percpu.c
===================================================================
RCS file: kern/subr_percpu.c
diff -N kern/subr_percpu.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ kern/subr_percpu.c	17 Aug 2016 11:10:08 -0000
@@ -0,0 +1,326 @@
+/* $OpenBSD$ */
+
+/*
+ * Copyright (c) 2016 David Gwynne <dlg@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/pool.h>
+#include <sys/malloc.h>
+#include <sys/atomic.h>
+
+#include <sys/percpu.h>
+
+#ifdef MULTIPROCESSOR
+struct pool cpumem_pl;
+
+void
+percpu_init(void)
+{
+	pool_init(&cpumem_pl, sizeof(struct cpumem) * ncpus, 0, 0,
+	    PR_WAITOK, "percpumem", &pool_allocator_single);
+	pool_setipl(&cpumem_pl, IPL_NONE);
+}
+
+struct cpumem *
+cpumem_get(struct pool *pp)
+{
+	struct cpumem *cm;
+	unsigned int cpu;
+
+	cm = pool_get(&cpumem_pl, PR_WAITOK);
+
+	for (cpu = 0; cpu < ncpus; cpu++)
+		cm[cpu].mem = pool_get(pp, PR_WAITOK | PR_ZERO);
+
+	return (cm);
+}
+
+void
+cpumem_put(struct pool *pp, struct cpumem *cm)
+{
+	unsigned int cpu;
+
+	for (cpu = 0; cpu < ncpus; cpu++)
+		pool_put(pp, cm[cpu].mem);
+
+	pool_put(&cpumem_pl, cm);
+}
+
+struct cpumem *
+cpumem_malloc(size_t sz, int type)
+{
+	struct cpumem *cm;
+	unsigned int cpu;
+
+	sz = roundup(sz, CACHELINESIZE);
+
+	cm = pool_get(&cpumem_pl, PR_WAITOK);
+
+	for (cpu = 0; cpu < ncpus; cpu++)
+		cm[cpu].mem = malloc(sz, type, M_WAITOK | M_ZERO);
+
+	return (cm);
+}
+
+struct cpumem *
+cpumem_realloc(struct cpumem *bootcm, size_t sz, int type)
+{
+	struct cpumem *cm;
+	unsigned int cpu;
+
+	sz = roundup(sz, CACHELINESIZE);
+
+	cm = pool_get(&cpumem_pl, PR_WAITOK);
+
+	cm[0].mem = bootcm[0].mem;
+	for (cpu = 1; cpu < ncpus; cpu++)
+		cm[cpu].mem = malloc(sz, type, M_WAITOK | M_ZERO);
+
+	return (cm);
+}
+
+void
+cpumem_free(struct cpumem *cm, int type, size_t sz)
+{
+	unsigned int cpu;
+
+	sz = roundup(sz, CACHELINESIZE);
+
+	for (cpu = 0; cpu < ncpus; cpu++)
+		free(cm[cpu].mem, type, sz);
+
+	pool_put(&cpumem_pl, cm);
+}
+
+void *
+cpumem_first(struct cpumem_iter *i, struct cpumem *cm)
+{
+	i->cpu = 0;
+
+	return (cm[0].mem);
+}
+
+void *
+cpumem_next(struct cpumem_iter *i, struct cpumem *cm)
+{
+	unsigned int cpu = ++i->cpu;
+
+	if (cpu >= ncpus)
+		return (NULL);
+
+	return (cm[cpu].mem);
+}
+
+struct cpumem *
+counters_alloc(unsigned int n, int type)
+{
+	struct cpumem *cm;
+	struct cpumem_iter cmi;
+	uint64_t *counters;
+	unsigned int i;
+
+	KASSERT(n > 0);
+
+	n++; /* add space for a generation number */
+	cm = cpumem_malloc(n * sizeof(uint64_t), type);
+
+	CPUMEM_FOREACH(counters, &cmi, cm) {
+		for (i = 0; i < n; i++)
+			counters[i] = 0;
+	}
+
+	return (cm);
+}
+
+struct cpumem *
+counters_realloc(struct cpumem *cm, unsigned int n, int type)
+{
+	n++; /* the generation number */
+	return (cpumem_realloc(cm, n * sizeof(uint64_t), type));
+}
+
+void
+counters_free(struct cpumem *cm, int type, unsigned int n)
+{
+	n++; /* generation number */
+	cpumem_free(cm, type, n * sizeof(uint64_t));
+}
+
+/*
+ * Sum a consistent snapshot of every CPU's counters into output.
+ * A CPU makes its generation number odd for the duration of an
+ * update; the reader retries its copy until it observes the same
+ * even generation number before and after reading the counters.
+ */
+void
+counters_read(struct cpumem *cm, uint64_t *output, unsigned int n)
+{
+	struct cpumem_iter cmi;
+	uint64_t *gen, *counters, *temp;
+	uint64_t enter, leave;
+	unsigned int i;
+
+	for (i = 0; i < n; i++)
+		output[i] = 0;
+
+	temp = mallocarray(n, sizeof(uint64_t), M_TEMP, M_WAITOK);
+
+	gen = cpumem_first(&cmi, cm);
+	do {
+		counters = gen + 1;
+
+		enter = *gen;
+		for (;;) {
+			/* the generation number is odd during an update */
+			while (enter & 1) {
+				yield();
+				membar_consumer();
+				enter = *gen;
+			}
+
+			for (i = 0; i < n; i++)
+				temp[i] = counters[i];
+
+			membar_consumer();
+			leave = *gen;
+
+			if (enter == leave)
+				break;
+
+			enter = leave;
+		}
+
+		for (i = 0; i < n; i++)
+			output[i] += temp[i];
+
+		gen = cpumem_next(&cmi, cm);
+	} while (gen != NULL);
+
+	free(temp, M_TEMP, n * sizeof(uint64_t));
+}
+
+void
+counters_zero(struct cpumem *cm, unsigned int n)
+{
+	struct cpumem_iter cmi;
+	uint64_t *counters;
+	unsigned int i;
+
+	n++; /* zero the generation numbers too */
+
+	counters = cpumem_first(&cmi, cm);
+	do {
+		for (i = 0; i < n; i++)
+			counters[i] = 0;
+
+		counters = cpumem_next(&cmi, cm);
+	} while (counters != NULL);
+}
+
+#else /* MULTIPROCESSOR */
+
+/*
+ * Uniprocessor implementation of per-CPU data structures.
+ *
+ * UP percpu memory is a single memory allocation cast to/from the
+ * cpumem struct. It is not scaled up to the size of a cacheline
+ * because there's no other cache to contend with.
+ */
+
+void
+percpu_init(void)
+{
+	/* nop */
+}
+
+struct cpumem *
+cpumem_get(struct pool *pp)
+{
+	return (pool_get(pp, PR_WAITOK));
+}
+
+void
+cpumem_put(struct pool *pp, struct cpumem *cm)
+{
+	pool_put(pp, cm);
+}
+
+struct cpumem *
+cpumem_malloc(size_t sz, int type)
+{
+	return (malloc(sz, type, M_WAITOK | M_ZERO));
+}
+
+struct cpumem *
+cpumem_realloc(struct cpumem *cm, size_t sz, int type)
+{
+	return (cm);
+}
+
+void
+cpumem_free(struct cpumem *cm, int type, size_t sz)
+{
+	free(cm, type, sz);
+}
+
+struct cpumem *
+counters_alloc(unsigned int n, int type)
+{
+	KASSERT(n > 0);
+
+	return (cpumem_malloc(n * sizeof(uint64_t), type));
+}
+
+struct cpumem *
+counters_realloc(struct cpumem *cm, unsigned int n, int type)
+{
+	/* this is unnecessary, but symmetrical */
+	return (cpumem_realloc(cm, n * sizeof(uint64_t), type));
+}
+
+void
+counters_free(struct cpumem *cm, int type, unsigned int n)
+{
+	cpumem_free(cm, type, n * sizeof(uint64_t));
+}
+
+void
+counters_read(struct cpumem *cm, uint64_t *output, unsigned int n)
+{
+	uint64_t *counters;
+	unsigned int i;
+	int s;
+
+	counters = (uint64_t *)cm;
+
+	s = splhigh();
+	for (i = 0; i < n; i++)
+		output[i] = counters[i];
+	splx(s);
+}
+
+void
+counters_zero(struct cpumem *cm, unsigned int n)
+{
+	uint64_t *counters;
+	unsigned int i;
+	int s;
+
+	counters = (uint64_t *)cm;
+
+	s = splhigh();
+	for (i = 0; i < n; i++)
+		counters[i] = 0;
+	splx(s);
+}
+
+#endif /* MULTIPROCESSOR */
Index: net/pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.979
diff -u -p -r1.979 pf.c
--- net/pf.c	18 Jul 2016 13:17:44 -0000	1.979
+++ net/pf.c	17 Aug 2016 11:10:09 -0000
@@ -5833,7 +5833,7 @@ pf_check_proto_cksum(struct pf_pdesc *pd
 
 	/* need to do it in software */
 	if (p == IPPROTO_TCP)
-		tcpstat.tcps_inswcsum++;
+		tcpc_inc(tcpc_inswcsum);
 	else if (p == IPPROTO_UDP)
 		udpstat.udps_inswcsum++;
 
@@ -5860,7 +5860,7 @@ pf_check_proto_cksum(struct pf_pdesc *pd
 	if (sum) {
 		switch (p) {
 		case IPPROTO_TCP:
-			tcpstat.tcps_rcvbadsum++;
+			tcpc_inc(tcpc_rcvbadsum);
 			break;
 		case IPPROTO_UDP:
 			udpstat.udps_badsum++;
Index: netinet/ip_output.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.325
diff -u -p -r1.325 ip_output.c
--- netinet/ip_output.c	1 Jul 2016 18:28:58 -0000	1.325
+++ netinet/ip_output.c	17 Aug 2016 11:10:09 -0000
@@ -1800,7 +1800,7 @@ in_proto_cksum_out(struct mbuf *m, struc
 	if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) {
 		if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_TCPv4) ||
 		    ip->ip_hl != 5 || ifp->if_bridgeport != NULL) {
-			tcpstat.tcps_outswcsum++;
+			tcpc_inc(tcpc_outswcsum);
 			in_delayed_cksum(m);
 			m->m_pkthdr.csum_flags &= ~M_TCP_CSUM_OUT; /* Clear */
 		}
Index: netinet/tcp_input.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_input.c,v
retrieving revision 1.325
diff -u -p -r1.325 tcp_input.c
--- netinet/tcp_input.c	20 Jul 2016 09:15:28 -0000	1.325
+++ netinet/tcp_input.c	17 Aug 2016 11:10:09 -0000
@@ -220,7 +220,7 @@ tcp_reass(struct tcpcb *tp, struct tcphd if (tiqe == NULL || th->th_seq != tp->rcv_nxt) { /* Flush segment queue for this connection */ tcp_freeq(tp); - tcpstat.tcps_rcvmemdrop++; + tcpc_inc(tcpc_rcvmemdrop); m_freem(m); return (0); } @@ -247,8 +247,8 @@ tcp_reass(struct tcpcb *tp, struct tcphd i = phdr->th_seq + phdr->th_reseqlen - th->th_seq; if (i > 0) { if (i >= *tlen) { - tcpstat.tcps_rcvduppack++; - tcpstat.tcps_rcvdupbyte += *tlen; + tcpc_pkt(tcpc_rcvduppack, + tcpc_rcvdupbyte, *tlen); m_freem(m); pool_put(&tcpqe_pool, tiqe); return (0); @@ -258,8 +258,7 @@ tcp_reass(struct tcpcb *tp, struct tcphd th->th_seq += i; } } - tcpstat.tcps_rcvoopack++; - tcpstat.tcps_rcvoobyte += *tlen; + tcpc_pkt(tcpc_rcvoopack, tcpc_rcvoobyte, *tlen); /* * While we overlap succeeding segments trim them or, @@ -372,6 +371,8 @@ tcp_input(struct mbuf *m, ...) int iphlen; va_list ap; struct tcphdr *th; + struct counters_ref r; + uint64_t *tcpc; #ifdef INET6 struct ip6_hdr *ip6 = NULL; #endif /* INET6 */ @@ -390,7 +391,7 @@ tcp_input(struct mbuf *m, ...) iphlen = va_arg(ap, int); va_end(ap); - tcpstat.tcps_rcvtotal++; + tcpc_inc(tcpc_rcvtotal); opti.ts_present = 0; opti.maxseg = 0; @@ -449,7 +450,7 @@ tcp_input(struct mbuf *m, ...) IP6_EXTHDR_GET(th, struct tcphdr *, m, iphlen, sizeof(*th)); if (!th) { - tcpstat.tcps_rcvshort++; + tcpc_inc(tcpc_rcvshort); return; } @@ -509,10 +510,10 @@ tcp_input(struct mbuf *m, ...) int sum; if (m->m_pkthdr.csum_flags & M_TCP_CSUM_IN_BAD) { - tcpstat.tcps_rcvbadsum++; + tcpc_inc(tcpc_rcvbadsum); goto drop; } - tcpstat.tcps_inswcsum++; + tcpc_inc(tcpc_inswcsum); switch (af) { case AF_INET: sum = in4_cksum(m, IPPROTO_TCP, iphlen, tlen); @@ -525,7 +526,7 @@ tcp_input(struct mbuf *m, ...) #endif } if (sum != 0) { - tcpstat.tcps_rcvbadsum++; + tcpc_inc(tcpc_rcvbadsum); goto drop; } } @@ -536,14 +537,14 @@ tcp_input(struct mbuf *m, ...) */ off = th->th_off << 2; if (off < sizeof(struct tcphdr) || off > tlen) { - tcpstat.tcps_rcvbadoff++; + tcpc_inc(tcpc_rcvbadoff); goto drop; } tlen -= off; if (off > sizeof(struct tcphdr)) { IP6_EXTHDR_GET(th, struct tcphdr *, m, iphlen, off); if (!th) { - tcpstat.tcps_rcvshort++; + tcpc_inc(tcpc_rcvshort); return; } optlen = off - sizeof(struct tcphdr); @@ -603,7 +604,7 @@ findpcb: int inpl_reverse = 0; if (m->m_pkthdr.pf.flags & PF_TAG_TRANSLATE_LOCALHOST) inpl_reverse = 1; - ++tcpstat.tcps_pcbhashmiss; + tcpc_inc(tcpc_pcbhashmiss); switch (af) { #ifdef INET6 case AF_INET6: @@ -625,7 +626,7 @@ findpcb: * but should either do a listen or a connect soon. */ if (inp == NULL) { - ++tcpstat.tcps_noport; + tcpc_inc(tcpc_noport); goto dropwithreset_ratelim; } } @@ -842,14 +843,14 @@ findpcb: case AF_INET6: if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, &ip6->ip6_dst)) { - tcpstat.tcps_badsyn++; + tcpc_inc(tcpc_badsyn); goto drop; } break; #endif /* INET6 */ case AF_INET: if (ip->ip_dst.s_addr == ip->ip_src.s_addr) { - tcpstat.tcps_badsyn++; + tcpc_inc(tcpc_badsyn); goto drop; } break; @@ -863,7 +864,7 @@ findpcb: if (so->so_qlen > so->so_qlimit || syn_cache_add(&src.sa, &dst.sa, th, iphlen, so, m, optp, optlen, &opti, reuse) == -1) { - tcpstat.tcps_dropsyn++; + tcpc_inc(tcpc_dropsyn); goto drop; } return; @@ -896,7 +897,7 @@ findpcb: ipsp_spd_lookup(m, af, iphlen, &error, IPSP_DIRECTION_IN, tdb, inp, 0); if (error) { - tcpstat.tcps_rcvnosec++; + tcpc_inc(tcpc_rcvnosec); goto drop; } #endif /* IPSEC */ @@ -942,7 +943,7 @@ findpcb: /* if congestion experienced, set ECE bit in subsequent packets. 
*/ if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) { tp->t_flags |= TF_RCVD_CE; - tcpstat.tcps_ecn_rcvce++; + tcpc_inc(tcpc_ecn_rcvce); } #endif /* @@ -988,7 +989,6 @@ findpcb: /* * this is a pure ack for outstanding data. */ - ++tcpstat.tcps_predack; if (opti.ts_present && opti.ts_ecr) tcp_xmit_timer(tp, tcp_now - opti.ts_ecr); else if (tp->t_rtttime && @@ -996,8 +996,11 @@ findpcb: tcp_xmit_timer(tp, tcp_now - tp->t_rtttime); acked = th->th_ack - tp->snd_una; - tcpstat.tcps_rcvackpack++; - tcpstat.tcps_rcvackbyte += acked; + tcpc = counters_enter(&r, tcpcounters); + tcpc[tcpc_predack]++; + tcpc[tcpc_rcvackpack]++; + tcpc[tcpc_rcvackbyte] += acked; + counters_leave(&r, tcpcounters); ND6_HINT(tp); sbdrop(&so->so_snd, acked); @@ -1074,10 +1077,12 @@ findpcb: if (tp->sack_enable && tp->rcv_numsacks) tcp_clean_sackreport(tp); #endif /* TCP_SACK */ - ++tcpstat.tcps_preddat; tp->rcv_nxt += tlen; - tcpstat.tcps_rcvpack++; - tcpstat.tcps_rcvbyte += tlen; + tcpc = counters_enter(&r, tcpcounters); + tcpc[tcpc_preddat]++; + tcpc[tcpc_rcvpack]++; + tcpc[tcpc_rcvbyte] += tlen; + counters_leave(&r, tcpcounters); ND6_HINT(tp); TCP_SETUP_ACK(tp, tiflags, m); @@ -1144,7 +1149,7 @@ findpcb: case TCPS_SYN_RECEIVED: if (tiflags & TH_ACK) { if (tiflags & TH_SYN) { - tcpstat.tcps_badsyn++; + tcpc_inc(tcpc_badsyn); goto dropwithreset; } if (SEQ_LEQ(th->th_ack, tp->snd_una) || @@ -1216,13 +1221,13 @@ findpcb: case TH_ECE|TH_CWR: tp->t_flags |= TF_ECN_PERMIT; tiflags &= ~(TH_ECE|TH_CWR); - tcpstat.tcps_ecn_accepts++; + tcpc_inc(tcpc_ecn_accepts); } } #endif if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) { - tcpstat.tcps_connects++; + tcpc_inc(tcpc_connects); soisconnected(so); tp->t_state = TCPS_ESTABLISHED; TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle); @@ -1266,8 +1271,8 @@ trimthenstep6: m_adj(m, -todrop); tlen = tp->rcv_wnd; tiflags &= ~TH_FIN; - tcpstat.tcps_rcvpackafterwin++; - tcpstat.tcps_rcvbyteafterwin += todrop; + tcpc_pkt(tcpc_rcvpackafterwin, + tcpc_rcvbyteafterwin, todrop); } tp->snd_wl1 = th->th_seq - 1; tp->rcv_up = th->th_seq; @@ -1333,9 +1338,11 @@ trimthenstep6: */ tp->ts_recent = 0; } else { - tcpstat.tcps_rcvduppack++; - tcpstat.tcps_rcvdupbyte += tlen; - tcpstat.tcps_pawsdrop++; + tcpc = counters_enter(&r, tcpcounters); + tcpc[tcpc_pawsdrop]++; + tcpc[tcpc_rcvduppack]++; + tcpc[tcpc_rcvdupbyte] += tlen; + counters_leave(&r, tcpcounters); goto dropafterack; } } @@ -1364,11 +1371,12 @@ trimthenstep6: * but keep on processing for RST or ACK. 
*/ tp->t_flags |= TF_ACKNOW; - tcpstat.tcps_rcvdupbyte += todrop = tlen; - tcpstat.tcps_rcvduppack++; + todrop = tlen; + tcpc_pkt(tcpc_rcvduppack, + tcpc_rcvdupbyte, todrop); } else { - tcpstat.tcps_rcvpartduppack++; - tcpstat.tcps_rcvpartdupbyte += todrop; + tcpc_pkt(tcpc_rcvpartduppack, + tcpc_rcvpartdupbyte, todrop); } hdroptlen += todrop; /* drop from head afterwards */ th->th_seq += todrop; @@ -1388,7 +1396,7 @@ trimthenstep6: if ((so->so_state & SS_NOFDREF) && tp->t_state > TCPS_CLOSE_WAIT && tlen) { tp = tcp_close(tp); - tcpstat.tcps_rcvafterclose++; + tcpc_inc(tcpc_rcvafterclose); goto dropwithreset; } @@ -1398,9 +1406,10 @@ trimthenstep6: */ todrop = (th->th_seq + tlen) - (tp->rcv_nxt+tp->rcv_wnd); if (todrop > 0) { - tcpstat.tcps_rcvpackafterwin++; + tcpc = counters_enter(&r, tcpcounters); + tcpc[tcpc_rcvpackafterwin]++; if (todrop >= tlen) { - tcpstat.tcps_rcvbyteafterwin += tlen; + tcpc[tcpc_rcvbyteafterwin] += tlen; /* * If window is closed can only take segments at * window edge, and have to drop data and PUSH from @@ -1410,11 +1419,14 @@ trimthenstep6: */ if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) { tp->t_flags |= TF_ACKNOW; - tcpstat.tcps_rcvwinprobe++; - } else + tcpc[tcpc_rcvwinprobe]++; + } else { + counters_leave(&r, tcpcounters); goto dropafterack; + } } else - tcpstat.tcps_rcvbyteafterwin += todrop; + tcpc[tcpc_rcvbyteafterwin] += todrop; + counters_leave(&r, tcpcounters); m_adj(m, -todrop); tlen -= todrop; tiflags &= ~(TH_PUSH|TH_FIN); @@ -1468,7 +1480,7 @@ trimthenstep6: so->so_error = ECONNRESET; close: tp->t_state = TCPS_CLOSED; - tcpstat.tcps_drops++; + tcpc_inc(tcpc_drops); tp = tcp_close(tp); goto drop; case TCPS_CLOSING: @@ -1507,7 +1519,7 @@ trimthenstep6: * The ACK was checked above. */ case TCPS_SYN_RECEIVED: - tcpstat.tcps_connects++; + tcpc_inc(tcpc_connects); soisconnected(so); tp->t_state = TCPS_ESTABLISHED; TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle); @@ -1555,10 +1567,10 @@ trimthenstep6: tp->snd_cwnd = tp->snd_ssthresh; tp->snd_last = tp->snd_max; tp->t_flags |= TF_SEND_CWR; - tcpstat.tcps_cwr_ecn++; + tcpc_inc(tcpc_cwr_ecn); } } - tcpstat.tcps_ecn_rcvece++; + tcpc_inc(tcpc_ecn_rcvece); } /* * if we receive CWR, we know that the peer has reduced @@ -1566,7 +1578,7 @@ trimthenstep6: */ if ((tiflags & TH_CWR)) { tp->t_flags &= ~TF_RCVD_CE; - tcpstat.tcps_ecn_rcvcwr++; + tcpc_inc(tcpc_ecn_rcvcwr); } #endif /* TCP_ECN */ @@ -1588,7 +1600,7 @@ trimthenstep6: if (th->th_seq != tp->rcv_nxt && SEQ_LT(th->th_ack, tp->snd_una - tp->max_sndwnd)) { - tcpstat.tcps_rcvacktooold++; + tcpc_inc(tcpc_rcvacktooold); goto drop; } break; @@ -1604,7 +1616,7 @@ trimthenstep6: break; } if (tiwin == tp->snd_wnd) { - tcpstat.tcps_rcvdupack++; + tcpc_inc(tcpc_rcvdupack); /* * If we have outstanding data (other than * a window probe), this is a completely @@ -1669,8 +1681,12 @@ trimthenstep6: #ifdef TCP_ECN tp->t_flags |= TF_SEND_CWR; #endif - tcpstat.tcps_cwr_frecovery++; - tcpstat.tcps_sack_recovery_episode++; + tcpc = counters_enter(&r, + tcpcounters); + tcpc[tcpc_cwr_frecovery]++; + tcpc[tcpc_sack_recovery_episode]++; + counters_leave(&r, + tcpcounters); #if defined(TCP_SACK) && defined(TCP_FACK) tp->t_dupacks = tcprexmtthresh; (void) tcp_output(tp); @@ -1698,8 +1714,10 @@ trimthenstep6: #ifdef TCP_ECN tp->t_flags |= TF_SEND_CWR; #endif - tcpstat.tcps_cwr_frecovery++; - tcpstat.tcps_sndrexmitfast++; + tcpc = counters_enter(&r, tcpcounters); + tcpc[tcpc_cwr_frecovery]++; + tcpc[tcpc_sndrexmitfast]++; + counters_leave(&r, tcpcounters); (void) tcp_output(tp); 
tp->snd_cwnd = tp->snd_ssthresh + @@ -1788,12 +1806,11 @@ trimthenstep6: tp->t_dupacks = 0; #endif if (SEQ_GT(th->th_ack, tp->snd_max)) { - tcpstat.tcps_rcvacktoomuch++; + tcpc_inc(tcpc_rcvacktoomuch); goto dropafterack_ratelim; } acked = th->th_ack - tp->snd_una; - tcpstat.tcps_rcvackpack++; - tcpstat.tcps_rcvackbyte += acked; + tcpc_pkt(tcpc_rcvackpack, tcpc_rcvackbyte, acked); /* * If we have a timestamp reply, update smoothed @@ -1966,7 +1983,7 @@ step6: /* keep track of pure window updates */ if (tlen == 0 && tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd) - tcpstat.tcps_rcvwinupd++; + tcpc_inc(tcpc_rcvwinupd); tp->snd_wnd = tiwin; tp->snd_wl1 = th->th_seq; tp->snd_wl2 = th->th_ack; @@ -2052,8 +2069,7 @@ dodata: /* XXX */ TCP_SETUP_ACK(tp, tiflags, m); tp->rcv_nxt += tlen; tiflags = th->th_flags & TH_FIN; - tcpstat.tcps_rcvpack++; - tcpstat.tcps_rcvbyte += tlen; + tcpc_pkt(tcpc_rcvpack, tcpc_rcvbyte, tlen); ND6_HINT(tp); if (so->so_state & SS_CANTRCVMORE) m_freem(m); @@ -2165,7 +2181,7 @@ badsyn: /* * Received a bad SYN. Increment counters and dropwithreset. */ - tcpstat.tcps_badsyn++; + tcpc_inc(tcpc_badsyn); tp = NULL; goto dropwithreset; @@ -2392,7 +2408,7 @@ tcp_dooptions(struct tcpcb *tp, u_char * } if ((sigp ? TF_SIGNATURE : 0) ^ (tp->t_flags & TF_SIGNATURE)) { - tcpstat.tcps_rcvbadsig++; + tcpc_inc(tcpc_rcvbadsig); return (-1); } @@ -2400,7 +2416,7 @@ tcp_dooptions(struct tcpcb *tp, u_char * char sig[16]; if (tdb == NULL) { - tcpstat.tcps_rcvbadsig++; + tcpc_inc(tcpc_rcvbadsig); return (-1); } @@ -2408,11 +2424,11 @@ tcp_dooptions(struct tcpcb *tp, u_char * return (-1); if (timingsafe_bcmp(sig, sigp, 16)) { - tcpstat.tcps_rcvbadsig++; + tcpc_inc(tcpc_rcvbadsig); return (-1); } - tcpstat.tcps_rcvgoodsig++; + tcpc_inc(tcpc_rcvgoodsig); } #endif /* TCP_SIGNATURE */ @@ -2550,7 +2566,7 @@ tcp_sack_option(struct tcpcb *tp, struct /* Note: TCPOLEN_SACK must be 2*sizeof(tcp_seq) */ tmp_cp = cp + 2; tmp_olen = optlen - 2; - tcpstat.tcps_sack_rcv_opts++; + tcpc_inc(tcpc_sack_rcv_opts); if (tp->snd_numholes < 0) tp->snd_numholes = 0; if (tp->t_maxseg == 0) @@ -2870,7 +2886,7 @@ tcp_xmit_timer(struct tcpcb *tp, int rtt else if (rtt > TCP_RTT_MAX) rtt = TCP_RTT_MAX; - tcpstat.tcps_rttupdated++; + tcpc_inc(tcpc_rttupdated); if (tp->t_srtt != 0) { /* * delta is fixed point with 2 (TCP_RTT_BASE_SHIFT) bits @@ -3272,6 +3288,7 @@ int tcp_syn_bucket_limit = 3*TCP_SYN_BUC int tcp_syn_use_limit = 100000; struct syn_cache_set tcp_syn_cache[2]; +struct tcpscstat syn_cache_stat; int tcp_syn_cache_active; #define SYN_HASH(sa, sp, dp, rand) \ @@ -3412,7 +3429,7 @@ syn_cache_insert(struct syn_cache *sc, s } } arc4random_buf(set->scs_random, sizeof(set->scs_random)); - tcpstat.tcps_sc_seedrandom++; + syn_cache_stat.tcpsc_seedrandom++; } SYN_HASHALL(sc->sc_hash, &sc->sc_src.sa, &sc->sc_dst.sa, @@ -3425,7 +3442,7 @@ syn_cache_insert(struct syn_cache *sc, s * limit or the total cache size limit. */ if (scp->sch_length >= tcp_syn_bucket_limit) { - tcpstat.tcps_sc_bucketoverflow++; + syn_cache_stat.tcpsc_bucketoverflow++; /* * Someone might attack our bucket hash function. Reseed * with random as soon as the passive syn cache gets empty. @@ -3449,7 +3466,7 @@ syn_cache_insert(struct syn_cache *sc, s } else if (set->scs_count >= tcp_syn_cache_limit) { struct syn_cache_head *scp2, *sce; - tcpstat.tcps_sc_overflowed++; + syn_cache_stat.tcpsc_overflowed++; /* * The cache is full. Toss the oldest entry in the * first non-empty bucket we can find. 
@@ -3499,7 +3516,7 @@ syn_cache_insert(struct syn_cache *sc, s set->scs_count++; set->scs_use--; - tcpstat.tcps_sc_added++; + syn_cache_stat.tcpsc_added++; /* * If the active cache has exceeded its use limit and @@ -3543,7 +3560,7 @@ syn_cache_timer(void *arg) if (sc->sc_rxttot >= tcptv_keep_init) goto dropit; - tcpstat.tcps_sc_retransmitted++; + syn_cache_stat.tcpsc_retransmitted++; (void) syn_cache_respond(sc, NULL); /* Advance the timer back-off. */ @@ -3554,7 +3571,7 @@ syn_cache_timer(void *arg) return; dropit: - tcpstat.tcps_sc_timed_out++; + syn_cache_stat.tcpsc_timed_out++; syn_cache_rm(sc); syn_cache_put(sc); splx(s); @@ -3835,7 +3852,7 @@ syn_cache_get(struct sockaddr *src, stru #ifdef TCP_ECN if (sc->sc_flags & SCF_ECN_PERMIT) { tp->t_flags |= TF_ECN_PERMIT; - tcpstat.tcps_ecn_accepts++; + tcpc_inc(tcpc_ecn_accepts); } #endif #ifdef TCP_SACK @@ -3850,7 +3867,7 @@ syn_cache_get(struct sockaddr *src, stru tp->t_state = TCPS_SYN_RECEIVED; tp->t_rcvtime = tcp_now; TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init); - tcpstat.tcps_accepts++; + tcpc_inc(tcpc_accepts); tcp_mss(tp, sc->sc_peermaxseg); /* sets t_maxseg */ if (sc->sc_peermaxseg) @@ -3872,7 +3889,7 @@ syn_cache_get(struct sockaddr *src, stru tp->rcv_adv = tp->rcv_nxt + sc->sc_win; tp->last_ack_sent = tp->rcv_nxt; - tcpstat.tcps_sc_completed++; + syn_cache_stat.tcpsc_completed++; syn_cache_put(sc); return (so); @@ -3884,7 +3901,7 @@ abort: if (so != NULL) (void) soabort(so); syn_cache_put(sc); - tcpstat.tcps_sc_aborted++; + syn_cache_stat.tcpsc_aborted++; return ((struct socket *)(-1)); } @@ -3913,7 +3930,7 @@ syn_cache_reset(struct sockaddr *src, st } syn_cache_rm(sc); splx(s); - tcpstat.tcps_sc_reset++; + syn_cache_stat.tcpsc_reset++; syn_cache_put(sc); } @@ -3952,7 +3969,7 @@ syn_cache_unreach(struct sockaddr *src, syn_cache_rm(sc); splx(s); - tcpstat.tcps_sc_unreach++; + syn_cache_stat.tcpsc_unreach++; syn_cache_put(sc); } @@ -3980,6 +3997,8 @@ syn_cache_add(struct sockaddr *src, stru struct syn_cache *sc; struct syn_cache_head *scp; struct mbuf *ipopts; + struct counters_ref r; + uint64_t *tcpc; tp = sototcpcb(so); @@ -4035,7 +4054,7 @@ syn_cache_add(struct sockaddr *src, stru */ if ((sc = syn_cache_lookup(src, dst, &scp, sotoinpcb(so)->inp_rtableid)) != NULL) { - tcpstat.tcps_sc_dupesyn++; + syn_cache_stat.tcpsc_dupesyn++; if (ipopts) { /* * If we were remembering a previous source route, @@ -4047,8 +4066,10 @@ syn_cache_add(struct sockaddr *src, stru } sc->sc_timestamp = tb.ts_recent; if (syn_cache_respond(sc, m) == 0) { - tcpstat.tcps_sndacks++; - tcpstat.tcps_sndtotal++; + tcpc = counters_enter(&r, tcpcounters); + tcpc[tcpc_sndacks]++; + tcpc[tcpc_sndtotal]++; + counters_leave(&r, tcpcounters); } return (0); } @@ -4135,11 +4156,13 @@ syn_cache_add(struct sockaddr *src, stru sc->sc_tp = tp; if (syn_cache_respond(sc, m) == 0) { syn_cache_insert(sc, tp); - tcpstat.tcps_sndacks++; - tcpstat.tcps_sndtotal++; + tcpc = counters_enter(&r, tcpcounters); + tcpc[tcpc_sndacks]++; + tcpc[tcpc_sndtotal]++; + counters_leave(&r, tcpcounters); } else { syn_cache_put(sc); - tcpstat.tcps_sc_dropped++; + syn_cache_stat.tcpsc_dropped++; } return (0); Index: netinet/tcp_output.c =================================================================== RCS file: /cvs/src/sys/netinet/tcp_output.c,v retrieving revision 1.118 diff -u -p -r1.118 tcp_output.c --- netinet/tcp_output.c 19 Jul 2016 21:28:43 -0000 1.118 +++ netinet/tcp_output.c 17 Aug 2016 11:10:09 -0000 @@ -211,6 +211,8 @@ tcp_output(struct tcpcb *tp) u_char *opt = (u_char *)optbuf; 
unsigned int optlen, hdrlen, packetlen; int idle, sendalot = 0; + struct counters_ref r; + uint64_t *tcpc; #ifdef TCP_SACK int i, sack_rxmit = 0; struct sackhole *p; @@ -641,7 +643,7 @@ send: int count = 0; /* actual number of SACKs inserted */ int maxsack = (MAX_TCPOPTLEN - (optlen + 4))/TCPOLEN_SACK; - tcpstat.tcps_sack_snd_opts++; + tcpc_inc(tcpc_sack_snd_opts); maxsack = min(maxsack, TCP_MAX_SACK); for (i = 0; (i < tp->rcv_numsacks && count < maxsack); i++) { struct sackblk sack = tp->sackblks[i]; @@ -684,15 +686,17 @@ send: * the template for sends on this connection. */ if (len) { + tcpc = counters_enter(&r, tcpcounters); if (tp->t_force && len == 1) - tcpstat.tcps_sndprobe++; + tcpc[tcpc_sndprobe]++; else if (SEQ_LT(tp->snd_nxt, tp->snd_max)) { - tcpstat.tcps_sndrexmitpack++; - tcpstat.tcps_sndrexmitbyte += len; + tcpc[tcpc_sndrexmitpack]++; + tcpc[tcpc_sndrexmitbyte] += len; } else { - tcpstat.tcps_sndpack++; - tcpstat.tcps_sndbyte += len; + tcpc[tcpc_sndpack]++; + tcpc[tcpc_sndbyte] += len; } + counters_leave(&r, tcpcounters); #ifdef notyet if ((m = m_copypack(so->so_snd.sb_mb, off, (int)len, max_linkhdr + hdrlen)) == 0) { @@ -745,14 +749,16 @@ send: if (off + len == so->so_snd.sb_cc && !soissending(so)) flags |= TH_PUSH; } else { + tcpc = counters_enter(&r, tcpcounters); if (tp->t_flags & TF_ACKNOW) - tcpstat.tcps_sndacks++; + tcpc[tcpc_sndacks]++; else if (flags & (TH_SYN|TH_FIN|TH_RST)) - tcpstat.tcps_sndctrl++; + tcpc[tcpc_sndctrl]++; else if (SEQ_GT(tp->snd_up, tp->snd_una)) - tcpstat.tcps_sndurg++; + tcpc[tcpc_sndurg]++; else - tcpstat.tcps_sndwinup++; + tcpc[tcpc_sndwinup]++; + counters_leave(&r, tcpcounters); MGETHDR(m, M_DONTWAIT, MT_HEADER); if (m != NULL && max_linkhdr + hdrlen > MHLEN) { @@ -823,8 +829,7 @@ send: #if defined(TCP_SACK) && defined(TCP_FACK) tp->retran_data += len; #endif /* TCP_FACK */ - tcpstat.tcps_sack_rexmits++; - tcpstat.tcps_sack_rexmit_bytes += len; + tcpc_pkt(tcpc_sack_rexmits, tcpc_sack_rexmit_bytes, len); } #endif /* TCP_SACK */ @@ -841,7 +846,7 @@ send: */ if (tp->t_flags & TF_RCVD_CE) { flags |= TH_ECE; - tcpstat.tcps_ecn_sndece++; + tcpc_inc(tcpc_ecn_sndece); } if (!(tp->t_flags & TF_DISABLE_ECN)) { /* @@ -862,7 +867,7 @@ send: (tp->t_flags & TF_SEND_CWR)) { flags |= TH_CWR; tp->t_flags &= ~TF_SEND_CWR; - tcpstat.tcps_ecn_sndcwr++; + tcpc_inc(tcpc_ecn_sndcwr); } } #endif @@ -982,7 +987,7 @@ send: if (tp->t_rtttime == 0) { tp->t_rtttime = tcp_now; tp->t_rtseq = startseq; - tcpstat.tcps_segstimed++; + tcpc_inc(tcpc_segstimed); } } @@ -1073,7 +1078,7 @@ send: /* don't set ECT */ } else { needect = 1; - tcpstat.tcps_ecn_sndect++; + tcpc_inc(tcpc_ecn_sndect); } } #endif @@ -1172,9 +1177,11 @@ out: if (packetlen > tp->t_pmtud_mtu_sent) tp->t_pmtud_mtu_sent = packetlen; - tcpstat.tcps_sndtotal++; + tcpc = counters_enter(&r, tcpcounters); + tcpc[tcpc_sndtotal]++; if (tp->t_flags & TF_DELACK) - tcpstat.tcps_delack++; + tcpc[tcpc_delack]++; + counters_leave(&r, tcpcounters); /* * Data sent (as far as we can tell). 
Index: netinet/tcp_subr.c =================================================================== RCS file: /cvs/src/sys/netinet/tcp_subr.c,v retrieving revision 1.151 diff -u -p -r1.151 tcp_subr.c --- netinet/tcp_subr.c 7 Mar 2016 18:44:00 -0000 1.151 +++ netinet/tcp_subr.c 17 Aug 2016 11:10:09 -0000 @@ -131,7 +131,7 @@ struct pool tcpqe_pool; struct pool sackhl_pool; #endif -struct tcpstat tcpstat; /* tcp statistics */ +struct cpumem *tcpcounters; /* tcp statistics */ tcp_seq tcp_iss; /* @@ -140,6 +140,7 @@ tcp_seq tcp_iss; void tcp_init(void) { + tcpcounters = counters_alloc(tcpc_ncounters, M_PCB); tcp_iss = 1; /* wrong */ pool_init(&tcpcb_pool, sizeof(struct tcpcb), 0, 0, 0, "tcpcb", NULL); pool_init(&tcpqe_pool, sizeof(struct tcpqent), 0, 0, 0, "tcpqe", NULL); @@ -498,9 +499,9 @@ tcp_drop(tp, errno) if (TCPS_HAVERCVDSYN(tp->t_state)) { tp->t_state = TCPS_CLOSED; (void) tcp_output(tp); - tcpstat.tcps_drops++; + tcpc_inc(tcpc_drops); } else - tcpstat.tcps_conndrops++; + tcpc_inc(tcpc_conndrops); if (errno == ETIMEDOUT && tp->t_softerror) errno = tp->t_softerror; so->so_error = errno; @@ -559,7 +560,7 @@ tcp_reaper(void *arg) s = splsoftnet(); pool_put(&tcpcb_pool, tp); splx(s); - tcpstat.tcps_closed++; + tcpc_inc(tcpc_closed); } int Index: netinet/tcp_timer.c =================================================================== RCS file: /cvs/src/sys/netinet/tcp_timer.c,v retrieving revision 1.49 diff -u -p -r1.49 tcp_timer.c --- netinet/tcp_timer.c 7 Mar 2016 18:44:00 -0000 1.49 +++ netinet/tcp_timer.c 17 Aug 2016 11:10:09 -0000 @@ -234,12 +234,12 @@ tcp_timer_rexmt(void *arg) #endif if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { tp->t_rxtshift = TCP_MAXRXTSHIFT; - tcpstat.tcps_timeoutdrop++; + tcpc_inc(tcpc_timeoutdrop); (void)tcp_drop(tp, tp->t_softerror ? tp->t_softerror : ETIMEDOUT); goto out; } - tcpstat.tcps_rexmttimeo++; + tcpc_inc(tcpc_rexmttimeo); rto = TCP_REXMTVAL(tp); if (rto < tp->t_rttmin) rto = tp->t_rttmin; @@ -371,7 +371,7 @@ tcp_timer_rexmt(void *arg) tp->t_flags |= TF_SEND_CWR; #endif #if 1 /* TCP_ECN */ - tcpstat.tcps_cwr_timeout++; + tcpc_inc(tcpc_cwr_timeout); #endif } (void) tcp_output(tp); @@ -393,7 +393,7 @@ tcp_timer_persist(void *arg) splx(s); return; } - tcpstat.tcps_persisttimeo++; + tcpc_inc(tcpc_persisttimeo); /* * Hack: if the peer is dead/unreachable, we do not * time out if the window is closed. After a full @@ -407,7 +407,7 @@ tcp_timer_persist(void *arg) if (tp->t_rxtshift == TCP_MAXRXTSHIFT && ((tcp_now - tp->t_rcvtime) >= tcp_maxpersistidle || (tcp_now - tp->t_rcvtime) >= rto * tcp_totbackoff)) { - tcpstat.tcps_persistdrop++; + tcpc_inc(tcpc_persistdrop); tp = tcp_drop(tp, ETIMEDOUT); goto out; } @@ -431,7 +431,7 @@ tcp_timer_keep(void *arg) return; } - tcpstat.tcps_keeptimeo++; + tcpc_inc(tcpc_keeptimeo); if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) goto dropit; if ((tcp_always_keepalive || @@ -452,7 +452,7 @@ tcp_timer_keep(void *arg) * by the protocol spec, this requires the * correspondent TCP to respond. 
*/ - tcpstat.tcps_keepprobe++; + tcpc_inc(tcpc_keepprobe); tcp_respond(tp, mtod(tp->t_template, caddr_t), NULL, tp->rcv_nxt, tp->snd_una - 1, 0, 0); TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl); @@ -463,7 +463,7 @@ tcp_timer_keep(void *arg) return; dropit: - tcpstat.tcps_keepdrops++; + tcpc_inc(tcpc_keepdrops); tp = tcp_drop(tp, ETIMEDOUT); splx(s); Index: netinet/tcp_usrreq.c =================================================================== RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v retrieving revision 1.134 diff -u -p -r1.134 tcp_usrreq.c --- netinet/tcp_usrreq.c 20 Jul 2016 19:57:53 -0000 1.134 +++ netinet/tcp_usrreq.c 17 Aug 2016 11:10:09 -0000 @@ -116,6 +116,7 @@ int *tcpctl_vars[TCPCTL_MAXID] = TCPCTL_ struct inpcbtable tcbtable; int tcp_ident(void *, size_t *, void *, size_t, int); +int tcp_stats(void *, size_t *, void *, size_t); /* * Process a TCP user request for TCP tb. If this is a send request @@ -288,7 +289,7 @@ tcp_usrreq(so, req, m, nam, control, p) tcp_rscale(tp, sb_max); soisconnecting(so); - tcpstat.tcps_connattempt++; + tcpc_inc(tcpc_connattempt); tp->t_state = TCPS_SYN_SENT; TCP_TIMER_ARM(tp, TCPT_KEEP, tcptv_keep_init); tcp_set_iss_tsm(tp); @@ -825,7 +826,7 @@ tcp_ident(void *oldp, size_t *oldlenp, v } if (inp == NULL) { - ++tcpstat.tcps_pcbhashmiss; + tcpc_inc(tcpc_pcbhashmiss); switch (tir.faddr.ss_family) { #ifdef INET6 case AF_INET6: @@ -934,28 +935,7 @@ tcp_sysctl(name, namelen, oldp, oldlenp, #endif case TCPCTL_STATS: - if (newp != NULL) - return (EPERM); - { - struct syn_cache_set *set; - int i; - - set = &tcp_syn_cache[tcp_syn_cache_active]; - tcpstat.tcps_sc_hash_size = set->scs_size; - tcpstat.tcps_sc_entry_count = set->scs_count; - tcpstat.tcps_sc_entry_limit = tcp_syn_cache_limit; - tcpstat.tcps_sc_bucket_maxlen = 0; - for (i = 0; i < set->scs_size; i++) { - if (tcpstat.tcps_sc_bucket_maxlen < - set->scs_buckethead[i].sch_length) - tcpstat.tcps_sc_bucket_maxlen = - set->scs_buckethead[i].sch_length; - } - tcpstat.tcps_sc_bucket_limit = tcp_syn_bucket_limit; - tcpstat.tcps_sc_uses_left = set->scs_use; - } - return (sysctl_struct(oldp, oldlenp, newp, newlen, - &tcpstat, sizeof(tcpstat))); + return tcp_stats(oldp, oldlenp, newp, newlen); case TCPCTL_SYN_USE_LIMIT: error = sysctl_int(oldp, oldlenp, newp, newlen, @@ -1002,6 +982,152 @@ tcp_sysctl(name, namelen, oldp, oldlenp, return (ENOPROTOOPT); } /* NOTREACHED */ +} + +int +tcp_stats(void *oldp, size_t *oldlenp, void *newp, size_t newlen) +{ + struct tcpstat tcpstat; + uint64_t tcpc[tcpc_ncounters]; + extern struct tcpscstat syn_cache_stat; + struct syn_cache_set *set; + int i; + + if (newp != NULL) + return (EPERM); + + counters_read(tcpcounters, tcpc, nitems(tcpc)); + +#define TCP_C2S(n) tcpstat.tcps_##n = tcpc[tcpc_##n] +#define TCP_SC2S(n) tcpstat.tcps_sc_##n = syn_cache_stat.tcpsc_##n + + TCP_C2S(connattempt); + TCP_C2S(accepts); + TCP_C2S(connects); + TCP_C2S(drops); + TCP_C2S(conndrops); + TCP_C2S(closed); + TCP_C2S(segstimed); + TCP_C2S(rttupdated); + TCP_C2S(delack); + TCP_C2S(timeoutdrop); + TCP_C2S(rexmttimeo); + TCP_C2S(persisttimeo); + TCP_C2S(persistdrop); + TCP_C2S(keeptimeo); + TCP_C2S(keepprobe); + TCP_C2S(keepdrops); + + TCP_C2S(sndtotal); + TCP_C2S(sndpack); + TCP_C2S(sndbyte); + TCP_C2S(sndrexmitpack); + TCP_C2S(sndrexmitbyte); + TCP_C2S(sndrexmitfast); + TCP_C2S(sndacks); + TCP_C2S(sndprobe); + TCP_C2S(sndurg); + TCP_C2S(sndwinup); + TCP_C2S(sndctrl); + + TCP_C2S(rcvtotal); + TCP_C2S(rcvpack); + TCP_C2S(rcvbyte); + TCP_C2S(rcvbadsum); + TCP_C2S(rcvbadoff); + TCP_C2S(rcvmemdrop); + 
TCP_C2S(rcvnosec); + TCP_C2S(rcvshort); + TCP_C2S(rcvduppack); + TCP_C2S(rcvdupbyte); + TCP_C2S(rcvpartduppack); + TCP_C2S(rcvpartdupbyte); + TCP_C2S(rcvoopack); + TCP_C2S(rcvoobyte); + TCP_C2S(rcvpackafterwin); + TCP_C2S(rcvbyteafterwin); + TCP_C2S(rcvafterclose); + TCP_C2S(rcvwinprobe); + TCP_C2S(rcvdupack); + TCP_C2S(rcvacktoomuch); + TCP_C2S(rcvacktooold); + TCP_C2S(rcvackpack); + TCP_C2S(rcvackbyte); + TCP_C2S(rcvwinupd); + TCP_C2S(pawsdrop); + TCP_C2S(predack); + TCP_C2S(preddat); + + TCP_C2S(pcbhashmiss); + TCP_C2S(noport); + TCP_C2S(badsyn); + TCP_C2S(dropsyn); + + TCP_C2S(rcvbadsig); + TCP_C2S(rcvgoodsig); + TCP_C2S(inswcsum); + TCP_C2S(outswcsum); + + /* ECN stats */ + TCP_C2S(ecn_accepts); + TCP_C2S(ecn_rcvece); + TCP_C2S(ecn_rcvcwr); + TCP_C2S(ecn_rcvce); + TCP_C2S(ecn_sndect); + TCP_C2S(ecn_sndece); + TCP_C2S(ecn_sndcwr); + TCP_C2S(cwr_ecn); + TCP_C2S(cwr_frecovery); + TCP_C2S(cwr_timeout); + + /* These statistics deal with the SYN cache. */ + TCP_SC2S(added); + TCP_SC2S(completed); + TCP_SC2S(timed_out); + TCP_SC2S(overflowed); + TCP_SC2S(reset); + TCP_SC2S(unreach); + TCP_SC2S(bucketoverflow); + TCP_SC2S(aborted); + TCP_SC2S(dupesyn); + TCP_SC2S(dropped); + TCP_SC2S(collisions); + TCP_SC2S(retransmitted); + TCP_SC2S(seedrandom); + TCP_SC2S(hash_size); + TCP_SC2S(entry_count); + TCP_SC2S(bucket_maxlen); + TCP_SC2S(bucket_limit); + TCP_SC2S(uses_left); + + TCP_C2S(conndrained); + + TCP_C2S(sack_recovery_episode); + TCP_C2S(sack_rexmits); + TCP_C2S(sack_rexmit_bytes); + TCP_C2S(sack_rcv_opts); + TCP_C2S(sack_snd_opts); + +#undef TCP_C2S +#undef TCP_SC2S + + set = &tcp_syn_cache[tcp_syn_cache_active]; + tcpstat.tcps_sc_hash_size = set->scs_size; + tcpstat.tcps_sc_entry_count = set->scs_count; + tcpstat.tcps_sc_entry_limit = tcp_syn_cache_limit; + tcpstat.tcps_sc_bucket_maxlen = 0; + for (i = 0; i < set->scs_size; i++) { + if (tcpstat.tcps_sc_bucket_maxlen < + set->scs_buckethead[i].sch_length) { + tcpstat.tcps_sc_bucket_maxlen = + set->scs_buckethead[i].sch_length; + } + } + tcpstat.tcps_sc_bucket_limit = tcp_syn_bucket_limit; + tcpstat.tcps_sc_uses_left = set->scs_use; + + return (sysctl_struct(oldp, oldlenp, newp, newlen, + &tcpstat, sizeof(tcpstat))); } /* Index: netinet/tcp_var.h =================================================================== RCS file: /cvs/src/sys/netinet/tcp_var.h,v retrieving revision 1.115 diff -u -p -r1.115 tcp_var.h --- netinet/tcp_var.h 20 Jul 2016 19:57:53 -0000 1.115 +++ netinet/tcp_var.h 17 Aug 2016 11:10:09 -0000 @@ -360,6 +360,146 @@ struct syn_cache_set { * Many of these should be kept per connection, * but that's inconvenient at the moment. */ + +#ifdef _KERNEL +enum tcpcounters { + tcpc_connattempt, /* connections initiated */ + tcpc_accepts, /* connections accepted */ + tcpc_connects, /* connections established */ + tcpc_drops, /* connections dropped */ + tcpc_conndrops, /* embryonic connections dropped */ + tcpc_closed, /* conn. closed (includes drops) */ + tcpc_segstimed, /* segs where we tried to get rtt */ + tcpc_rttupdated, /* times we succeeded */ + tcpc_delack, /* delayed acks sent */ + tcpc_timeoutdrop, /* conn. 
dropped in rxmt timeout */
+	tcpc_rexmttimeo,	/* retransmit timeouts */
+	tcpc_persisttimeo,	/* persist timeouts */
+	tcpc_persistdrop,	/* connections dropped in persist */
+	tcpc_keeptimeo,		/* keepalive timeouts */
+	tcpc_keepprobe,		/* keepalive probes sent */
+	tcpc_keepdrops,		/* connections dropped in keepalive */
+
+	tcpc_sndtotal,		/* total packets sent */
+	tcpc_sndpack,		/* data packets sent */
+	tcpc_sndbyte,		/* data bytes sent */
+	tcpc_sndrexmitpack,	/* data packets retransmitted */
+	tcpc_sndrexmitbyte,	/* data bytes retransmitted */
+	tcpc_sndrexmitfast,	/* Fast retransmits */
+	tcpc_sndacks,		/* ack-only packets sent */
+	tcpc_sndprobe,		/* window probes sent */
+	tcpc_sndurg,		/* packets sent with URG only */
+	tcpc_sndwinup,		/* window update-only packets sent */
+	tcpc_sndctrl,		/* control (SYN|FIN|RST) packets sent */
+
+	tcpc_rcvtotal,		/* total packets received */
+	tcpc_rcvpack,		/* packets received in sequence */
+	tcpc_rcvbyte,		/* bytes received in sequence */
+	tcpc_rcvbadsum,		/* packets received with cksum errs */
+	tcpc_rcvbadoff,		/* packets received with bad offset */
+	tcpc_rcvmemdrop,	/* packets dropped for lack of memory */
+	tcpc_rcvnosec,		/* packets dropped for lack of ipsec */
+	tcpc_rcvshort,		/* packets received too short */
+	tcpc_rcvduppack,	/* duplicate-only packets received */
+	tcpc_rcvdupbyte,	/* duplicate-only bytes received */
+	tcpc_rcvpartduppack,	/* packets with some duplicate data */
+	tcpc_rcvpartdupbyte,	/* dup. bytes in part-dup. packets */
+	tcpc_rcvoopack,		/* out-of-order packets received */
+	tcpc_rcvoobyte,		/* out-of-order bytes received */
+	tcpc_rcvpackafterwin,	/* packets with data after window */
+	tcpc_rcvbyteafterwin,	/* bytes rcvd after window */
+	tcpc_rcvafterclose,	/* packets rcvd after "close" */
+	tcpc_rcvwinprobe,	/* rcvd window probe packets */
+	tcpc_rcvdupack,		/* rcvd duplicate acks */
+	tcpc_rcvacktoomuch,	/* rcvd acks for unsent data */
+	tcpc_rcvacktooold,	/* rcvd acks for old data */
+	tcpc_rcvackpack,	/* rcvd ack packets */
+	tcpc_rcvackbyte,	/* bytes acked by rcvd acks */
+	tcpc_rcvwinupd,		/* rcvd window update packets */
+	tcpc_pawsdrop,		/* segments dropped due to PAWS */
+	tcpc_predack,		/* times hdr predict ok for acks */
+	tcpc_preddat,		/* times hdr predict ok for data pkts */
+
+	tcpc_pcbhashmiss,	/* input packets missing pcb hash */
+	tcpc_noport,		/* no socket on port */
+	tcpc_badsyn,		/* SYN packet with src==dst rcv'ed */
+	tcpc_dropsyn,		/* SYN packet dropped */
+
+	tcpc_rcvbadsig,		/* rcvd bad/missing TCP signatures */
+	tcpc_rcvgoodsig,	/* rcvd good TCP signatures */
+	tcpc_inswcsum,		/* input software-checksummed packets */
+	tcpc_outswcsum,		/* output software-checksummed packets */
+
+	/* ECN stats */
+	tcpc_ecn_accepts,	/* ecn connections accepted */
+	tcpc_ecn_rcvece,	/* # of rcvd ece */
+	tcpc_ecn_rcvcwr,	/* # of rcvd cwr */
+	tcpc_ecn_rcvce,		/* # of rcvd ce in ip header */
+	tcpc_ecn_sndect,	/* # of ect sent */
+	tcpc_ecn_sndece,	/* # of ece sent */
+	tcpc_ecn_sndcwr,	/* # of cwr sent */
+	tcpc_cwr_ecn,		/* # of cwnd reduced by ecn */
+	tcpc_cwr_frecovery,	/* # of cwnd reduced by fastrecovery */
+	tcpc_cwr_timeout,	/* # of cwnd reduced by timeout */
+
+	tcpc_conndrained,	/* # of connections drained */
+
+	tcpc_sack_recovery_episode, /* SACK recovery episodes */
+	tcpc_sack_rexmits,	/* SACK rexmit segments */
+	tcpc_sack_rexmit_bytes,	/* SACK rexmit bytes */
+	tcpc_sack_rcv_opts,	/* SACK options received */
+	tcpc_sack_snd_opts,	/* SACK options sent */
+
+	tcpc_ncounters
+};
+
+/* These statistics deal with the SYN cache. */
+struct tcpscstat {
+	uint64_t tcpsc_added;		/* # of entries added */
+	uint64_t tcpsc_completed;	/* # of connections completed */
+	uint64_t tcpsc_timed_out;	/* # of entries timed out */
+	uint64_t tcpsc_overflowed;	/* # dropped due to overflow */
+	uint64_t tcpsc_reset;		/* # dropped due to RST */
+	uint64_t tcpsc_unreach;		/* # dropped due to ICMP unreach */
+	uint64_t tcpsc_bucketoverflow;	/* # dropped due to bucket overflow */
+	uint64_t tcpsc_aborted;		/* # of entries aborted (no mem) */
+	uint64_t tcpsc_dupesyn;		/* # of duplicate SYNs received */
+	uint64_t tcpsc_dropped;		/* # of SYNs dropped (no route/mem) */
+	uint64_t tcpsc_collisions;	/* # of hash collisions */
+	uint64_t tcpsc_retransmitted;	/* # of retransmissions */
+	uint64_t tcpsc_seedrandom;	/* # of syn cache seeds with random */
+	uint64_t tcpsc_hash_size;	/* hash buckets in current syn cache */
+	uint64_t tcpsc_entry_count;	/* # of entries in current syn cache */
+	uint64_t tcpsc_entry_limit;	/* limit of syn cache entries */
+	uint64_t tcpsc_bucket_maxlen;	/* maximum # of entries in any bucket */
+	uint64_t tcpsc_bucket_limit;	/* limit of syn cache bucket list */
+	uint64_t tcpsc_uses_left;	/* use counter of current syn cache */
+};
+
+#define tcpc_inc(_c) do {						\
+	struct counters_ref r;						\
+	uint64_t *tcpc = counters_enter(&r, tcpcounters);		\
+	tcpc[_c]++;							\
+	counters_leave(&r, tcpcounters);				\
+} while (0)
+
+#define tcpc_add(_c, _v) do {						\
+	struct counters_ref r;						\
+	uint64_t *tcpc = counters_enter(&r, tcpcounters);		\
+	tcpc[_c] += (_v);						\
+	counters_leave(&r, tcpcounters);				\
+} while (0)
+
+#define tcpc_pkt(_p, _b, _v) do {					\
+	struct counters_ref r;						\
+	uint64_t *tcpc = counters_enter(&r, tcpcounters);		\
+	tcpc[_p]++;							\
+	tcpc[_b] += (_v);						\
+	counters_leave(&r, tcpcounters);				\
+} while (0)
+
+#endif /* _KERNEL */
+
 struct tcpstat {
 	u_int32_t tcps_connattempt;	/* connections initiated */
 	u_int32_t tcps_accepts;		/* connections accepted */
@@ -566,9 +706,10 @@ struct tcp_ident_mapping {
 };
 
 #ifdef _KERNEL
+#include <sys/percpu.h>
 extern	struct inpcbtable tcbtable;	/* head of queue of active tcpcb's */
-extern	struct tcpstat tcpstat;		/* tcp statistics */
-extern	u_int32_t tcp_now;		/* for RFC 1323 timestamps */
+extern	struct cpumem *tcpcounters;	/* tcp statistics */
+extern	u_int32_t tcp_now;		/* for RFC 1323 timestamps */
 extern	int tcp_do_rfc1323;	/* enabled/disabled? */
 extern	int tcptv_keep_init;	/* time to keep alive the initial SYN packet */
 extern	int tcp_mssdflt;	/* default maximum segment size */
Index: netinet6/ip6_output.c
===================================================================
RCS file: /cvs/src/sys/netinet6/ip6_output.c,v
retrieving revision 1.211
diff -u -p -r1.211 ip6_output.c
--- netinet6/ip6_output.c	1 Jul 2016 18:18:57 -0000	1.211
+++ netinet6/ip6_output.c	17 Aug 2016 11:10:09 -0000
@@ -2861,7 +2861,7 @@ in6_proto_cksum_out(struct mbuf *m, stru
 		if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_TCPv6) ||
 		    ip6->ip6_nxt != IPPROTO_TCP ||
 		    ifp->if_bridgeport != NULL) {
-			tcpstat.tcps_outswcsum++;
+			tcpc_inc(tcpc_outswcsum);
 			in6_delayed_cksum(m, IPPROTO_TCP);
 			m->m_pkthdr.csum_flags &= ~M_TCP_CSUM_OUT; /* Clear */
 		}
Index: sys/mbuf.h
===================================================================
RCS file: /cvs/src/sys/sys/mbuf.h,v
retrieving revision 1.216
diff -u -p -r1.216 mbuf.h
--- sys/mbuf.h	19 Jul 2016 08:13:45 -0000	1.216
+++ sys/mbuf.h	17 Aug 2016 11:10:09 -0000
@@ -236,6 +236,7 @@ struct mbuf {
 #define	MT_FTABLE	5	/* fragment reassembly header */
 #define	MT_CONTROL	6	/* extra-data protocol message */
 #define	MT_OOBDATA	7	/* expedited data */
+#define	MT_NTYPES	8
 
 /* flowid field */
 #define	M_FLOWID_VALID	0x8000	/* is the flowid set */
@@ -397,6 +398,12 @@ struct mbstat {
 	u_short	m_mtypes[256];	/* type specific mbuf allocations */
 };
 
+#define MBSTAT_TYPES	MT_NTYPES
+#define MBSTAT_DROPS	(MBSTAT_TYPES + 0)
+#define MBSTAT_WAIT	(MBSTAT_TYPES + 1)
+#define MBSTAT_DRAIN	(MBSTAT_TYPES + 2)
+#define MBSTAT_COUNT	(MBSTAT_TYPES + 3)
+
 #include <sys/mutex.h>
 
 struct mbuf_list {
@@ -414,7 +421,6 @@ struct mbuf_queue {
 
 #ifdef _KERNEL
 
-extern	struct mbstat mbstat;
 extern	int nmbclust;			/* limit on the # of clusters */
 extern	int mblowat;			/* mbuf low water mark */
 extern	int mcllowat;			/* mbuf cluster low water mark */
@@ -423,6 +429,7 @@ extern	int max_protohdr;		/* largest pro
 extern	int max_hdr;			/* largest link+protocol header */
 
 void	mbinit(void);
+void	mbcache(void);
 struct	mbuf *m_copym2(struct mbuf *, int, int, int);
 struct	mbuf *m_copym(struct mbuf *, int, int, int);
 struct	mbuf *m_free(struct mbuf *);
Index: sys/percpu.h
===================================================================
RCS file: sys/percpu.h
diff -N sys/percpu.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ sys/percpu.h	17 Aug 2016 11:10:09 -0000
@@ -0,0 +1,171 @@
+/* $OpenBSD$ */
+
+/*
+ * Copyright (c) 2016 David Gwynne <dlg@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _SYS_PERCPU_H_
+#define _SYS_PERCPU_H_
+
+#include <sys/atomic.h>
+
+#ifndef CACHELINESIZE
+#define CACHELINESIZE 64
+#endif
+
+#ifndef __upunused /* this should go in param.h */
+#ifdef MULTIPROCESSOR
+#define __upunused
+#else
+#define __upunused __attribute__((__unused__))
+#endif
+#endif
+
+struct cpumem {
+	void *mem;
+};
+
+struct cpumem_iter {
+	unsigned int cpu;
+} __upunused;
+
+struct counters_ref {
+	uint64_t g;
+	uint64_t *c;
+};
+
+#ifdef _KERNEL
+struct pool;
+
+struct cpumem *cpumem_get(struct pool *);
+void cpumem_put(struct pool *, struct cpumem *);
+
+struct cpumem *cpumem_malloc(size_t, int);
+struct cpumem *cpumem_realloc(struct cpumem *, size_t, int);
+void cpumem_free(struct cpumem *, int, size_t);
+
+#ifdef MULTIPROCESSOR
+static inline void *
+cpumem_enter(struct cpumem *cm)
+{
+	return (cm[cpu_number()].mem);
+}
+
+static inline void
+cpumem_leave(struct cpumem *cm, void *mem)
+{
+	/* KDASSERT? */
+}
+
+void *cpumem_first(struct cpumem_iter *, struct cpumem *);
+void *cpumem_next(struct cpumem_iter *, struct cpumem *);
+
+#define CPUMEM_BOOT_MEMORY(_name, _sz)					\
+static struct {								\
+	unsigned char mem[_sz];						\
+	struct cpumem cpumem;						\
+} __aligned(CACHELINESIZE) _name##_boot_cpumem = {			\
+	.cpumem = { _name##_boot_cpumem.mem }				\
+}
+
+#define CPUMEM_BOOT_INITIALIZER(_name)					\
+	{ &_name##_boot_cpumem.cpumem }
+
+#else /* MULTIPROCESSOR */
+static inline void *
+cpumem_enter(struct cpumem *cm)
+{
+	return (cm);
+}
+
+static inline void
+cpumem_leave(struct cpumem *cm, void *mem)
+{
+	/* KDASSERT? */
+}
+
+static inline void *
+cpumem_first(struct cpumem_iter *i, struct cpumem *cm)
+{
+	return (cm);
+}
+
+static inline void *
+cpumem_next(struct cpumem_iter *i, struct cpumem *cm)
+{
+	return (NULL);
+}
+
+#define CPUMEM_BOOT_MEMORY(_name, _sz)					\
+static struct {								\
+	unsigned char mem[_sz];						\
+} _name##_boot_cpumem
+
+#define CPUMEM_BOOT_INITIALIZER(_name)					\
+	{ (struct cpumem *)&_name##_boot_cpumem.mem }
+
+#endif /* MULTIPROCESSOR */
+
+#define CPUMEM_FOREACH(_var, _iter, _cpumem)				\
+	for ((_var) = cpumem_first((_iter), (_cpumem));			\
+	    (_var) != NULL;						\
+	    (_var) = cpumem_next((_iter), (_cpumem)))
+
+struct cpumem *counters_alloc(unsigned int, int);
+struct cpumem *counters_realloc(struct cpumem *, unsigned int, int);
+void counters_free(struct cpumem *, int, unsigned int);
+void counters_read(struct cpumem *, uint64_t *, unsigned int);
+void counters_zero(struct cpumem *, unsigned int);
+
+#ifdef MULTIPROCESSOR
+static inline uint64_t *
+counters_enter(struct counters_ref *ref, struct cpumem *cm)
+{
+	ref->c = cpumem_enter(cm);
+	ref->g = ++(*ref->c); /* make the generation number odd */
+	membar_producer(); /* make it odd before the counters change */
+	return (ref->c + 1);
+}
+
+static inline void
+counters_leave(struct counters_ref *ref, struct cpumem *cm)
+{
+	membar_producer();
+	(*ref->c) = ++ref->g; /* make the generation number even again */
+	cpumem_leave(cm, ref->c);
+}
+#define COUNTERS_BOOT_MEMORY(_name, _n)					\
+	CPUMEM_BOOT_MEMORY(_name, ((_n) + 1) * sizeof(uint64_t))
+#else
+static inline uint64_t *
+counters_enter(struct counters_ref *r, struct cpumem *cm)
+{
+	r->c = cpumem_enter(cm);
+	return (r->c);
+}
+
+static inline void
+counters_leave(struct counters_ref *r, struct cpumem *cm)
+{
+	cpumem_leave(cm, r->c);
+}
+
+#define COUNTERS_BOOT_MEMORY(_name, _n)					\
+	CPUMEM_BOOT_MEMORY(_name, (_n) * sizeof(uint64_t))
+#endif
+
+#define COUNTERS_BOOT_INITIALIZER(_name) CPUMEM_BOOT_INITIALIZER(_name)
+
+#endif /* _KERNEL */
+#endif /* _SYS_PERCPU_H_ */
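
To illustrate the intended pattern, here is a minimal sketch of a subsystem
adopting the counters API, mirroring what the tcp changes above do.
Everything named frob* is invented for the example, as is the choice of
M_DEVBUF; only the counters_*() calls come from sys/percpu.h.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/percpu.h>

enum frobcounters {
	frobc_pkts,		/* packets frobbed */
	frobc_bytes,		/* bytes frobbed */

	frobc_ncounters
};

struct cpumem *frobcounters;

void
frob_init(void)
{
	/* on MP: one array of frobc_ncounters + 1 uint64_ts per CPU */
	frobcounters = counters_alloc(frobc_ncounters, M_DEVBUF);
}

void
frob_pkt(size_t len)
{
	struct counters_ref r;
	uint64_t *c;

	/* updates only touch the local CPU's array: no atomics needed */
	c = counters_enter(&r, frobcounters);
	c[frobc_pkts]++;
	c[frobc_bytes] += len;
	counters_leave(&r, frobcounters);
}

void
frob_read(uint64_t *out)
{
	/* sums the per-CPU arrays into out[frobc_ncounters] */
	counters_read(frobcounters, out, frobc_ncounters);
}

This is exactly the shape of the tcpcounters/tcpc_inc() code above, minus
the sysctl plumbing.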
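
The pool-backed side of the API is for per-CPU structures rather than flat
counter arrays. Again a sketch with invented frob* names: cpumem_get()
hands out one zeroed structure per CPU, cpumem_enter()/cpumem_leave()
bracket the local CPU's fast path, and CPUMEM_FOREACH() lets a reader walk
every CPU's copy.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/pool.h>
#include <sys/percpu.h>

struct frob_cache {
	unsigned int fc_hits;
	unsigned int fc_misses;
};

struct pool frob_cache_pool;
struct cpumem *frob_caches;

void
frob_cache_init(void)
{
	pool_init(&frob_cache_pool, sizeof(struct frob_cache), 0, 0,
	    PR_WAITOK, "frobcache", NULL);
	pool_setipl(&frob_cache_pool, IPL_NONE);

	/* one zeroed frob_cache per CPU */
	frob_caches = cpumem_get(&frob_cache_pool);
}

void
frob_cache_hit(void)
{
	struct frob_cache *fc;

	fc = cpumem_enter(frob_caches);
	fc->fc_hits++;
	cpumem_leave(frob_caches, fc);
}

unsigned int
frob_cache_hits(void)
{
	struct cpumem_iter i;
	struct frob_cache *fc;
	unsigned int hits = 0;

	/* unlocked walk of all CPUs; totals may be slightly stale */
	CPUMEM_FOREACH(fc, &i, frob_caches)
		hits += fc->fc_hits;

	return (hits);
}

Note that cpumem_get() uses the pool's item size as-is, while
cpumem_malloc() rounds allocations up to CACHELINESIZE; a pool intended
for per-CPU data should use a cacheline-sized item if false sharing
matters.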
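
The mbuf.h changes (the MBSTAT_* indices, the mbcache() prototype, and the
removal of the mbstat extern) point at the first user of the boot-memory
macros: counters that have to work before malloc(9) and pool(9) are up.
The wiring below is a guess at that follow-up change, not something this
diff contains; it only shows how COUNTERS_BOOT_MEMORY,
COUNTERS_BOOT_INITIALIZER, and counters_realloc() are meant to compose.

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/percpu.h>

/* static, cacheline-aligned storage usable from the first instruction */
COUNTERS_BOOT_MEMORY(mbstat_boot, MBSTAT_COUNT);

/* starts out pointing at the boot memory above */
struct cpumem *mbstat = COUNTERS_BOOT_INITIALIZER(mbstat_boot);

void
mbinit(void)
{
	/*
	 * runs before percpu_init(); only the boot CPU is up, so the
	 * single boot array is safe to use through the normal API.
	 */
}

void
mbcache(void)
{
	/*
	 * called once percpu_init() has set up cpumem_pl: keep the
	 * boot CPU's counts, give the other CPUs fresh zeroed arrays.
	 * M_DEVBUF is a placeholder malloc type for the example.
	 */
	mbstat = counters_realloc(mbstat, MBSTAT_COUNT, M_DEVBUF);
}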