/*
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2016-2018 Netflix, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/qmath.h>
#include <sys/queue.h>
#include <sys/refcount.h>
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
#include <sys/stats.h> /* Must come after qmath.h and tree.h */
#include <sys/counter.h>
#include <dev/tcp_log/tcp_log_dev.h>
#include <net/if_var.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_log_buf.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_hpts.h>
/* Default expiry time */
#define	TCP_LOG_EXPIRE_TIME	((sbintime_t)60 * SBT_1S)

/* Max interval at which to run the expiry timer */
#define	TCP_LOG_EXPIRE_INTVL	((sbintime_t)5 * SBT_1S)
static uma_zone_t tcp_log_id_bucket_zone, tcp_log_id_node_zone, tcp_log_zone;
static int	tcp_log_session_limit = TCP_LOG_BUF_DEFAULT_SESSION_LIMIT;
static uint32_t	tcp_log_version = TCP_LOG_BUF_VER;
RB_HEAD(tcp_log_id_tree, tcp_log_id_bucket);
static struct tcp_log_id_tree tcp_log_id_head;
static STAILQ_HEAD(, tcp_log_id_node) tcp_log_expireq_head =
    STAILQ_HEAD_INITIALIZER(tcp_log_expireq_head);
static struct mtx tcp_log_expireq_mtx;
static struct callout tcp_log_expireq_callout;
static u_long tcp_log_auto_ratio = 0;
static volatile u_long tcp_log_auto_ratio_cur = 0;
static uint32_t tcp_log_auto_mode = TCP_LOG_STATE_TAIL;
static bool tcp_log_auto_all = false;
static uint32_t tcp_disable_all_bb_logs = 0;
RB_PROTOTYPE_STATIC(tcp_log_id_tree, tcp_log_id_bucket, tlb_rb, tcp_log_id_cmp)

SYSCTL_NODE(_net_inet_tcp, OID_AUTO, bb, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "TCP Black Box controls");

SYSCTL_NODE(_net_inet_tcp_bb, OID_AUTO, tp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "TCP Black Box Trace Point controls");

SYSCTL_BOOL(_net_inet_tcp_bb, OID_AUTO, log_verbose, CTLFLAG_RW, &tcp_log_verbose,
    0, "Force verbose logging for TCP traces");

SYSCTL_INT(_net_inet_tcp_bb, OID_AUTO, log_session_limit,
    CTLFLAG_RW, &tcp_log_session_limit, 0,
    "Maximum number of events maintained for each TCP session");
uint32_t tcp_trace_point_config = 0;
SYSCTL_U32(_net_inet_tcp_bb_tp, OID_AUTO, number, CTLFLAG_RW,
    &tcp_trace_point_config, TCP_LOG_STATE_HEAD_AUTO,
    "Trace point number to activate (0=none, 0xffffffff=all)");

uint32_t tcp_trace_point_bb_mode = TCP_LOG_STATE_CONTINUAL;
SYSCTL_U32(_net_inet_tcp_bb_tp, OID_AUTO, bbmode, CTLFLAG_RW,
    &tcp_trace_point_bb_mode, TCP_LOG_STATE_HEAD_AUTO,
    "BB logging mode to activate when a trace point is hit");

int32_t tcp_trace_point_count = 0;
SYSCTL_U32(_net_inet_tcp_bb_tp, OID_AUTO, count, CTLFLAG_RW,
    &tcp_trace_point_count, TCP_LOG_STATE_HEAD_AUTO,
    "How many connections that hit the trace point will have BB logging turned on");
SYSCTL_UMA_MAX(_net_inet_tcp_bb, OID_AUTO, log_global_limit, CTLFLAG_RW,
    &tcp_log_zone, "Maximum number of events maintained for all TCP sessions");

SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, log_global_entries, CTLFLAG_RD,
    &tcp_log_zone, "Current number of events maintained for all TCP sessions");

SYSCTL_UMA_MAX(_net_inet_tcp_bb, OID_AUTO, log_id_limit, CTLFLAG_RW,
    &tcp_log_id_bucket_zone, "Maximum number of log IDs");

SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, log_id_entries, CTLFLAG_RD,
    &tcp_log_id_bucket_zone, "Current number of log IDs");

SYSCTL_UMA_MAX(_net_inet_tcp_bb, OID_AUTO, log_id_tcpcb_limit, CTLFLAG_RW,
    &tcp_log_id_node_zone, "Maximum number of tcpcbs with log IDs");

SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, log_id_tcpcb_entries, CTLFLAG_RD,
    &tcp_log_id_node_zone, "Current number of tcpcbs with log IDs");

SYSCTL_U32(_net_inet_tcp_bb, OID_AUTO, log_version, CTLFLAG_RD, &tcp_log_version,
    0, "Version of log formats exported");

SYSCTL_U32(_net_inet_tcp_bb, OID_AUTO, disable_all, CTLFLAG_RW,
    &tcp_disable_all_bb_logs, 0,
    "Disable all BB logging for all connections");

SYSCTL_ULONG(_net_inet_tcp_bb, OID_AUTO, log_auto_ratio, CTLFLAG_RW,
    &tcp_log_auto_ratio, 0, "Do auto capturing for 1 out of N sessions");

SYSCTL_U32(_net_inet_tcp_bb, OID_AUTO, log_auto_mode, CTLFLAG_RW,
    &tcp_log_auto_mode, 0,
    "Logging mode for auto-selected sessions (default is TCP_LOG_STATE_TAIL)");

SYSCTL_BOOL(_net_inet_tcp_bb, OID_AUTO, log_auto_all, CTLFLAG_RW,
    &tcp_log_auto_all, 0,
    "Auto-select from all sessions (rather than just those with IDs)");
#ifdef TCPLOG_DEBUG_COUNTERS
counter_u64_t tcp_log_queued;
counter_u64_t tcp_log_que_fail1;
counter_u64_t tcp_log_que_fail2;
counter_u64_t tcp_log_que_fail3;
counter_u64_t tcp_log_que_fail4;
counter_u64_t tcp_log_que_fail5;
counter_u64_t tcp_log_que_copyout;
counter_u64_t tcp_log_que_read;
counter_u64_t tcp_log_que_freed;

SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, queued, CTLFLAG_RD,
    &tcp_log_queued, "Number of entries queued");
SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail1, CTLFLAG_RD,
    &tcp_log_que_fail1, "Number of entries queued but fail 1");
SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail2, CTLFLAG_RD,
    &tcp_log_que_fail2, "Number of entries queued but fail 2");
SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail3, CTLFLAG_RD,
    &tcp_log_que_fail3, "Number of entries queued but fail 3");
SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail4, CTLFLAG_RD,
    &tcp_log_que_fail4, "Number of entries queued but fail 4");
SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, fail5, CTLFLAG_RD,
    &tcp_log_que_fail5, "Number of entries queued but fail 5");
SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, copyout, CTLFLAG_RD,
    &tcp_log_que_copyout, "Number of entries copied out");
SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, read, CTLFLAG_RD,
    &tcp_log_que_read, "Number of entries read from the queue");
SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, freed, CTLFLAG_RD,
    &tcp_log_que_freed, "Number of entries freed after reading");
#endif /* TCPLOG_DEBUG_COUNTERS */
#define	TCPLOG_DEBUG_RINGBUF

/* Number of requests to consider a PBCID "active". */
#define	ACTIVE_REQUEST_COUNT 10

/* Statistic tracking for "active" PBCIDs. */
static counter_u64_t tcp_log_pcb_ids_cur;
static counter_u64_t tcp_log_pcb_ids_tot;

SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, pcb_ids_cur, CTLFLAG_RD,
    &tcp_log_pcb_ids_cur, "Number of pcb IDs allocated in the system");
SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, pcb_ids_tot, CTLFLAG_RD,
    &tcp_log_pcb_ids_tot, "Total number of pcb IDs that have been allocated");

struct tcp_log_mem
{
	STAILQ_ENTRY(tcp_log_mem) tlm_queue;
	struct tcp_log_buffer	tlm_buf;
	struct tcp_log_verbose	tlm_v;
#ifdef TCPLOG_DEBUG_RINGBUF
	volatile int		tlm_refcnt;
#endif
};

/* 60 bytes for the header, + 16 bytes for padding */
static uint8_t	zerobuf[76];
/*
 * Locking rules:
 * A. You need a lock on the Tree to add/remove buckets.
 * B. You need a lock on the bucket to add/remove nodes from the bucket.
 * C. To change information in a node, you need the INP lock if the tln_closed
 *    field is false. Otherwise, you need the bucket lock. (Note that the
 *    tln_closed field can change at any point, so you need to recheck the
 *    entry after acquiring the INP lock.)
 * D. To remove a node from the bucket, you must have that entry locked,
 *    according to the criteria of Rule C. Also, the node must not be on
 *    the expiry queue.
 * E. The exception to C is the expiry queue fields, which are locked by
 *    the TCPLOG_EXPIREQ lock.
 *
 * Buckets have a reference count. Each node is a reference. Further,
 * other callers may add reference counts to keep a bucket from disappearing.
 * You can add a reference as long as you own a lock sufficient to keep the
 * bucket from disappearing. For example, a common use is:
 *   a. Have a locked INP, but need to lock the TCPID_BUCKET.
 *   b. Add a refcount on the bucket. (Safe because the INP lock prevents
 *      the TCPID_BUCKET from going away.)
 *   c. Drop the INP lock.
 *   d. Acquire a lock on the TCPID_BUCKET.
 *   e. Acquire a lock on the INP.
 *   f. Drop the refcount on the bucket.
 *      (At this point, the bucket may disappear.)
 *
 * You can acquire the expiry-queue lock with either the bucket or INP lock
 * held. Don't reverse that order.
 * When the expire code has committed to freeing a node, it resets the expiry
 * time to SBT_MAX. That is the signal to everyone else that they should
 * leave that node alone.
 */
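
/*
 * Illustrative sketch (not compiled here) of the reference pattern in steps
 * (a)-(f) above, using the TCPID_BUCKET_* macros defined below and
 * tcp_log_unref_bucket() from later in this file. The example function name
 * is hypothetical; the point is only the ordering of operations.
 */
#if 0
static void
example_lock_bucket_with_inp_held(struct inpcb *inp,
    struct tcp_log_id_bucket *tlb)
{
	int tree_locked = TREE_UNLOCKED;

	INP_WLOCK_ASSERT(inp);		/* (a) INP is locked, bucket is not. */
	TCPID_BUCKET_REF(tlb);		/* (b) Refcount keeps the bucket alive. */
	INP_WUNLOCK(inp);		/* (c) Drop the INP lock... */
	TCPID_BUCKET_LOCK(tlb);		/* (d) ...so we can take the bucket lock. */
	INP_WLOCK(inp);			/* (e) Reacquire the INP lock. */
	/* (f) Drop the reference; the bucket may vanish if it was the last one. */
	if (!tcp_log_unref_bucket(tlb, &tree_locked, inp))
		TCPID_BUCKET_UNLOCK(tlb);
}
#endif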
static struct rwlock tcp_id_tree_lock;
#define	TCPID_TREE_WLOCK()		rw_wlock(&tcp_id_tree_lock)
#define	TCPID_TREE_RLOCK()		rw_rlock(&tcp_id_tree_lock)
#define	TCPID_TREE_UPGRADE()		rw_try_upgrade(&tcp_id_tree_lock)
#define	TCPID_TREE_WUNLOCK()		rw_wunlock(&tcp_id_tree_lock)
#define	TCPID_TREE_RUNLOCK()		rw_runlock(&tcp_id_tree_lock)
#define	TCPID_TREE_WLOCK_ASSERT()	rw_assert(&tcp_id_tree_lock, RA_WLOCKED)
#define	TCPID_TREE_RLOCK_ASSERT()	rw_assert(&tcp_id_tree_lock, RA_RLOCKED)
#define	TCPID_TREE_UNLOCK_ASSERT()	rw_assert(&tcp_id_tree_lock, RA_UNLOCKED)

#define	TCPID_BUCKET_LOCK_INIT(tlb)	mtx_init(&((tlb)->tlb_mtx), "tcp log id bucket", NULL, MTX_DEF)
#define	TCPID_BUCKET_LOCK_DESTROY(tlb)	mtx_destroy(&((tlb)->tlb_mtx))
#define	TCPID_BUCKET_LOCK(tlb)		mtx_lock(&((tlb)->tlb_mtx))
#define	TCPID_BUCKET_UNLOCK(tlb)	mtx_unlock(&((tlb)->tlb_mtx))
#define	TCPID_BUCKET_LOCK_ASSERT(tlb)	mtx_assert(&((tlb)->tlb_mtx), MA_OWNED)
#define	TCPID_BUCKET_UNLOCK_ASSERT(tlb)	mtx_assert(&((tlb)->tlb_mtx), MA_NOTOWNED)

#define	TCPID_BUCKET_REF(tlb)		refcount_acquire(&((tlb)->tlb_refcnt))
#define	TCPID_BUCKET_UNREF(tlb)		refcount_release(&((tlb)->tlb_refcnt))

#define	TCPLOG_EXPIREQ_LOCK()		mtx_lock(&tcp_log_expireq_mtx)
#define	TCPLOG_EXPIREQ_UNLOCK()		mtx_unlock(&tcp_log_expireq_mtx)
SLIST_HEAD(tcp_log_id_head, tcp_log_id_node);

struct tcp_log_id_bucket
{
	/*
	 * tlb_id must be first. This lets us use strcmp on
	 * (struct tcp_log_id_bucket *) and (char *) interchangeably.
	 */
	char				tlb_id[TCP_LOG_ID_LEN];
	char				tlb_tag[TCP_LOG_TAG_LEN];
	RB_ENTRY(tcp_log_id_bucket)	tlb_rb;
	struct tcp_log_id_head		tlb_head;
	struct mtx			tlb_mtx;
	volatile u_int			tlb_refcnt;
	volatile u_int			tlb_reqcnt;
	uint32_t			tlb_loglimit;
	int8_t				tlb_logstate;
};

struct tcp_log_id_node
{
	SLIST_ENTRY(tcp_log_id_node) tln_list;
	STAILQ_ENTRY(tcp_log_id_node) tln_expireq; /* Locked by the expireq lock */
	sbintime_t		tln_expiretime;	/* Locked by the expireq lock */

	/*
	 * If INP is NULL, that means the connection has closed. We've
	 * saved the connection endpoint information and the log entries
	 * in the tln_ie and tln_entries members. We've also saved a pointer
	 * to the enclosing bucket here. If INP is not NULL, the information is
	 * in the PCB and not here.
	 */
	struct inpcb		*tln_inp;
	struct tcpcb		*tln_tp;
	struct tcp_log_id_bucket *tln_bucket;
	struct in_endpoints	tln_ie;
	struct tcp_log_stailq	tln_entries;
	int			tln_count;
	volatile int		tln_closed;
	uint8_t			tln_af;
};
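
/*
 * Illustrative sketch (not compiled here) of how the containers above
 * relate: the global RB tree holds one tcp_log_id_bucket per log ID, and
 * each bucket holds an SLIST of tcp_log_id_node entries, one per
 * connection. The function name is hypothetical, and the traversal assumes
 * the tree read lock is held and that taking each bucket lock is permitted
 * per rules A and B above.
 */
#if 0
static u_int
example_count_nodes_in_tree(void)
{
	struct tcp_log_id_bucket *tlb;
	struct tcp_log_id_node *tln;
	u_int count = 0;

	TCPID_TREE_RLOCK_ASSERT();
	RB_FOREACH(tlb, tcp_log_id_tree, &tcp_log_id_head) {
		TCPID_BUCKET_LOCK(tlb);
		SLIST_FOREACH(tln, &tlb->tlb_head, tln_list)
			count++;
		TCPID_BUCKET_UNLOCK(tlb);
	}
	return (count);
}
#endif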
enum tree_lock_state {
	TREE_UNLOCKED = 0,
	TREE_RLOCKED,
	TREE_WLOCKED,
};
/* Do we want to select this session for auto-logging? */
static bool
tcp_log_selectauto(void)
{

	/*
	 * If we are doing auto-capturing, figure out whether we will capture
	 * this session.
	 */
	if (tcp_log_auto_ratio &&
	    (tcp_disable_all_bb_logs == 0) &&
	    (atomic_fetchadd_long(&tcp_log_auto_ratio_cur, 1) %
	    tcp_log_auto_ratio) == 0)
		return (true);
	return (false);
}
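
/*
 * Worked example (illustrative): with net.inet.tcp.bb.log_auto_ratio set to
 * 100, tcp_log_auto_ratio_cur takes the values 0, 1, 2, ... as candidate
 * sessions arrive, and only the sessions that observe a value divisible by
 * 100 (0, 100, 200, ...) are selected -- roughly 1 in every 100 sessions.
 * A ratio of 1 selects every session; a ratio of 0 disables auto-selection.
 */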
static int
tcp_log_id_cmp(struct tcp_log_id_bucket *a, struct tcp_log_id_bucket *b)
{
	KASSERT(a != NULL, ("tcp_log_id_cmp: argument a is unexpectedly NULL"));
	KASSERT(b != NULL, ("tcp_log_id_cmp: argument b is unexpectedly NULL"));
	return strncmp(a->tlb_id, b->tlb_id, TCP_LOG_ID_LEN);
}

RB_GENERATE_STATIC(tcp_log_id_tree, tcp_log_id_bucket, tlb_rb, tcp_log_id_cmp)
static __inline void
tcp_log_id_validate_tree_lock(int tree_locked)
{

#ifdef INVARIANTS
	switch (tree_locked) {
	case TREE_WLOCKED:
		TCPID_TREE_WLOCK_ASSERT();
		break;
	case TREE_RLOCKED:
		TCPID_TREE_RLOCK_ASSERT();
		break;
	case TREE_UNLOCKED:
		TCPID_TREE_UNLOCK_ASSERT();
		break;
	default:
		kassert_panic("%s:%d: unknown tree lock state", __func__,
		    __LINE__);
	}
#endif
}
static void
tcp_log_remove_bucket(struct tcp_log_id_bucket *tlb)
{

	TCPID_TREE_WLOCK_ASSERT();
	KASSERT(SLIST_EMPTY(&tlb->tlb_head),
	    ("%s: Attempt to remove non-empty bucket", __func__));
	if (RB_REMOVE(tcp_log_id_tree, &tcp_log_id_head, tlb) == NULL) {
#ifdef INVARIANTS
		kassert_panic("%s:%d: error removing element from tree",
		    __func__, __LINE__);
#endif
	}
	TCPID_BUCKET_LOCK_DESTROY(tlb);
	counter_u64_add(tcp_log_pcb_ids_cur, (int64_t)-1);
	uma_zfree(tcp_log_id_bucket_zone, tlb);
}
/*
 * Call with a referenced and locked bucket.
 * Will return true if the bucket was freed; otherwise, false.
 * tlb: The bucket to unreference.
 * tree_locked: A pointer to the state of the tree lock. If the tree lock
 *     state changes, the function will update it.
 * inp: If not NULL and the function needs to drop the inp lock to relock the
 *     tree, it will do so. (The caller must ensure inp will not become invalid,
 *     probably by holding a reference to it.)
 */
static bool
tcp_log_unref_bucket(struct tcp_log_id_bucket *tlb, int *tree_locked,
    struct inpcb *inp)
{

	KASSERT(tlb != NULL, ("%s: called with NULL tlb", __func__));
	KASSERT(tree_locked != NULL, ("%s: called with NULL tree_locked",
	    __func__));

	tcp_log_id_validate_tree_lock(*tree_locked);

	/*
	 * Did we hold the last reference on the tlb? If so, we may need
	 * to free it. (Note that we can realistically only execute the
	 * loop twice: once without a write lock and once with a write
	 * lock.)
	 */
	while (TCPID_BUCKET_UNREF(tlb)) {
		/*
		 * We need a write lock on the tree to free this.
		 * If we can upgrade the tree lock, this is "easy". If we
		 * can't upgrade the tree lock, we need to do this the
		 * "hard" way: unwind all our locks and relock everything.
		 * In the meantime, anything could have changed. We even
		 * need to validate that we still need to free the bucket.
		 */
		if (*tree_locked == TREE_RLOCKED && TCPID_TREE_UPGRADE())
			*tree_locked = TREE_WLOCKED;
		else if (*tree_locked != TREE_WLOCKED) {
			TCPID_BUCKET_REF(tlb);
			if (inp != NULL)
				INP_WUNLOCK(inp);
			TCPID_BUCKET_UNLOCK(tlb);
			if (*tree_locked == TREE_RLOCKED)
				TCPID_TREE_RUNLOCK();
			TCPID_TREE_WLOCK();
			*tree_locked = TREE_WLOCKED;
			TCPID_BUCKET_LOCK(tlb);
			if (inp != NULL)
				INP_WLOCK(inp);
			continue;
		}

		/*
		 * We have an empty bucket and a write lock on the tree.
		 * Remove the empty bucket.
		 */
		tcp_log_remove_bucket(tlb);
		return (true);
	}
	return (false);
}
/*
 * Call with a locked bucket. This function will release the lock on the
 * bucket before returning.
 *
 * The caller is responsible for freeing the tp->t_lin/tln node!
 *
 * Note: one of tp or both tlb and tln must be supplied.
 *
 * inp: A pointer to the inp. If the function needs to drop the inp lock to
 *     acquire the tree write lock, it will do so. (The caller must ensure inp
 *     will not become invalid, probably by holding a reference to it.)
 * tp: A pointer to the tcpcb. (optional; if specified, tlb and tln are ignored)
 * tlb: A pointer to the bucket. (optional; ignored if tp is specified)
 * tln: A pointer to the node. (optional; ignored if tp is specified)
 * tree_locked: A pointer to the state of the tree lock. If the tree lock
 *     state changes, the function will update it.
 *
 * Will return true if the INP lock was reacquired; otherwise, false.
 */
static bool
tcp_log_remove_id_node(struct inpcb *inp, struct tcpcb *tp,
    struct tcp_log_id_bucket *tlb, struct tcp_log_id_node *tln,
    int *tree_locked)
{
	int orig_tree_locked;

	KASSERT(tp != NULL || (tlb != NULL && tln != NULL),
	    ("%s: called with tp=%p, tlb=%p, tln=%p", __func__,
	    tp, tlb, tln));
	KASSERT(tree_locked != NULL, ("%s: called with NULL tree_locked",
	    __func__));

	if (tp != NULL) {
		tlb = tp->t_lib;
		tln = tp->t_lin;
		KASSERT(tlb != NULL, ("%s: unexpectedly NULL tlb", __func__));
		KASSERT(tln != NULL, ("%s: unexpectedly NULL tln", __func__));
	}

	tcp_log_id_validate_tree_lock(*tree_locked);
	TCPID_BUCKET_LOCK_ASSERT(tlb);

	/*
	 * Remove the node, clear the log bucket and node from the TCPCB, and
	 * decrement the bucket refcount. In the process, if this is the
	 * last reference, the bucket will be freed.
	 */
	SLIST_REMOVE(&tlb->tlb_head, tln, tcp_log_id_node, tln_list);
	if (tp != NULL) {
		tp->t_lib = NULL;
		tp->t_lin = NULL;
	}
	orig_tree_locked = *tree_locked;
	if (!tcp_log_unref_bucket(tlb, tree_locked, inp))
		TCPID_BUCKET_UNLOCK(tlb);
	return (*tree_locked != orig_tree_locked);
}
#define	RECHECK_INP_CLEAN(cleanup)	do {				\
	if (inp->inp_flags & INP_DROPPED) {				\
		rv = ECONNRESET;					\
		cleanup;						\
		goto done;						\
	}								\
	tp = intotcpcb(inp);						\
} while (0)

#define	RECHECK_INP() RECHECK_INP_CLEAN(/* noop */)
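
/*
 * Illustrative sketch (not compiled here) of how RECHECK_INP() is meant to
 * be used: whenever the INP lock has been dropped and reacquired, the
 * connection may have been dropped in the meantime, so the macro sets rv
 * and bails out to a local "done" label. The surrounding function shape is
 * hypothetical; the real callers are below (e.g. tcp_log_set_id()).
 */
#if 0
static int
example_relock_and_recheck(struct inpcb *inp, struct tcpcb *tp)
{
	int rv = 0;

	INP_WUNLOCK(inp);
	/* ... take other locks while the INP is unlocked ... */
	INP_WLOCK(inp);
	RECHECK_INP();		/* Jumps to "done" if the inp was dropped. */
	/* tp has been refreshed from the inp and is safe to use here. */
done:
	INP_WUNLOCK(inp);
	return (rv);
}
#endif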
static void
tcp_log_grow_tlb(char *tlb_id, struct tcpcb *tp)
{

	INP_WLOCK_ASSERT(tptoinpcb(tp));

#ifdef STATS
	if (V_tcp_perconn_stats_enable == 2 && tp->t_stats == NULL)
		(void)tcp_stats_sample_rollthedice(tp, tlb_id, strlen(tlb_id));
#endif
}
static void
tcp_log_increment_reqcnt(struct tcp_log_id_bucket *tlb)
{

	atomic_fetchadd_int(&tlb->tlb_reqcnt, 1);
}
int
tcp_log_apply_ratio(struct tcpcb *tp, int ratio)
{
	struct tcp_log_id_bucket *tlb;
	struct inpcb *inp = tptoinpcb(tp);
	uint32_t hash, ratio_hash_thresh;
	int rv, tree_locked;

	rv = 0;
	tree_locked = TREE_UNLOCKED;
	tlb = tp->t_lib;

	INP_WLOCK_ASSERT(inp);
	if (tlb == NULL) {
		INP_WUNLOCK(inp);
		return (EOPNOTSUPP);
	}
	if (ratio)
		ratio_hash_thresh = max(1, UINT32_MAX / ratio);
	else
		ratio_hash_thresh = 0;
	TCPID_BUCKET_REF(tlb);
	INP_WUNLOCK(inp);
	TCPID_BUCKET_LOCK(tlb);

	hash = hash32_buf(tlb->tlb_id, strlen(tlb->tlb_id), 0);
	if (hash > ratio_hash_thresh && tp->_t_logstate == TCP_LOG_STATE_OFF &&
	    tlb->tlb_logstate == TCP_LOG_STATE_OFF) {
		/*
		 * Ratio decision not to log this log ID (and this connection by
		 * way of association). We only apply a log ratio log disable
		 * decision if it would not interfere with a log enable decision
		 * made elsewhere e.g. tcp_log_selectauto() or setsockopt().
		 */
		tlb->tlb_logstate = TCP_LOG_STATE_RATIO_OFF;
		INP_WLOCK(inp);
		RECHECK_INP();
		(void)tcp_log_state_change(tp, TCP_LOG_STATE_OFF);
done:
		INP_WUNLOCK(inp);
	}

	INP_UNLOCK_ASSERT(inp);
	if (!tcp_log_unref_bucket(tlb, &tree_locked, NULL))
		TCPID_BUCKET_UNLOCK(tlb);

	if (tree_locked == TREE_WLOCKED) {
		TCPID_TREE_WLOCK_ASSERT();
		TCPID_TREE_WUNLOCK();
	} else if (tree_locked == TREE_RLOCKED) {
		TCPID_TREE_RLOCK_ASSERT();
		TCPID_TREE_RUNLOCK();
	} else
		TCPID_TREE_UNLOCK_ASSERT();

	return (rv);
}
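
/*
 * Worked sketch (not compiled here) of the thresholding used above: the log
 * ID is hashed to a roughly uniform 32-bit value, and IDs whose hash falls
 * at or below UINT32_MAX / ratio stay eligible for logging, so about 1 in
 * `ratio` IDs keeps logging while the rest become candidates for
 * TCP_LOG_STATE_RATIO_OFF. The helper name is hypothetical.
 */
#if 0
static bool
example_id_survives_ratio(const char *id, int ratio)
{
	uint32_t hash, thresh;

	/* Mirror of the threshold computed in tcp_log_apply_ratio(). */
	thresh = (ratio != 0) ? max(1, UINT32_MAX / ratio) : 0;
	hash = hash32_buf(id, strlen(id), 0);
	return (hash <= thresh);
}
#endif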
/*
 * Associate the specified tag with a particular TCP log ID.
 * Called with INPCB locked. Returns with it unlocked.
 * Returns 0 on success or EOPNOTSUPP if the connection has no TCP log ID.
 */
int
tcp_log_set_tag(struct tcpcb *tp, char *tag)
{
	struct inpcb *inp = tptoinpcb(tp);
	struct tcp_log_id_bucket *tlb;
	int tree_locked;

	INP_WLOCK_ASSERT(inp);

	tree_locked = TREE_UNLOCKED;
	tlb = tp->t_lib;
	if (tlb == NULL) {
		INP_WUNLOCK(inp);
		return (EOPNOTSUPP);
	}

	TCPID_BUCKET_REF(tlb);
	INP_WUNLOCK(inp);
	TCPID_BUCKET_LOCK(tlb);
	strlcpy(tlb->tlb_tag, tag, TCP_LOG_TAG_LEN);
	if (!tcp_log_unref_bucket(tlb, &tree_locked, NULL))
		TCPID_BUCKET_UNLOCK(tlb);

	if (tree_locked == TREE_WLOCKED) {
		TCPID_TREE_WLOCK_ASSERT();
		TCPID_TREE_WUNLOCK();
	} else if (tree_locked == TREE_RLOCKED) {
		TCPID_TREE_RLOCK_ASSERT();
		TCPID_TREE_RUNLOCK();
	} else
		TCPID_TREE_UNLOCK_ASSERT();

	return (0);
}
/*
 * Set the TCP log ID for a TCPCB.
 * Called with INPCB locked. Returns with it unlocked.
 */
int
tcp_log_set_id(struct tcpcb *tp, char *id)
{
	struct tcp_log_id_bucket *tlb, *tmp_tlb;
	struct tcp_log_id_node *tln;
	struct inpcb *inp = tptoinpcb(tp);
	int tree_locked, rv;
	bool bucket_locked, same;

	tlb = NULL;
	tln = NULL;
	tree_locked = TREE_UNLOCKED;
	bucket_locked = false;

restart:
	INP_WLOCK_ASSERT(inp);
	/* See if the ID is unchanged. */
	same = ((tp->t_lib != NULL && !strcmp(tp->t_lib->tlb_id, id)) ||
	    (tp->t_lib == NULL && *id == 0));
	if (tp->_t_logstate && STAILQ_FIRST(&tp->t_logs) && !same) {
		/*
		 * There are residual logs left we may
		 * be changing id's so dump what we can.
		 */
		switch(tp->_t_logstate) {
		case TCP_LOG_STATE_HEAD_AUTO:
			(void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from head at id switch",
			    M_NOWAIT, false);
			break;
		case TCP_LOG_STATE_TAIL_AUTO:
			(void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from tail at id switch",
			    M_NOWAIT, false);
			break;
		case TCP_LOG_STATE_CONTINUAL:
			(void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual at id switch",
			    M_NOWAIT, false);
			break;
		case TCP_LOG_VIA_BBPOINTS:
			(void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from bbpoints at id switch",
			    M_NOWAIT, false);
			break;
		}
	}
	if (same) {
		if (tp->t_lib != NULL) {
			tcp_log_increment_reqcnt(tp->t_lib);
			if ((tp->t_lib->tlb_logstate > TCP_LOG_STATE_OFF) &&
			    (tp->t_log_state_set == 0)) {
				/* Clone in any logging */
				tp->_t_logstate = tp->t_lib->tlb_logstate;
			}
			if ((tp->t_lib->tlb_loglimit) &&
			    (tp->t_log_state_set == 0)) {
				/* We also have a limit set */
				tp->t_loglimit = tp->t_lib->tlb_loglimit;
			}
		}
		rv = 0;
		goto done;
	}

	/*
	 * If the TCPCB had a previous ID, we need to extricate it from
	 * the previous list.
	 *
	 * Drop the TCPCB lock and lock the tree and the bucket.
	 * Because this is called in the socket context, we (theoretically)
	 * don't need to worry about the INPCB completely going away
	 * while we are gone.
	 */
	if (tp->t_lib != NULL) {
		tlb = tp->t_lib;
		TCPID_BUCKET_REF(tlb);
		INP_WUNLOCK(inp);

		if (tree_locked == TREE_UNLOCKED) {
			TCPID_TREE_RLOCK();
			tree_locked = TREE_RLOCKED;
		}
		TCPID_BUCKET_LOCK(tlb);
		bucket_locked = true;
		INP_WLOCK(inp);

		/*
		 * Unreference the bucket. If our bucket went away, it is no
		 * longer locked or valid.
		 */
		if (tcp_log_unref_bucket(tlb, &tree_locked, inp)) {
			bucket_locked = false;
			tlb = NULL;
		}

		/* Validate the INP. */
		RECHECK_INP();

		/*
		 * Evaluate whether the bucket changed while we were unlocked.
		 *
		 * Possible scenarios here:
		 * 1. Bucket is unchanged and the same one we started with.
		 * 2. The TCPCB no longer has a bucket and our bucket was
		 *    freed.
		 * 3. The TCPCB has a new bucket, whether ours was freed.
		 * 4. The TCPCB no longer has a bucket and our bucket was
		 *    not freed.
		 *
		 * In cases 2-4, we will start over. In case 1, we will
		 * proceed here to remove the bucket.
		 */
		if (tlb == NULL || tp->t_lib != tlb) {
			KASSERT(bucket_locked || tlb == NULL,
			    ("%s: bucket_locked (%d) and tlb (%p) are "
			    "inconsistent", __func__, bucket_locked, tlb));
			if (bucket_locked) {
				TCPID_BUCKET_UNLOCK(tlb);
				bucket_locked = false;
			}
			tlb = NULL;
			goto restart;
		}

		/*
		 * Store the (struct tcp_log_id_node) for reuse. Then, remove
		 * it from the bucket. In the process, we may end up relocking.
		 * If so, we need to validate that the INP is still valid, and
		 * the TCPCB entries match we expect.
		 *
		 * We will clear tlb and change the bucket_locked state just
		 * before calling tcp_log_remove_id_node(), since that function
		 * will unlock the bucket.
		 */
		if (tln != NULL)
			uma_zfree(tcp_log_id_node_zone, tln);
		tln = tp->t_lin;
		tlb = NULL;
		bucket_locked = false;
		if (tcp_log_remove_id_node(inp, tp, NULL, NULL, &tree_locked)) {
			RECHECK_INP();

			/*
			 * If the TCPCB moved to a new bucket while we had
			 * dropped the lock, restart.
			 */
			if (tp->t_lib != NULL || tp->t_lin != NULL)
				goto restart;
		}

		/*
		 * Yay! We successfully removed the TCPCB from its old
		 * bucket. Phew!
		 *
		 * On to bigger and better things...
		 */
	}
	/* At this point, the TCPCB should not be in any bucket. */
	KASSERT(tp->t_lib == NULL, ("%s: tp->t_lib is not NULL", __func__));

	/*
	 * If the new ID is not empty, we need to now assign this TCPCB to a
	 * new bucket.
	 */
	if (*id) {
		/* Get a new tln, if we don't already have one to reuse. */
		if (tln == NULL) {
			tln = uma_zalloc(tcp_log_id_node_zone,
			    M_NOWAIT | M_ZERO);
			if (tln == NULL) {
				rv = ENOBUFS;
				goto done;
			}
			tln->tln_inp = inp;
			tln->tln_tp = tp;
		}

		/*
		 * Drop the INP lock for a bit. We don't need it, and dropping
		 * it prevents lock order reversals.
		 */
		INP_WUNLOCK(inp);

		/* Make sure we have at least a read lock on the tree. */
		tcp_log_id_validate_tree_lock(tree_locked);
		if (tree_locked == TREE_UNLOCKED) {
			TCPID_TREE_RLOCK();
			tree_locked = TREE_RLOCKED;
		}

refind:
		/*
		 * Remember that we constructed (struct tcp_log_id_node) so
		 * we can safely cast the id to it for the purposes of finding.
		 */
		KASSERT(tlb == NULL, ("%s:%d tlb unexpectedly non-NULL",
		    __func__, __LINE__));
		tmp_tlb = RB_FIND(tcp_log_id_tree, &tcp_log_id_head,
		    (struct tcp_log_id_bucket *) id);

		/*
		 * If we didn't find a matching bucket, we need to add a new
		 * one. This requires a write lock. But, of course, we will
		 * need to recheck some things when we re-acquire the lock.
		 */
		if (tmp_tlb == NULL && tree_locked != TREE_WLOCKED) {
			tree_locked = TREE_WLOCKED;
			if (!TCPID_TREE_UPGRADE()) {
				TCPID_TREE_RUNLOCK();
				TCPID_TREE_WLOCK();

				/*
				 * The tree may have changed while we were
				 * unlocked.
				 */
				goto refind;
			}
		}

		/* If we need to add a new bucket, do it now. */
		if (tmp_tlb == NULL) {
			/* Allocate new bucket. */
			tlb = uma_zalloc(tcp_log_id_bucket_zone, M_NOWAIT);
			if (tlb == NULL) {
				rv = ENOBUFS;
				goto done_noinp;
			}
			counter_u64_add(tcp_log_pcb_ids_cur, 1);
			counter_u64_add(tcp_log_pcb_ids_tot, 1);

			if ((tcp_log_auto_all == false) &&
			    tcp_log_auto_mode &&
			    tcp_log_selectauto()) {
				/* Save off the log state */
				tlb->tlb_logstate = tcp_log_auto_mode;
			} else
				tlb->tlb_logstate = TCP_LOG_STATE_OFF;
			tlb->tlb_loglimit = 0;
			tlb->tlb_tag[0] = '\0'; /* Default to an empty tag. */

			/*
			 * Copy the ID to the bucket.
			 * NB: Don't use strlcpy() unless you are sure
			 * we've always validated NULL termination.
			 *
			 * TODO: When I'm done writing this, see if we
			 * we have correctly validated NULL termination and
			 * can use strlcpy(). :-)
			 */
			strncpy(tlb->tlb_id, id, TCP_LOG_ID_LEN - 1);
			tlb->tlb_id[TCP_LOG_ID_LEN - 1] = '\0';

			/*
			 * Take the refcount for the first node and go ahead
			 * and lock this. Note that we zero the tlb_mtx
			 * structure, since 0xdeadc0de flips the right bits
			 * for the code to think that this mutex has already
			 * been initialized. :-(
			 */
			SLIST_INIT(&tlb->tlb_head);
			refcount_init(&tlb->tlb_refcnt, 1);
			tlb->tlb_reqcnt = 1;
			memset(&tlb->tlb_mtx, 0, sizeof(struct mtx));
			TCPID_BUCKET_LOCK_INIT(tlb);
			TCPID_BUCKET_LOCK(tlb);
			bucket_locked = true;

#define	FREE_NEW_TLB()	do {						\
	TCPID_BUCKET_LOCK_DESTROY(tlb);					\
	uma_zfree(tcp_log_id_bucket_zone, tlb);				\
	counter_u64_add(tcp_log_pcb_ids_cur, (int64_t)-1);		\
	counter_u64_add(tcp_log_pcb_ids_tot, (int64_t)-1);		\
	bucket_locked = false;						\
	tlb = NULL;							\
} while (0)
			/*
			 * Relock the INP and make sure we are still
			 * unassigned.
			 */
			INP_WLOCK(inp);
			RECHECK_INP_CLEAN(FREE_NEW_TLB());
			if (tp->t_lib != NULL) {
				FREE_NEW_TLB();
				goto restart;
			}

			/* Add the new bucket to the tree. */
			tmp_tlb = RB_INSERT(tcp_log_id_tree, &tcp_log_id_head,
			    tlb);
			KASSERT(tmp_tlb == NULL,
			    ("%s: Unexpected conflicting bucket (%p) while "
			    "adding new bucket (%p)", __func__, tmp_tlb, tlb));

			/*
			 * If we found a conflicting bucket, free the new
			 * one we made and fall through to use the existing
			 * bucket.
			 */
			if (tmp_tlb != NULL) {
				FREE_NEW_TLB();
				INP_WUNLOCK(inp);
			}
#undef	FREE_NEW_TLB
		}

		/* If we found an existing bucket, use it. */
		if (tmp_tlb != NULL) {
			tlb = tmp_tlb;
			TCPID_BUCKET_LOCK(tlb);
			bucket_locked = true;

			/*
			 * Relock the INP and make sure we are still
			 * unassigned.
			 */
			INP_UNLOCK_ASSERT(inp);
			INP_WLOCK(inp);
			RECHECK_INP();
			if (tp->t_lib != NULL) {
				TCPID_BUCKET_UNLOCK(tlb);
				bucket_locked = false;
				tlb = NULL;
				goto restart;
			}

			/* Take a reference on the bucket. */
			TCPID_BUCKET_REF(tlb);

			/* Record the request. */
			tcp_log_increment_reqcnt(tlb);
		}

		tcp_log_grow_tlb(tlb->tlb_id, tp);

		/* Add the new node to the list. */
		SLIST_INSERT_HEAD(&tlb->tlb_head, tln, tln_list);
		tp->t_lib = tlb;
		tp->t_lin = tln;
		if (tp->t_lib->tlb_logstate > TCP_LOG_STATE_OFF) {
			/* Clone in any logging */
			tp->_t_logstate = tp->t_lib->tlb_logstate;
		}
		if (tp->t_lib->tlb_loglimit) {
			/* The loglimit too */
			tp->t_loglimit = tp->t_lib->tlb_loglimit;
		}
		tln = NULL;
	}

	rv = 0;

done:
	/* Unlock things, as needed, and return. */
	INP_WUNLOCK(inp);
done_noinp:
	INP_UNLOCK_ASSERT(inp);
	if (bucket_locked) {
		TCPID_BUCKET_LOCK_ASSERT(tlb);
		TCPID_BUCKET_UNLOCK(tlb);
	} else if (tlb != NULL)
		TCPID_BUCKET_UNLOCK_ASSERT(tlb);
	if (tree_locked == TREE_WLOCKED) {
		TCPID_TREE_WLOCK_ASSERT();
		TCPID_TREE_WUNLOCK();
	} else if (tree_locked == TREE_RLOCKED) {
		TCPID_TREE_RLOCK_ASSERT();
		TCPID_TREE_RUNLOCK();
	} else
		TCPID_TREE_UNLOCK_ASSERT();
	if (tln != NULL)
		uma_zfree(tcp_log_id_node_zone, tln);
	return (rv);
}
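
/*
 * Illustrative sketch (not compiled here): tcp_log_set_id() is normally
 * reached from the TCP_LOGID socket option, so a userland process would
 * associate a connection with a log ID roughly as below. The option name is
 * taken from <netinet/tcp.h>; error handling is minimal on purpose and the
 * helper name is hypothetical.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <string.h>

static int
example_set_log_id(int sock, const char *name)
{
	/* The ID is a NUL-terminated string of at most TCP_LOG_ID_LEN bytes. */
	return (setsockopt(sock, IPPROTO_TCP, TCP_LOGID, name,
	    strlen(name) + 1));
}
#endif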
/*
 * Get the TCP log ID for a TCPCB.
 * Called with INPCB locked.
 * 'buf' must point to a buffer that is at least TCP_LOG_ID_LEN bytes long.
 * Returns number of bytes copied.
 */
size_t
tcp_log_get_id(struct tcpcb *tp, char *buf)
{
	size_t len;

	INP_LOCK_ASSERT(tptoinpcb(tp));
	if (tp->t_lib != NULL) {
		len = strlcpy(buf, tp->t_lib->tlb_id, TCP_LOG_ID_LEN);
		KASSERT(len < TCP_LOG_ID_LEN,
		    ("%s:%d: tp->t_lib->tlb_id too long (%zu)",
		    __func__, __LINE__, len));
	} else {
		*buf = '\0';
		len = 0;
	}
	return (len);
}
/*
 * Get the tag associated with the TCPCB's log ID.
 * Called with INPCB locked. Returns with it unlocked.
 * 'buf' must point to a buffer that is at least TCP_LOG_TAG_LEN bytes long.
 * Returns number of bytes copied.
 */
size_t
tcp_log_get_tag(struct tcpcb *tp, char *buf)
{
	struct inpcb *inp = tptoinpcb(tp);
	struct tcp_log_id_bucket *tlb;
	size_t len;
	int tree_locked;

	INP_WLOCK_ASSERT(inp);

	tree_locked = TREE_UNLOCKED;
	tlb = tp->t_lib;

	if (tlb != NULL) {
		TCPID_BUCKET_REF(tlb);
		INP_WUNLOCK(inp);
		TCPID_BUCKET_LOCK(tlb);
		len = strlcpy(buf, tlb->tlb_tag, TCP_LOG_TAG_LEN);
		KASSERT(len < TCP_LOG_TAG_LEN,
		    ("%s:%d: tp->t_lib->tlb_tag too long (%zu)",
		    __func__, __LINE__, len));
		if (!tcp_log_unref_bucket(tlb, &tree_locked, NULL))
			TCPID_BUCKET_UNLOCK(tlb);

		if (tree_locked == TREE_WLOCKED) {
			TCPID_TREE_WLOCK_ASSERT();
			TCPID_TREE_WUNLOCK();
		} else if (tree_locked == TREE_RLOCKED) {
			TCPID_TREE_RLOCK_ASSERT();
			TCPID_TREE_RUNLOCK();
		} else
			TCPID_TREE_UNLOCK_ASSERT();
	} else {
		INP_WUNLOCK(inp);
		*buf = '\0';
		len = 0;
	}
	return (len);
}
/*
 * Get number of connections with the same log ID.
 * Log ID is taken from given TCPCB.
 * Called with INPCB locked.
 */
u_int
tcp_log_get_id_cnt(struct tcpcb *tp)
{

	INP_WLOCK_ASSERT(tptoinpcb(tp));
	return ((tp->t_lib == NULL) ? 0 : tp->t_lib->tlb_refcnt);
}
#ifdef TCPLOG_DEBUG_RINGBUF
/*
 * Functions/macros to increment/decrement reference count for a log
 * entry. This should catch when we do a double-free/double-remove or
 * a double-add.
 */
static inline void
_tcp_log_entry_refcnt_add(struct tcp_log_mem *log_entry, const char *func,
    int line)
{
	int refcnt;

	refcnt = atomic_fetchadd_int(&log_entry->tlm_refcnt, 1);
	if (refcnt != 0)
		panic("%s:%d: log_entry(%p)->tlm_refcnt is %d (expected 0)",
		    func, line, log_entry, refcnt);
}
#define	tcp_log_entry_refcnt_add(l)	\
    _tcp_log_entry_refcnt_add((l), __func__, __LINE__)

static inline void
_tcp_log_entry_refcnt_rem(struct tcp_log_mem *log_entry, const char *func,
    int line)
{
	int refcnt;

	refcnt = atomic_fetchadd_int(&log_entry->tlm_refcnt, -1);
	if (refcnt != 1)
		panic("%s:%d: log_entry(%p)->tlm_refcnt is %d (expected 1)",
		    func, line, log_entry, refcnt);
}
#define	tcp_log_entry_refcnt_rem(l)	\
    _tcp_log_entry_refcnt_rem((l), __func__, __LINE__)

#else /* !TCPLOG_DEBUG_RINGBUF */

#define	tcp_log_entry_refcnt_add(l)
#define	tcp_log_entry_refcnt_rem(l)

#endif /* TCPLOG_DEBUG_RINGBUF */
/*
 * Cleanup after removing a log entry, but only decrement the count if we
 * are running INVARIANTS.
 */
static void
tcp_log_free_log_common(struct tcp_log_mem *log_entry, int *count __unused)
{

	uma_zfree(tcp_log_zone, log_entry);
#ifdef INVARIANTS
	(*count)--;
	KASSERT(*count >= 0,
	    ("%s: count unexpectedly negative", __func__));
#endif
}

static void
tcp_log_free_entries(struct tcp_log_stailq *head, int *count)
{
	struct tcp_log_mem *log_entry;

	/* Free the entries. */
	while ((log_entry = STAILQ_FIRST(head)) != NULL) {
		STAILQ_REMOVE_HEAD(head, tlm_queue);
		tcp_log_entry_refcnt_rem(log_entry);
		tcp_log_free_log_common(log_entry, count);
	}
}

/* Cleanup after removing a log entry. */
static void
tcp_log_remove_log_cleanup(struct tcpcb *tp, struct tcp_log_mem *log_entry)
{

	uma_zfree(tcp_log_zone, log_entry);
	tp->t_lognum--;
	KASSERT(tp->t_lognum >= 0,
	    ("%s: tp->t_lognum unexpectedly negative", __func__));
}

/* Remove a log entry from the head of a list. */
static void
tcp_log_remove_log_head(struct tcpcb *tp, struct tcp_log_mem *log_entry)
{

	KASSERT(log_entry == STAILQ_FIRST(&tp->t_logs),
	    ("%s: attempt to remove non-HEAD log entry", __func__));
	STAILQ_REMOVE_HEAD(&tp->t_logs, tlm_queue);
	tcp_log_entry_refcnt_rem(log_entry);
	tcp_log_remove_log_cleanup(tp, log_entry);
}
#ifdef TCPLOG_DEBUG_RINGBUF
/*
 * Initialize the log entry's reference count, which we want to
 * survive allocations.
 */
static int
tcp_log_zone_init(void *mem, int size, int flags __unused)
{
	struct tcp_log_mem *tlm;

	KASSERT(size >= sizeof(struct tcp_log_mem),
	    ("%s: unexpectedly short (%d) allocation", __func__, size));
	tlm = (struct tcp_log_mem *)mem;
	tlm->tlm_refcnt = 0;
	return (0);
}

/*
 * Double check that the refcnt is zero on allocation and return.
 */
static int
tcp_log_zone_ctor(void *mem, int size, void *args __unused, int flags __unused)
{
	struct tcp_log_mem *tlm;

	KASSERT(size >= sizeof(struct tcp_log_mem),
	    ("%s: unexpectedly short (%d) allocation", __func__, size));
	tlm = (struct tcp_log_mem *)mem;
	if (tlm->tlm_refcnt != 0)
		panic("%s:%d: tlm(%p)->tlm_refcnt is %d (expected 0)",
		    __func__, __LINE__, tlm, tlm->tlm_refcnt);
	return (0);
}

static void
tcp_log_zone_dtor(void *mem, int size, void *args __unused)
{
	struct tcp_log_mem *tlm;

	KASSERT(size >= sizeof(struct tcp_log_mem),
	    ("%s: unexpectedly short (%d) allocation", __func__, size));
	tlm = (struct tcp_log_mem *)mem;
	if (tlm->tlm_refcnt != 0)
		panic("%s:%d: tlm(%p)->tlm_refcnt is %d (expected 0)",
		    __func__, __LINE__, tlm, tlm->tlm_refcnt);
}
#endif /* TCPLOG_DEBUG_RINGBUF */
/* Do global initialization. */
void
tcp_log_init(void)
{

	tcp_log_zone = uma_zcreate("tcp_log", sizeof(struct tcp_log_mem),
#ifdef TCPLOG_DEBUG_RINGBUF
	    tcp_log_zone_ctor, tcp_log_zone_dtor, tcp_log_zone_init,
#else
	    NULL, NULL, NULL,
#endif
	    NULL, UMA_ALIGN_PTR, 0);
	(void)uma_zone_set_max(tcp_log_zone, TCP_LOG_BUF_DEFAULT_GLOBAL_LIMIT);
	tcp_log_id_bucket_zone = uma_zcreate("tcp_log_id_bucket",
	    sizeof(struct tcp_log_id_bucket), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	tcp_log_id_node_zone = uma_zcreate("tcp_log_id_node",
	    sizeof(struct tcp_log_id_node), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
#ifdef TCPLOG_DEBUG_COUNTERS
	tcp_log_queued = counter_u64_alloc(M_WAITOK);
	tcp_log_que_fail1 = counter_u64_alloc(M_WAITOK);
	tcp_log_que_fail2 = counter_u64_alloc(M_WAITOK);
	tcp_log_que_fail3 = counter_u64_alloc(M_WAITOK);
	tcp_log_que_fail4 = counter_u64_alloc(M_WAITOK);
	tcp_log_que_fail5 = counter_u64_alloc(M_WAITOK);
	tcp_log_que_copyout = counter_u64_alloc(M_WAITOK);
	tcp_log_que_read = counter_u64_alloc(M_WAITOK);
	tcp_log_que_freed = counter_u64_alloc(M_WAITOK);
#endif
	tcp_log_pcb_ids_cur = counter_u64_alloc(M_WAITOK);
	tcp_log_pcb_ids_tot = counter_u64_alloc(M_WAITOK);

	rw_init_flags(&tcp_id_tree_lock, "TCP ID tree", RW_NEW);
	mtx_init(&tcp_log_expireq_mtx, "TCP log expireq", NULL, MTX_DEF);
	callout_init(&tcp_log_expireq_callout, 1);
}
/* Do per-TCPCB initialization. */
void
tcp_log_tcpcbinit(struct tcpcb *tp)
{

	/* A new TCPCB should start out zero-initialized. */
	STAILQ_INIT(&tp->t_logs);

	/*
	 * If we are doing auto-capturing, figure out whether we will capture
	 * this session.
	 */
	tp->t_loglimit = tcp_log_session_limit;
	if ((tcp_log_auto_all == true) &&
	    tcp_log_auto_mode &&
	    tcp_log_selectauto()) {
		tp->_t_logstate = tcp_log_auto_mode;
		tp->t_flags2 |= TF2_LOG_AUTO;
	}
}
/* Remove entries */
static void
tcp_log_expire(void *unused __unused)
{
	struct tcp_log_id_bucket *tlb;
	struct tcp_log_id_node *tln;
	sbintime_t expiry_limit;
	int tree_locked;

	TCPLOG_EXPIREQ_LOCK();
	if (callout_pending(&tcp_log_expireq_callout)) {
		/* Callout was reset. */
		TCPLOG_EXPIREQ_UNLOCK();
		return;
	}

	/*
	 * Process entries until we reach one that expires too far in the
	 * future. Look one second in the future.
	 */
	expiry_limit = getsbinuptime() + SBT_1S;
	tree_locked = TREE_UNLOCKED;

	while ((tln = STAILQ_FIRST(&tcp_log_expireq_head)) != NULL &&
	    tln->tln_expiretime <= expiry_limit) {
		if (!callout_active(&tcp_log_expireq_callout)) {
			/*
			 * Callout was stopped. I guess we should
			 * just quit at this point.
			 */
			TCPLOG_EXPIREQ_UNLOCK();
			return;
		}

		/*
		 * Remove the node from the head of the list and unlock
		 * the list. Change the expiry time to SBT_MAX as a signal
		 * to other threads that we now own this.
		 */
		STAILQ_REMOVE_HEAD(&tcp_log_expireq_head, tln_expireq);
		tln->tln_expiretime = SBT_MAX;
		TCPLOG_EXPIREQ_UNLOCK();

		/*
		 * Remove the node from the bucket.
		 */
		tlb = tln->tln_bucket;
		TCPID_BUCKET_LOCK(tlb);
		if (tcp_log_remove_id_node(NULL, NULL, tlb, tln, &tree_locked)) {
			tcp_log_id_validate_tree_lock(tree_locked);
			if (tree_locked == TREE_WLOCKED)
				TCPID_TREE_WUNLOCK();
			else
				TCPID_TREE_RUNLOCK();
			tree_locked = TREE_UNLOCKED;
		}

		/* Drop the INP reference. */
		INP_WLOCK(tln->tln_inp);
		if (!in_pcbrele_wlocked(tln->tln_inp))
			INP_WUNLOCK(tln->tln_inp);

		/* Free the log records. */
		tcp_log_free_entries(&tln->tln_entries, &tln->tln_count);

		/* Free the node. */
		uma_zfree(tcp_log_id_node_zone, tln);

		/* Relock the expiry queue. */
		TCPLOG_EXPIREQ_LOCK();
	}

	/*
	 * We've expired all the entries we can. Do we need to reschedule
	 * the callout?
	 */
	callout_deactivate(&tcp_log_expireq_callout);
	if (tln != NULL) {
		/*
		 * Get max(now + TCP_LOG_EXPIRE_INTVL, tln->tln_expiretime) and
		 * set the next callout to that. (This helps ensure we generally
		 * run the callout no more often than desired.)
		 */
		expiry_limit = getsbinuptime() + TCP_LOG_EXPIRE_INTVL;
		if (expiry_limit < tln->tln_expiretime)
			expiry_limit = tln->tln_expiretime;
		callout_reset_sbt(&tcp_log_expireq_callout, expiry_limit,
		    SBT_1S, tcp_log_expire, NULL, C_ABSOLUTE);
	}

	/* We're done. Unlock the expiry queue. */
	TCPLOG_EXPIREQ_UNLOCK();
}
/*
 * Move log data from the TCPCB to a new node. This will reset the TCPCB log
 * entries and log count; however, it will not touch other things from the
 * TCPCB (e.g. t_lin, t_lib).
 *
 * NOTE: Must hold a lock on the INP.
 */
static void
tcp_log_move_tp_to_node(struct tcpcb *tp, struct tcp_log_id_node *tln)
{
	struct inpcb *inp = tptoinpcb(tp);

	INP_WLOCK_ASSERT(inp);

	tln->tln_ie = inp->inp_inc.inc_ie;
	if (inp->inp_inc.inc_flags & INC_ISIPV6)
		tln->tln_af = AF_INET6;
	else
		tln->tln_af = AF_INET;
	tln->tln_entries = tp->t_logs;
	tln->tln_count = tp->t_lognum;
	tln->tln_bucket = tp->t_lib;

	/* Clear information from the PCB. */
	STAILQ_INIT(&tp->t_logs);
	tp->t_lognum = 0;
}
/* Do per-TCPCB cleanup */
void
tcp_log_tcpcbfini(struct tcpcb *tp)
{
	struct tcp_log_id_node *tln, *tln_first;
	struct tcp_log_mem *log_entry;
	sbintime_t callouttime;

	INP_WLOCK_ASSERT(tptoinpcb(tp));
	if (tp->_t_logstate) {
		union tcp_log_stackspecific log;
		struct timeval tv;
#ifdef TCP_ACCOUNTING
		struct tcp_log_buffer *lgb;
		int i;
#endif

		memset(&log, 0, sizeof(log));
#ifdef TCP_ACCOUNTING
		if (tp->t_flags2 & TF2_TCP_ACCOUNTING) {
			for (i = 0; i < TCP_NUM_CNT_COUNTERS; i++) {
				log.u_raw.u64_flex[i] = tp->tcp_cnt_counters[i];
			}
			lgb = tcp_log_event(tp, NULL,
			    NULL, NULL,
			    TCP_LOG_ACCOUNTING, 0,
			    0, &log, false, NULL, NULL, 0, &tv);
			if (lgb != NULL)
				lgb->tlb_flex1 = TCP_NUM_CNT_COUNTERS;
			for (i = 0; i < TCP_NUM_CNT_COUNTERS; i++) {
				log.u_raw.u64_flex[i] = tp->tcp_proc_time[i];
			}
			lgb = tcp_log_event(tp, NULL,
			    NULL, NULL,
			    TCP_LOG_ACCOUNTING, 0,
			    0, &log, false, NULL, NULL, 0, &tv);
			if (lgb != NULL)
				lgb->tlb_flex1 = TCP_NUM_CNT_COUNTERS;
		}
#endif
		log.u_bbr.timeStamp = tcp_get_usecs(&tv);
		log.u_bbr.cur_del_rate = tp->t_end_info;
		(void)tcp_log_event(tp, NULL,
		    NULL, NULL,
		    TCP_LOG_CONNEND, 0,
		    0, &log, false, NULL, NULL, 0, &tv);
	}
	/*
	 * If we were gathering packets to be automatically dumped, try to do
	 * it now. If this succeeds, the log information in the TCPCB will be
	 * cleared. Otherwise, we'll handle the log information as we do
	 * for other states.
	 */
	switch(tp->_t_logstate) {
	case TCP_LOG_STATE_HEAD_AUTO:
		(void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from head",
		    M_NOWAIT, false);
		break;
	case TCP_LOG_STATE_TAIL_AUTO:
		(void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from tail",
		    M_NOWAIT, false);
		break;
	case TCP_LOG_VIA_BBPOINTS:
		(void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from bbpoints",
		    M_NOWAIT, false);
		break;
	case TCP_LOG_STATE_CONTINUAL:
		(void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual",
		    M_NOWAIT, false);
		break;
	}

	/*
	 * There are two ways we could keep logs: per-socket or per-ID. If
	 * we are tracking logs with an ID, then the logs survive the
	 * destruction of the TCPCB.
	 *
	 * If the TCPCB is associated with an ID node, move the logs from the
	 * TCPCB to the ID node. In theory, this is safe, for reasons which I
	 * will now explain for my own benefit when I next need to figure out
	 * why this works.
	 *
	 * We own the INP lock. Therefore, no one else can change the contents
	 * of this node (Rule C). Further, no one can remove this node from
	 * the bucket while we hold the lock (Rule D). Basically, no one can
	 * mess with this node. That leaves two states in which we could be:
	 *
	 * 1. Another thread is currently waiting to acquire the INP lock, with
	 *    plans to do something with this node. When we drop the INP lock,
	 *    they will have a chance to do that. They will recheck the
	 *    tln_closed field (see note to Rule C) and then acquire the
	 *    bucket lock before proceeding further.
	 *
	 * 2. Another thread will try to acquire a lock at some point in the
	 *    future. If they try to acquire a lock before we set the
	 *    tln_closed field, they will follow state #1. If they try to
	 *    acquire a lock after we set the tln_closed field, they will be
	 *    able to make changes to the node, at will, following Rule C.
	 *
	 * Therefore, we currently own this node and can make any changes
	 * we want. But, as soon as we set the tln_closed field to true, we
	 * have effectively dropped our lock on the node. (For this reason, we
	 * also need to make sure our writes are ordered correctly. An atomic
	 * operation with "release" semantics should be sufficient.)
	 */

	if (tp->t_lin != NULL) {
		struct inpcb *inp = tptoinpcb(tp);

		/* Copy the relevant information to the log entry. */
		tln = tp->t_lin;
		KASSERT(tln->tln_inp == inp,
		    ("%s: Mismatched inp (tln->tln_inp=%p, tp inpcb=%p)",
		    __func__, tln->tln_inp, inp));
		tcp_log_move_tp_to_node(tp, tln);

		/* Clear information from the PCB. */
		tp->t_lin = NULL;
		tp->t_lib = NULL;

		/*
		 * Take a reference on the INP. This ensures that the INP
		 * remains valid while the node is on the expiry queue. This
		 * ensures the INP is valid for other threads that may be
		 * racing to lock this node when we move it to the expire
		 * queue.
		 */
		in_pcbref(inp);

		/*
		 * Store the entry on the expiry list. The exact behavior
		 * depends on whether we have entries to keep. If so, we
		 * put the entry at the tail of the list and expire in
		 * TCP_LOG_EXPIRE_TIME. Otherwise, we expire "now" and put
		 * the entry at the head of the list. (Handling the cleanup
		 * via the expiry timer lets us avoid locking messy-ness here.)
		 */
		tln->tln_expiretime = getsbinuptime();
		TCPLOG_EXPIREQ_LOCK();
		if (tln->tln_count) {
			tln->tln_expiretime += TCP_LOG_EXPIRE_TIME;
			if (STAILQ_EMPTY(&tcp_log_expireq_head) &&
			    !callout_active(&tcp_log_expireq_callout)) {
				/*
				 * We are adding the first entry and a callout
				 * is not currently scheduled; therefore, we
				 * need to schedule one.
				 */
				callout_reset_sbt(&tcp_log_expireq_callout,
				    tln->tln_expiretime, SBT_1S, tcp_log_expire,
				    NULL, C_ABSOLUTE);
			}
			STAILQ_INSERT_TAIL(&tcp_log_expireq_head, tln,
			    tln_expireq);
		} else {
			callouttime = tln->tln_expiretime +
			    TCP_LOG_EXPIRE_INTVL;
			tln_first = STAILQ_FIRST(&tcp_log_expireq_head);

			if ((tln_first == NULL ||
			    callouttime < tln_first->tln_expiretime) &&
			    (callout_pending(&tcp_log_expireq_callout) ||
			    !callout_active(&tcp_log_expireq_callout))) {
				/*
				 * The list is empty, or we want to run the
				 * expire code before the first entry's timer
				 * fires. Also, we are in a case where a callout
				 * is not actively running. We want to reset
				 * the callout to occur sooner.
				 */
				callout_reset_sbt(&tcp_log_expireq_callout,
				    callouttime, SBT_1S, tcp_log_expire, NULL,
				    C_ABSOLUTE);
			}

			/*
			 * Insert to the head, or just after the head, as
			 * appropriate. (This might result in small
			 * mis-orderings as a bunch of "expire now" entries
			 * gather at the start of the list, but that should
			 * not produce big problems, since the expire timer
			 * will walk through all of them.)
			 */
			if (tln_first == NULL ||
			    tln->tln_expiretime < tln_first->tln_expiretime)
				STAILQ_INSERT_HEAD(&tcp_log_expireq_head, tln,
				    tln_expireq);
			else
				STAILQ_INSERT_AFTER(&tcp_log_expireq_head,
				    tln_first, tln, tln_expireq);
		}
		TCPLOG_EXPIREQ_UNLOCK();

		/*
		 * We are done messing with the tln. After this point, we
		 * can't touch it. (Note that the "release" semantics should
		 * be included with the TCPLOG_EXPIREQ_UNLOCK() call above.
		 * Therefore, they should be unnecessary here. However, it
		 * seems like a good idea to include them anyway, since we
		 * really are releasing a lock here.)
		 */
		atomic_store_rel_int(&tln->tln_closed, 1);
	} else {
		/* Remove log entries. */
		while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL)
			tcp_log_remove_log_head(tp, log_entry);
		KASSERT(tp->t_lognum == 0,
		    ("%s: After freeing entries, tp->t_lognum=%d (expected 0)",
		    __func__, tp->t_lognum));
	}

	/*
	 * Change the log state to off (just in case anything tries to sneak
	 * in a last-minute log).
	 */
	tp->_t_logstate = TCP_LOG_STATE_OFF;
}
void
tcp_log_purge_tp_logbuf(struct tcpcb *tp)
{
	struct tcp_log_mem *log_entry;

	INP_WLOCK_ASSERT(tptoinpcb(tp));
	if (tp->t_lognum == 0)
		return;

	while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL)
		tcp_log_remove_log_head(tp, log_entry);
	KASSERT(tp->t_lognum == 0,
	    ("%s: After freeing entries, tp->t_lognum=%d (expected 0)",
	    __func__, tp->t_lognum));
	tp->_t_logstate = TCP_LOG_STATE_OFF;
}
/*
 * This logs an event for a TCP socket. Normally, this is called via
 * TCP_LOG_EVENT or TCP_LOG_EVENT_VERBOSE. See the documentation for
 * TCP_LOG_EVENT().
 */
struct tcp_log_buffer *
tcp_log_event(struct tcpcb *tp, struct tcphdr *th, struct sockbuf *rxbuf,
    struct sockbuf *txbuf, uint8_t eventid, int errornum, uint32_t len,
    union tcp_log_stackspecific *stackinfo, int th_hostorder,
    const char *output_caller, const char *func, int line, const struct timeval *itv)
{
	struct tcp_log_mem *log_entry;
	struct tcp_log_buffer *log_buf;
	int attempt_count = 0;
	struct tcp_log_verbose *log_verbose;
	uint32_t logsn;

	KASSERT((func == NULL && line == 0) || (func != NULL && line > 0),
	    ("%s called with inconsistent func (%p) and line (%d) arguments",
	    __func__, func, line));

	INP_WLOCK_ASSERT(tptoinpcb(tp));
	if (tcp_disable_all_bb_logs) {
		/*
		 * The global shutdown logging
		 * switch has been thrown. Call
		 * the purge function that frees
		 * purges out the logs and
		 * turns off logging.
		 */
		tcp_log_purge_tp_logbuf(tp);
		return (NULL);
	}
	KASSERT(tp->_t_logstate == TCP_LOG_STATE_HEAD ||
	    tp->_t_logstate == TCP_LOG_STATE_TAIL ||
	    tp->_t_logstate == TCP_LOG_STATE_CONTINUAL ||
	    tp->_t_logstate == TCP_LOG_STATE_HEAD_AUTO ||
	    tp->_t_logstate == TCP_LOG_VIA_BBPOINTS ||
	    tp->_t_logstate == TCP_LOG_STATE_TAIL_AUTO,
	    ("%s called with unexpected tp->_t_logstate (%d)", __func__,
	    tp->_t_logstate));

	/*
	 * Get the serial number. We do this early so it will
	 * increment even if we end up skipping the log entry for some
	 * reason.
	 */
	logsn = tp->t_logsn++;

	/*
	 * Can we get a new log entry? If so, increment the lognum counter
	 * here.
	 */
retry:
	if (tp->t_lognum < tp->t_loglimit) {
		if ((log_entry = uma_zalloc(tcp_log_zone, M_NOWAIT)) != NULL)
			tp->t_lognum++;
	} else
		log_entry = NULL;

	/* Do we need to try to reuse? */
	if (log_entry == NULL) {
		/*
		 * Sacrifice auto-logged sessions without a log ID if
		 * tcp_log_auto_all is false. (If they don't have a log
		 * ID by now, it is probable that either they won't get one
		 * or we are resource-constrained.)
		 */
		if (tp->t_lib == NULL && (tp->t_flags2 & TF2_LOG_AUTO) &&
		    !tcp_log_auto_all) {
			if (tcp_log_state_change(tp, TCP_LOG_STATE_CLEAR)) {
#ifdef INVARIANTS
				panic("%s:%d: tcp_log_state_change() failed "
				    "to set tp %p to TCP_LOG_STATE_CLEAR",
				    __func__, __LINE__, tp);
#endif
				tp->_t_logstate = TCP_LOG_STATE_OFF;
			}
			return (NULL);
		}

		/*
		 * If we are in TCP_LOG_STATE_HEAD_AUTO state, try to dump
		 * the buffers. If successful, deactivate tracing. Otherwise,
		 * leave it active so we will retry.
		 */
		if (tp->_t_logstate == TCP_LOG_STATE_HEAD_AUTO &&
		    !tcp_log_dump_tp_logbuf(tp, "auto-dumped from head",
		    M_NOWAIT, false)) {
			tp->_t_logstate = TCP_LOG_STATE_OFF;
			return (NULL);
		} else if ((tp->_t_logstate == TCP_LOG_STATE_CONTINUAL) &&
		    !tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual",
		    M_NOWAIT, false)) {
			if (attempt_count == 0) {
				attempt_count++;
				goto retry;
			}
#ifdef TCPLOG_DEBUG_COUNTERS
			counter_u64_add(tcp_log_que_fail4, 1);
#endif
			return (NULL);
		} else if ((tp->_t_logstate == TCP_LOG_VIA_BBPOINTS) &&
		    !tcp_log_dump_tp_logbuf(tp, "auto-dumped from bbpoints",
		    M_NOWAIT, false)) {
			if (attempt_count == 0) {
				attempt_count++;
				goto retry;
			}
#ifdef TCPLOG_DEBUG_COUNTERS
			counter_u64_add(tcp_log_que_fail4, 1);
#endif
			return (NULL);
		} else if (tp->_t_logstate == TCP_LOG_STATE_HEAD_AUTO)
			return (NULL);

		/* If in HEAD state, just deactivate the tracing and return. */
		if (tp->_t_logstate == TCP_LOG_STATE_HEAD) {
			tp->_t_logstate = TCP_LOG_STATE_OFF;
			return (NULL);
		}

		/*
		 * Get a buffer to reuse. If that fails, just give up.
		 * (We can't log anything without a buffer in which to
		 * put it.)
		 *
		 * Note that we don't change the t_lognum counter
		 * here. Because we are re-using the buffer, the total
		 * number won't change.
		 */
		if ((log_entry = STAILQ_FIRST(&tp->t_logs)) == NULL)
			return (NULL);
		STAILQ_REMOVE_HEAD(&tp->t_logs, tlm_queue);
		tcp_log_entry_refcnt_rem(log_entry);
	}

	KASSERT(log_entry != NULL,
	    ("%s: log_entry unexpectedly NULL", __func__));
	/* Extract the log buffer and verbose buffer pointers. */
	log_buf = &log_entry->tlm_buf;
	log_verbose = &log_entry->tlm_v;

	/* Basic entries. */
	if (itv == NULL)
		microuptime(&log_buf->tlb_tv);
	else
		memcpy(&log_buf->tlb_tv, itv, sizeof(struct timeval));
	log_buf->tlb_ticks = ticks;
	log_buf->tlb_sn = logsn;
	log_buf->tlb_stackid = tp->t_fb->tfb_id;
	log_buf->tlb_eventid = eventid;
	log_buf->tlb_eventflags = 0;
	log_buf->tlb_errno = errornum;

	/* Socket buffers */
	if (rxbuf != NULL) {
		log_buf->tlb_eventflags |= TLB_FLAG_RXBUF;
		log_buf->tlb_rxbuf.tls_sb_acc = rxbuf->sb_acc;
		log_buf->tlb_rxbuf.tls_sb_ccc = rxbuf->sb_ccc;
		log_buf->tlb_rxbuf.tls_sb_spare = 0;
	} else {
		log_buf->tlb_rxbuf.tls_sb_acc = 0;
		log_buf->tlb_rxbuf.tls_sb_ccc = 0;
	}
	if (txbuf != NULL) {
		log_buf->tlb_eventflags |= TLB_FLAG_TXBUF;
		log_buf->tlb_txbuf.tls_sb_acc = txbuf->sb_acc;
		log_buf->tlb_txbuf.tls_sb_ccc = txbuf->sb_ccc;
		log_buf->tlb_txbuf.tls_sb_spare = 0;
	} else {
		log_buf->tlb_txbuf.tls_sb_acc = 0;
		log_buf->tlb_txbuf.tls_sb_ccc = 0;
	}
	/* Copy values from tp to the log entry. */
#define	COPY_STAT(f)	log_buf->tlb_ ## f = tp->f
#define	COPY_STAT_T(f)	log_buf->tlb_ ## f = tp->t_ ## f
	COPY_STAT_T(starttime);
	COPY_STAT(snd_cwnd);
	COPY_STAT(snd_recover);
	COPY_STAT(snd_ssthresh);
	COPY_STAT_T(rttvar);
	COPY_STAT_T(dupacks);
	COPY_STAT_T(segqlen);
	COPY_STAT(snd_numholes);
	COPY_STAT(snd_scale);
	COPY_STAT(rcv_scale);
	COPY_STAT_T(flags2);
	COPY_STAT_T(fbyte_in);
	COPY_STAT_T(fbyte_out);
#undef COPY_STAT
#undef COPY_STAT_T

	/* Copy stack-specific info. */
	if (stackinfo != NULL) {
		memcpy(&log_buf->tlb_stackinfo, stackinfo,
		    sizeof(log_buf->tlb_stackinfo));
		log_buf->tlb_eventflags |= TLB_FLAG_STACKINFO;
	}

	/* The packet */
	log_buf->tlb_len = len;
	if (th != NULL) {
		int optlen;

		log_buf->tlb_eventflags |= TLB_FLAG_HDR;
		log_buf->tlb_th = *th;
		if (th_hostorder)
			tcp_fields_to_net(&log_buf->tlb_th);
		optlen = (th->th_off << 2) - sizeof (struct tcphdr);
		if (optlen > 0)
			memcpy(log_buf->tlb_opts, th + 1, optlen);
	} else {
		memset(&log_buf->tlb_th, 0, sizeof(*th));
	}

	/* Verbose information */
	if (func != NULL) {
		log_buf->tlb_eventflags |= TLB_FLAG_VERBOSE;
		if (output_caller != NULL)
			strlcpy(log_verbose->tlv_snd_frm, output_caller,
			    TCP_FUNC_LEN);
		else
			*log_verbose->tlv_snd_frm = 0;
		strlcpy(log_verbose->tlv_trace_func, func, TCP_FUNC_LEN);
		log_verbose->tlv_trace_line = line;
	}

	/* Insert the new log at the tail. */
	STAILQ_INSERT_TAIL(&tp->t_logs, log_entry, tlm_queue);
	tcp_log_entry_refcnt_add(log_entry);
	return (log_buf);
}
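
/*
 * Illustrative sketch (not compiled here) of how a TCP stack typically
 * records an event. Callers normally go through the TCP_LOG_EVENT() macro
 * from <netinet/tcp_log_buf.h>, which also supplies func/line for verbose
 * traces; the direct call below just shows how example arguments line up
 * with the parameters above. The socket (so), header (th), error and len
 * come from the calling context and are assumptions of this sketch.
 */
#if 0
	{
		union tcp_log_stackspecific log;
		struct timeval tv;

		memset(&log, 0, sizeof(log));
		log.u_bbr.timeStamp = tcp_get_usecs(&tv);
		(void)tcp_log_event(tp, th, &so->so_rcv, &so->so_snd,
		    TCP_LOG_OUT, error, len, &log, false, NULL, NULL, 0, &tv);
	}
#endif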
/*
 * Change the logging state for a TCPCB. Returns 0 on success or an
 * error code on failure.
 */
int
tcp_log_state_change(struct tcpcb *tp, int state)
{
	struct tcp_log_mem *log_entry;
	int rv;

	INP_WLOCK_ASSERT(tptoinpcb(tp));
	rv = 0;
	switch(state) {
	case TCP_LOG_STATE_CLEAR:
		while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL)
			tcp_log_remove_log_head(tp, log_entry);
		/* FALLTHROUGH */

	case TCP_LOG_STATE_OFF:
		tp->_t_logstate = TCP_LOG_STATE_OFF;
		break;

	case TCP_LOG_STATE_TAIL:
	case TCP_LOG_STATE_HEAD:
	case TCP_LOG_STATE_CONTINUAL:
	case TCP_LOG_VIA_BBPOINTS:
	case TCP_LOG_STATE_HEAD_AUTO:
	case TCP_LOG_STATE_TAIL_AUTO:
		/*
		 * When the RATIO_OFF state is set for the bucket, the log ID
		 * this tp is associated with has been probabilistically opted
		 * out of logging per tcp_log_apply_ratio().
		 */
		if (tp->t_lib == NULL ||
		    tp->t_lib->tlb_logstate != TCP_LOG_STATE_RATIO_OFF) {
			tp->_t_logstate = state;
		} else {
			tp->_t_logstate = TCP_LOG_STATE_OFF;
		}
		break;

	default:
		return (EINVAL);
	}
	if (tcp_disable_all_bb_logs) {
		/* We are prohibited from doing any logs */
		tp->_t_logstate = TCP_LOG_STATE_OFF;
	}
	tp->t_flags2 &= ~(TF2_LOG_AUTO);

	return (rv);
}
/* If tcp_drain() is called, flush half the log entries. */
void
tcp_log_drain(struct tcpcb *tp)
{
	struct tcp_log_mem *log_entry, *next;
	int target, skip;

	INP_WLOCK_ASSERT(tptoinpcb(tp));
	if ((target = tp->t_lognum / 2) == 0)
		return;

	/*
	 * XXXRRS: At this point I don't think it is wise that
	 * we do this. All that a drain call means is that
	 * we are hitting one of the system mbuf limits. BB
	 * logging, or freeing of them, will not create any
	 * more mbufs and really has nothing to do with
	 * the system running out of mbufs. For now I
	 * am changing this to free any "AUTO" by dumping
	 * them out. But this should either be changed
	 * so that it gets called when we hit the BB limit
	 * or it should just not get called (one of the two)
	 * since I don't think the mbuf <-> BB log cleanup
	 * is the right thing to do here.
	 */

	/*
	 * If we are logging the "head" packets, we want to discard
	 * from the tail of the queue. Otherwise, we want to discard
	 * from the head.
	 */
	if (tp->_t_logstate == TCP_LOG_STATE_HEAD) {
		skip = tp->t_lognum - target;
		STAILQ_FOREACH(log_entry, &tp->t_logs, tlm_queue)
			if (!--skip)
				break;
		KASSERT(log_entry != NULL,
		    ("%s: skipped through all entries!", __func__));
		if (log_entry == NULL)
			return;
		while ((next = STAILQ_NEXT(log_entry, tlm_queue)) != NULL) {
			STAILQ_REMOVE_AFTER(&tp->t_logs, log_entry, tlm_queue);
			tcp_log_entry_refcnt_rem(next);
			tcp_log_remove_log_cleanup(tp, next);
#ifdef INVARIANTS
			target--;
#endif
		}
		KASSERT(target == 0,
		    ("%s: After removing from tail, target was %d", __func__,
			target));
	} else if (tp->_t_logstate == TCP_LOG_STATE_HEAD_AUTO) {
		(void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from head at drain",
		    M_NOWAIT, false);
	} else if (tp->_t_logstate == TCP_LOG_STATE_TAIL_AUTO) {
		(void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from tail at drain",
		    M_NOWAIT, false);
	} else if (tp->_t_logstate == TCP_LOG_VIA_BBPOINTS) {
		(void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from bbpoints",
		    M_NOWAIT, false);
	} else if (tp->_t_logstate == TCP_LOG_STATE_CONTINUAL) {
		(void)tcp_log_dump_tp_logbuf(tp, "auto-dumped from continual",
		    M_NOWAIT, false);
	} else {
		while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL &&
		    target--)
			tcp_log_remove_log_head(tp, log_entry);
		KASSERT(target <= 0,
		    ("%s: After removing from head, target was %d", __func__,
			target));
		KASSERT(tp->t_lognum > 0,
		    ("%s: After removing from head, tp->t_lognum was %d",
			__func__, tp->t_lognum));
		KASSERT(log_entry != NULL,
		    ("%s: After removing from head, the tailq was empty",
			__func__));
	}
}
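
/*
 * Summary of the drain policy above: when capturing the head of a
 * connection (TCP_LOG_STATE_HEAD) the oldest entries are the valuable
 * ones, so the tail of the queue is trimmed; the auto, continual, and
 * bbpoints states push their entries out to the log device instead of
 * discarding them; any other state simply drops entries from the head
 * of the queue.
 */
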
static int
tcp_log_copyout(struct sockopt *sopt, void *src, void *dst, size_t len)
{

	if (sopt->sopt_td != NULL)
		return (copyout(src, dst, len));
	bcopy(src, dst, len);
	return (0);
}
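
/*
 * tcp_log_copyout() lets the same buffer-filling code serve two callers:
 * when sopt->sopt_td is set we are servicing a socket option request from
 * a thread and must copyout() to user space; when it is NULL (as in the
 * in-kernel export path, which fills a malloc'd buffer) a plain bcopy()
 * is sufficient.
 */
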
static int
tcp_log_logs_to_buf(struct sockopt *sopt, struct tcp_log_stailq *log_tailqp,
    struct tcp_log_buffer **end, int count)
{
	struct tcp_log_buffer *out_entry;
	struct tcp_log_mem *log_entry;
	size_t entrysize;
	int error;
#ifdef INVARIANTS
	int orig_count = count;
#endif

	/* Copy the data out. */
	error = 0;
	out_entry = (struct tcp_log_buffer *) sopt->sopt_val;
	STAILQ_FOREACH(log_entry, log_tailqp, tlm_queue) {
		count--;
		KASSERT(count >= 0,
		    ("%s:%d: Exceeded expected count (%d) processing list %p",
		    __func__, __LINE__, orig_count, log_tailqp));

#ifdef TCPLOG_DEBUG_COUNTERS
		counter_u64_add(tcp_log_que_copyout, 1);
#endif

		/*
		 * Skip copying out the header if it isn't present.
		 * Instead, copy out zeros (to ensure we don't leak info).
		 * TODO: Make sure we truly do zero everything we don't
		 * explicitly set.
		 */
		if (log_entry->tlm_buf.tlb_eventflags & TLB_FLAG_HDR)
			entrysize = sizeof(struct tcp_log_buffer);
		else
			entrysize = offsetof(struct tcp_log_buffer, tlb_th);
		error = tcp_log_copyout(sopt, &log_entry->tlm_buf, out_entry,
		    entrysize);
		if (error)
			break;
		if (!(log_entry->tlm_buf.tlb_eventflags & TLB_FLAG_HDR)) {
			error = tcp_log_copyout(sopt, zerobuf,
			    ((uint8_t *)out_entry) + entrysize,
			    sizeof(struct tcp_log_buffer) - entrysize);
			if (error)
				break;
		}

		/*
		 * Copy out the verbose bit, if needed. Either way,
		 * increment the output pointer the correct amount.
		 */
		if (log_entry->tlm_buf.tlb_eventflags & TLB_FLAG_VERBOSE) {
			error = tcp_log_copyout(sopt, &log_entry->tlm_v,
			    out_entry->tlb_verbose,
			    sizeof(struct tcp_log_verbose));
			if (error)
				break;
			out_entry = (struct tcp_log_buffer *)
			    (((uint8_t *) (out_entry + 1)) +
			    sizeof(struct tcp_log_verbose));
		} else
			out_entry++;
	}
	*end = out_entry;
	KASSERT(error || count == 0,
	    ("%s:%d: Less than expected count (%d) processing list %p"
	    " (%d remain)", __func__, __LINE__, orig_count,
	    log_tailqp, count));

	return (error);
}
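
/*
 * Layout of the buffer produced above: records are packed back to back.
 * Each record starts with a struct tcp_log_buffer; if TLB_FLAG_VERBOSE is
 * set for that event, its struct tcp_log_verbose follows immediately,
 * otherwise the next tcp_log_buffer starts right after it.  Events without
 * TLB_FLAG_HDR still occupy a full tcp_log_buffer, with the header portion
 * zeroed rather than skipped.
 */
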
/*
 * Copy out the buffer. Note that we do incremental copying, so
 * sooptcopyout() won't work. However, the goal is to produce the same
 * end result as if we copied in the entire user buffer, updated it,
 * and then used sooptcopyout() to copy it out.
 *
 * NOTE: This should be called with a write lock on the PCB; however,
 * the function will drop it after it extracts the data from the TCPCB.
 */
int
tcp_log_getlogbuf(struct sockopt *sopt, struct tcpcb *tp)
{
	struct tcp_log_stailq log_tailq;
	struct tcp_log_mem *log_entry, *log_next;
	struct tcp_log_buffer *out_entry;
	struct inpcb *inp = tptoinpcb(tp);
	size_t outsize, entrysize;
	int error, outnum;

	INP_WLOCK_ASSERT(inp);

	/*
	 * Determine which log entries will fit in the buffer. As an
	 * optimization, skip this if all the entries will clearly fit
	 * in the buffer. (However, get an exact size if we are using
	 * INVARIANTS.)
	 */
#ifndef INVARIANTS
	if (sopt->sopt_valsize / (sizeof(struct tcp_log_buffer) +
	    sizeof(struct tcp_log_verbose)) >= tp->t_lognum) {
		log_entry = STAILQ_LAST(&tp->t_logs, tcp_log_mem, tlm_queue);
		log_next = NULL;
		outsize = 0;
		outnum = tp->t_lognum;
	} else {
#endif
		outsize = outnum = 0;
		log_entry = NULL;
		STAILQ_FOREACH(log_next, &tp->t_logs, tlm_queue) {
			entrysize = sizeof(struct tcp_log_buffer);
			if (log_next->tlm_buf.tlb_eventflags &
			    TLB_FLAG_VERBOSE)
				entrysize += sizeof(struct tcp_log_verbose);
			if ((sopt->sopt_valsize - outsize) < entrysize)
				break;
			outsize += entrysize;
			outnum++;
			log_entry = log_next;
		}
		KASSERT(outsize <= sopt->sopt_valsize,
		    ("%s: calculated output size (%zu) greater than available "
			"space (%zu)", __func__, outsize, sopt->sopt_valsize));
#ifndef INVARIANTS
	}
#endif

	/*
	 * Copy traditional sooptcopyout() behavior: if sopt->sopt_val
	 * is NULL, silently skip the copy. However, in this case, we
	 * will leave the list alone and return. Functionally, this
	 * gives userspace a way to poll for an approximate buffer
	 * size they will need to get the log entries.
	 */
	if (sopt->sopt_val == NULL) {
		INP_WUNLOCK(inp);
		if (outsize == 0) {
			outsize = outnum * (sizeof(struct tcp_log_buffer) +
			    sizeof(struct tcp_log_verbose));
		}
		if (sopt->sopt_valsize > outsize)
			sopt->sopt_valsize = outsize;
		return (0);
	}
	/*
	 * Break apart the list. We'll save the ones we want to copy
	 * out locally and remove them from the TCPCB list. We can
	 * then drop the INPCB lock while we do the copyout.
	 *
	 * There are roughly three cases:
	 * 1. There was nothing to copy out. That's easy: drop the
	 * lock and return.
	 * 2. We are copying out the entire list. Again, that's easy:
	 * move the whole list.
	 * 3. We are copying out a partial list. That's harder. We
	 * need to update the list book-keeping entries.
	 */
	if (log_entry != NULL && log_next == NULL) {
		/* Move entire list. */
		KASSERT(outnum == tp->t_lognum,
		    ("%s:%d: outnum (%d) should match tp->t_lognum (%d)",
			__func__, __LINE__, outnum, tp->t_lognum));
		log_tailq = tp->t_logs;
		tp->t_lognum = 0;
		STAILQ_INIT(&tp->t_logs);
	} else if (log_entry != NULL) {
		/* Move partial list. */
		KASSERT(outnum < tp->t_lognum,
		    ("%s:%d: outnum (%d) not less than tp->t_lognum (%d)",
			__func__, __LINE__, outnum, tp->t_lognum));
		STAILQ_FIRST(&log_tailq) = STAILQ_FIRST(&tp->t_logs);
		STAILQ_FIRST(&tp->t_logs) = STAILQ_NEXT(log_entry, tlm_queue);
		KASSERT(STAILQ_NEXT(log_entry, tlm_queue) != NULL,
		    ("%s:%d: tp->t_logs is unexpectedly shorter than expected "
			"(tp: %p, log_tailq: %p, outnum: %d, tp->t_lognum: %d)",
			__func__, __LINE__, tp, &log_tailq, outnum,
			tp->t_lognum));
		STAILQ_NEXT(log_entry, tlm_queue) = NULL;
		log_tailq.stqh_last = &STAILQ_NEXT(log_entry, tlm_queue);
		tp->t_lognum -= outnum;
	} else
		STAILQ_INIT(&log_tailq);

	/* Drop the PCB lock. */
	INP_WUNLOCK(inp);

	/* Copy the data out. */
	error = tcp_log_logs_to_buf(sopt, &log_tailq, &out_entry, outnum);

	if (error) {
		/* Restore the list. */
		INP_WLOCK(inp);
		if ((inp->inp_flags & INP_DROPPED) == 0) {
			tp = intotcpcb(inp);

			/* Merge the two lists. */
			STAILQ_CONCAT(&log_tailq, &tp->t_logs);
			tp->t_logs = log_tailq;
			tp->t_lognum += outnum;
		}
		INP_WUNLOCK(inp);
	} else {
		/* Sanity check entries. */
		KASSERT(((caddr_t)out_entry - (caddr_t)sopt->sopt_val) ==
		    outsize, ("%s: Actual output size (%zu) != "
			"calculated output size (%zu)", __func__,
			(size_t)((caddr_t)out_entry - (caddr_t)sopt->sopt_val),
			outsize));

		/* Free the entries we just copied out. */
		STAILQ_FOREACH_SAFE(log_entry, &log_tailq, tlm_queue, log_next) {
			tcp_log_entry_refcnt_rem(log_entry);
			uma_zfree(tcp_log_zone, log_entry);
		}
	}

	sopt->sopt_valsize = (size_t)((caddr_t)out_entry -
	    (caddr_t)sopt->sopt_val);
	return (error);
}
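
/*
 * Note on the error path above: because the entries were detached from the
 * TCPCB before the copyout, a failed copy splices them back onto the front
 * of tp->t_logs (preserving their order) rather than dropping them, as long
 * as the connection has not been dropped in the meantime.
 */
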
static void
tcp_log_free_queue(struct tcp_log_dev_queue *param)
{
	struct tcp_log_dev_log_queue *entry;

	KASSERT(param != NULL, ("%s: called with NULL param", __func__));
	if (param == NULL)
		return;

	entry = (struct tcp_log_dev_log_queue *)param;

	/* Free the entries. */
	tcp_log_free_entries(&entry->tldl_entries, &entry->tldl_count);

	/* Free the buffer, if it is allocated. */
	if (entry->tldl_common.tldq_buf != NULL)
		free(entry->tldl_common.tldq_buf, M_TCPLOGDEV);

	/* Free the queue entry. */
	free(entry, M_TCPLOGDEV);
}
static struct tcp_log_common_header *
tcp_log_expandlogbuf(struct tcp_log_dev_queue *param)
{
	struct tcp_log_dev_log_queue *entry;
	struct tcp_log_header *hdr;
	uint8_t *end;
	struct sockopt sopt;
	int error;

	entry = (struct tcp_log_dev_log_queue *)param;

	/* Take a worst-case guess at space needs. */
	sopt.sopt_valsize = sizeof(struct tcp_log_header) +
	    entry->tldl_count * (sizeof(struct tcp_log_buffer) +
	    sizeof(struct tcp_log_verbose));
	hdr = malloc(sopt.sopt_valsize, M_TCPLOGDEV, M_NOWAIT);
	if (hdr == NULL) {
#ifdef TCPLOG_DEBUG_COUNTERS
		counter_u64_add(tcp_log_que_fail5, entry->tldl_count);
#endif
		return (NULL);
	}
	sopt.sopt_val = hdr + 1;
	sopt.sopt_valsize -= sizeof(struct tcp_log_header);
	sopt.sopt_td = NULL;

	error = tcp_log_logs_to_buf(&sopt, &entry->tldl_entries,
	    (struct tcp_log_buffer **)&end, entry->tldl_count);
	if (error) {
		free(hdr, M_TCPLOGDEV);
		return (NULL);
	}

	/* Free the entries. */
	tcp_log_free_entries(&entry->tldl_entries, &entry->tldl_count);
	entry->tldl_count = 0;

	memset(hdr, 0, sizeof(struct tcp_log_header));
	hdr->tlh_version = TCP_LOG_BUF_VER;
	hdr->tlh_type = TCP_LOG_DEV_TYPE_BBR;
	hdr->tlh_length = end - (uint8_t *)hdr;
	hdr->tlh_ie = entry->tldl_ie;
	hdr->tlh_af = entry->tldl_af;
	getboottime(&hdr->tlh_offset);
	strlcpy(hdr->tlh_id, entry->tldl_id, TCP_LOG_ID_LEN);
	strlcpy(hdr->tlh_tag, entry->tldl_tag, TCP_LOG_TAG_LEN);
	strlcpy(hdr->tlh_reason, entry->tldl_reason, TCP_LOG_REASON_LEN);
	return ((struct tcp_log_common_header *)hdr);
}
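
/*
 * The block built above is what eventually reaches a reader of the BB log
 * device (dev/tcp_log): a struct tcp_log_header carrying the version, type,
 * endpoints, id, tag, and reason strings, followed by the packed records
 * produced by tcp_log_logs_to_buf(), with tlh_length covering the header
 * plus the data.
 */
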
/*
 * Queue the tcpcb's log buffer for transmission via the log buffer facility.
 *
 * NOTE: This should be called with a write lock on the PCB.
 *
 * how should be M_WAITOK or M_NOWAIT. If M_WAITOK, the function will drop
 * and reacquire the INP lock if it needs to do so.
 *
 * If force is false, this will only dump auto-logged sessions if
 * tcp_log_auto_all is true or if there is a log ID defined for the session.
 */
int
tcp_log_dump_tp_logbuf(struct tcpcb *tp, char *reason, int how, bool force)
{
	struct tcp_log_dev_log_queue *entry;
	struct inpcb *inp = tptoinpcb(tp);
#ifdef TCPLOG_DEBUG_COUNTERS
	int num_entries;
#endif

	INP_WLOCK_ASSERT(inp);

	/* If there are no log entries, there is nothing to do. */
	if (tp->t_lognum == 0)
		return (0);

	/* Check for a log ID. */
	if (tp->t_lib == NULL && (tp->t_flags2 & TF2_LOG_AUTO) &&
	    !tcp_log_auto_all && !force) {
		struct tcp_log_mem *log_entry;

		/*
		 * We needed a log ID and none was found. Free the log entries
		 * and return success. Also, cancel further logging. If the
		 * session doesn't have a log ID by now, we'll assume it isn't
		 * going to get one.
		 */
		while ((log_entry = STAILQ_FIRST(&tp->t_logs)) != NULL)
			tcp_log_remove_log_head(tp, log_entry);
		KASSERT(tp->t_lognum == 0,
		    ("%s: After freeing entries, tp->t_lognum=%d (expected 0)",
			__func__, tp->t_lognum));
		tp->_t_logstate = TCP_LOG_STATE_OFF;
		return (0);
	}
	/*
	 * Allocate memory. If we must wait, we'll need to drop the locks
	 * and reacquire them (and do all the related business that goes
	 * along with that).
	 */
	entry = malloc(sizeof(struct tcp_log_dev_log_queue), M_TCPLOGDEV,
	    M_NOWAIT);
	if (entry == NULL && (how & M_NOWAIT)) {
#ifdef TCPLOG_DEBUG_COUNTERS
		counter_u64_add(tcp_log_que_fail3, 1);
#endif
		return (ENOBUFS);
	}
	if (entry == NULL) {
		INP_WUNLOCK(inp);
		entry = malloc(sizeof(struct tcp_log_dev_log_queue),
		    M_TCPLOGDEV, M_WAITOK);
		INP_WLOCK(inp);
		/*
		 * Note that this check is slightly overly-restrictive in
		 * that the TCB can survive either of these events.
		 * However, there is currently not a good way to ensure
		 * that is the case. So, if we hit this M_WAIT path, we
		 * may end up dropping some entries. That seems like a
		 * small price to pay for safety.
		 */
		if (inp->inp_flags & INP_DROPPED) {
			free(entry, M_TCPLOGDEV);
#ifdef TCPLOG_DEBUG_COUNTERS
			counter_u64_add(tcp_log_que_fail2, 1);
#endif
			return (ECONNRESET);
		}
		tp = intotcpcb(inp);
		if (tp->t_lognum == 0) {
			free(entry, M_TCPLOGDEV);
			return (0);
		}
	}

	/* Fill in the unique parts of the queue entry. */
	if (tp->t_lib != NULL) {
		strlcpy(entry->tldl_id, tp->t_lib->tlb_id, TCP_LOG_ID_LEN);
		strlcpy(entry->tldl_tag, tp->t_lib->tlb_tag, TCP_LOG_TAG_LEN);
	} else {
		strlcpy(entry->tldl_id, "UNKNOWN", TCP_LOG_ID_LEN);
		strlcpy(entry->tldl_tag, "UNKNOWN", TCP_LOG_TAG_LEN);
	}
	if (reason != NULL)
		strlcpy(entry->tldl_reason, reason, TCP_LOG_REASON_LEN);
	else
		strlcpy(entry->tldl_reason, "UNKNOWN", TCP_LOG_REASON_LEN);
	entry->tldl_ie = inp->inp_inc.inc_ie;
	if (inp->inp_inc.inc_flags & INC_ISIPV6)
		entry->tldl_af = AF_INET6;
	else
		entry->tldl_af = AF_INET;
	entry->tldl_entries = tp->t_logs;
	entry->tldl_count = tp->t_lognum;

	/* Fill in the common parts of the queue entry. */
	entry->tldl_common.tldq_buf = NULL;
	entry->tldl_common.tldq_xform = tcp_log_expandlogbuf;
	entry->tldl_common.tldq_dtor = tcp_log_free_queue;

	/* Clear the log data from the TCPCB. */
#ifdef TCPLOG_DEBUG_COUNTERS
	num_entries = tp->t_lognum;
#endif
	tp->t_lognum = 0;
	STAILQ_INIT(&tp->t_logs);

	/* Add the entry. If no one is listening, free the entry. */
	if (tcp_log_dev_add_log((struct tcp_log_dev_queue *)entry)) {
		tcp_log_free_queue((struct tcp_log_dev_queue *)entry);
#ifdef TCPLOG_DEBUG_COUNTERS
		counter_u64_add(tcp_log_que_fail1, num_entries);
	} else
		counter_u64_add(tcp_log_queued, num_entries);
#else
	}
#endif
	return (0);
}
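
/*
 * Ownership note for the function above: once the queue entry has been
 * filled in, the log entries belong to it and the TCPCB's list is reset,
 * so the connection can continue logging immediately.  If
 * tcp_log_dev_add_log() reports that nobody is listening,
 * tcp_log_free_queue() frees both the entries and the queue entry.
 */
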
/*
 * Queue the log_id_node's log buffers for transmission via the log buffer
 * facility.
 *
 * NOTE: This should be called with the bucket locked and referenced.
 *
 * how should be M_WAITOK or M_NOWAIT. If M_WAITOK, the function will drop
 * and reacquire the bucket lock if it needs to do so. (The caller must
 * ensure that the tln is no longer on any lists so no one else will mess
 * with this while the lock is dropped!)
 */
static int
tcp_log_dump_node_logbuf(struct tcp_log_id_node *tln, char *reason, int how)
{
	struct tcp_log_dev_log_queue *entry;
	struct tcp_log_id_bucket *tlb;

	tlb = tln->tln_bucket;
	TCPID_BUCKET_LOCK_ASSERT(tlb);
	KASSERT(tlb->tlb_refcnt > 0,
	    ("%s:%d: Called with unreferenced bucket (tln=%p, tlb=%p)",
		__func__, __LINE__, tln, tlb));
	KASSERT(tln->tln_closed,
	    ("%s:%d: Called for node with tln_closed==false (tln=%p)",
		__func__, __LINE__, tln));

	/* If there are no log entries, there is nothing to do. */
	if (tln->tln_count == 0)
		return (0);

	/*
	 * Allocate memory. If we must wait, we'll need to drop the locks
	 * and reacquire them (and do all the related business that goes
	 * along with that).
	 */
	entry = malloc(sizeof(struct tcp_log_dev_log_queue), M_TCPLOGDEV,
	    M_NOWAIT);
	if (entry == NULL && (how & M_NOWAIT))
		return (ENOBUFS);
	if (entry == NULL) {
		TCPID_BUCKET_UNLOCK(tlb);
		entry = malloc(sizeof(struct tcp_log_dev_log_queue),
		    M_TCPLOGDEV, M_WAITOK);
		TCPID_BUCKET_LOCK(tlb);
	}

	/* Fill in the common parts of the queue entry. */
	entry->tldl_common.tldq_buf = NULL;
	entry->tldl_common.tldq_xform = tcp_log_expandlogbuf;
	entry->tldl_common.tldq_dtor = tcp_log_free_queue;

	/* Fill in the unique parts of the queue entry. */
	strlcpy(entry->tldl_id, tlb->tlb_id, TCP_LOG_ID_LEN);
	strlcpy(entry->tldl_tag, tlb->tlb_tag, TCP_LOG_TAG_LEN);
	if (reason != NULL)
		strlcpy(entry->tldl_reason, reason, TCP_LOG_REASON_LEN);
	else
		strlcpy(entry->tldl_reason, "UNKNOWN", TCP_LOG_REASON_LEN);
	entry->tldl_ie = tln->tln_ie;
	entry->tldl_entries = tln->tln_entries;
	entry->tldl_count = tln->tln_count;
	entry->tldl_af = tln->tln_af;

	/* Add the entry. If no one is listening, free the entry. */
	if (tcp_log_dev_add_log((struct tcp_log_dev_queue *)entry))
		tcp_log_free_queue((struct tcp_log_dev_queue *)entry);

	return (0);
}
/*
 * Queue the log buffers for all sessions in a bucket for transmission via
 * the log buffer facility.
 *
 * NOTE: This should be called with a locked bucket; however, the function
 * will drop the lock.
 */
#define	LOCAL_SAVE	10
static void
tcp_log_dumpbucketlogs(struct tcp_log_id_bucket *tlb, char *reason)
{
	struct tcp_log_id_node local_entries[LOCAL_SAVE];
	struct inpcb *inp;
	struct tcpcb *tp;
	struct tcp_log_id_node *cur_tln, *prev_tln, *tmp_tln;
	int i, num_local_entries, tree_locked;
	bool expireq_locked;

	TCPID_BUCKET_LOCK_ASSERT(tlb);

	/*
	 * Take a reference on the bucket to keep it from disappearing until
	 * we are done.
	 */
	TCPID_BUCKET_REF(tlb);

	/*
	 * We'll try to create these without dropping locks. However, we
	 * might very well need to drop locks to get memory. If that's the
	 * case, we'll save up to 10 on the stack, and sacrifice the rest.
	 * (Otherwise, we need to worry about finding our place again in a
	 * potentially changed list. It just doesn't seem worth the trouble
	 * to do that.)
	 */
	expireq_locked = false;
	num_local_entries = 0;
	prev_tln = NULL;
	tree_locked = TREE_UNLOCKED;
	SLIST_FOREACH_SAFE(cur_tln, &tlb->tlb_head, tln_list, tmp_tln) {
		/*
		 * If this isn't associated with a TCPCB, we can pull it off
		 * the list now. We need to be careful that the expire timer
		 * hasn't already taken ownership (tln_expiretime == SBT_MAX).
		 * If so, we let the expire timer code free the data.
		 */
		if (cur_tln->tln_closed) {
no_inp:
			/*
			 * Get the expireq lock so we can get a consistent
			 * read of tln_expiretime and so we can remove this
			 * from the expireq.
			 */
			if (!expireq_locked) {
				TCPLOG_EXPIREQ_LOCK();
				expireq_locked = true;
			}

			/*
			 * We ignore entries with tln_expiretime == SBT_MAX.
			 * The expire timer code already owns those.
			 */
			KASSERT(cur_tln->tln_expiretime > (sbintime_t) 0,
			    ("%s:%d: node on the expire queue without positive "
				"expire time", __func__, __LINE__));
			if (cur_tln->tln_expiretime == SBT_MAX) {
				prev_tln = cur_tln;
				continue;
			}

			/* Remove the entry from the expireq. */
			STAILQ_REMOVE(&tcp_log_expireq_head, cur_tln,
			    tcp_log_id_node, tln_expireq);

			/* Remove the entry from the bucket. */
			if (prev_tln != NULL)
				SLIST_REMOVE_AFTER(prev_tln, tln_list);
			else
				SLIST_REMOVE_HEAD(&tlb->tlb_head, tln_list);

			/*
			 * Drop the INP and bucket reference counts. Due to
			 * lock-ordering rules, we need to drop the expire
			 * queue lock.
			 */
			TCPLOG_EXPIREQ_UNLOCK();
			expireq_locked = false;

			/* Drop the INP reference. */
			INP_WLOCK(cur_tln->tln_inp);
			if (!in_pcbrele_wlocked(cur_tln->tln_inp))
				INP_WUNLOCK(cur_tln->tln_inp);

			if (tcp_log_unref_bucket(tlb, &tree_locked, NULL)) {
#ifdef INVARIANTS
				panic("%s: Bucket refcount unexpectedly 0.",
				    __func__);
#endif
				/*
				 * Recover as best we can: free the entry we
				 * own.
				 */
				tcp_log_free_entries(&cur_tln->tln_entries,
				    &cur_tln->tln_count);
				uma_zfree(tcp_log_id_node_zone, cur_tln);
				goto done;
			}

			if (tcp_log_dump_node_logbuf(cur_tln, reason,
			    M_NOWAIT)) {
				/*
				 * If we have space, save the entries locally.
				 * Otherwise, free them.
				 */
				if (num_local_entries < LOCAL_SAVE) {
					local_entries[num_local_entries] =
					    *cur_tln;
					num_local_entries++;
				} else {
					tcp_log_free_entries(
					    &cur_tln->tln_entries,
					    &cur_tln->tln_count);
				}
			}

			/* No matter what, we are done with the node now. */
			uma_zfree(tcp_log_id_node_zone, cur_tln);

			/*
			 * Because we removed this entry from the list, prev_tln
			 * (which tracks the previous entry still on the tlb
			 * list) remains unchanged.
			 */
			continue;
		}
		/*
		 * If we get to this point, the session data is still held in
		 * the TCPCB. So, we need to pull the data out of that.
		 *
		 * We will need to drop the expireq lock so we can lock the INP.
		 * We can then try to extract the data the "easy" way. If that
		 * fails, we'll save the log entries for later.
		 */
		if (expireq_locked) {
			TCPLOG_EXPIREQ_UNLOCK();
			expireq_locked = false;
		}

		/* Lock the INP and then re-check the state. */
		inp = cur_tln->tln_inp;
		INP_WLOCK(inp);
		/*
		 * If we caught this while it was transitioning, the data
		 * might have moved from the TCPCB to the tln (signified by
		 * setting tln_closed to true). If so, treat this like an
		 * inactive connection.
		 */
		if (cur_tln->tln_closed) {
			/*
			 * It looks like we may have caught this connection
			 * while it was transitioning from active to inactive.
			 * Treat this like an inactive connection.
			 */
			INP_WUNLOCK(inp);
			goto no_inp;
		}

		/*
		 * Try to dump the data from the tp without dropping the lock.
		 * If this fails, try to save off the data locally.
		 */
		tp = cur_tln->tln_tp;
		if (tcp_log_dump_tp_logbuf(tp, reason, M_NOWAIT, true) &&
		    num_local_entries < LOCAL_SAVE) {
			tcp_log_move_tp_to_node(tp,
			    &local_entries[num_local_entries]);
			local_entries[num_local_entries].tln_closed = 1;
			KASSERT(local_entries[num_local_entries].tln_bucket ==
			    tlb, ("%s: %d: bucket mismatch for node %p",
				__func__, __LINE__, cur_tln));
			num_local_entries++;
		}

		INP_WUNLOCK(inp);

		/*
		 * We are going to leave the current tln on the list. It will
		 * become the previous tln.
		 */
		prev_tln = cur_tln;
	}

	/* Drop our locks, if any. */
	KASSERT(tree_locked == TREE_UNLOCKED,
	    ("%s: %d: tree unexpectedly locked", __func__, __LINE__));
	switch (tree_locked) {
	case TREE_WLOCKED:
		TCPID_TREE_WUNLOCK();
		tree_locked = TREE_UNLOCKED;
		break;
	case TREE_RLOCKED:
		TCPID_TREE_RUNLOCK();
		tree_locked = TREE_UNLOCKED;
		break;
	}
	if (expireq_locked) {
		TCPLOG_EXPIREQ_UNLOCK();
		expireq_locked = false;
	}

	/*
	 * Try again for any saved entries. tcp_log_dump_node_logbuf() is
	 * guaranteed to free the log entries within the node. And, since
	 * the node itself is on our stack, we don't need to free it.
	 */
	for (i = 0; i < num_local_entries; i++)
		tcp_log_dump_node_logbuf(&local_entries[i], reason, M_WAITOK);

	/* Drop our reference. */
	if (!tcp_log_unref_bucket(tlb, &tree_locked, NULL))
		TCPID_BUCKET_UNLOCK(tlb);

done:
	/* Drop our locks, if any. */
	switch (tree_locked) {
	case TREE_WLOCKED:
		TCPID_TREE_WUNLOCK();
		break;
	case TREE_RLOCKED:
		TCPID_TREE_RUNLOCK();
		break;
	}
	if (expireq_locked)
		TCPLOG_EXPIREQ_UNLOCK();
}
#undef LOCAL_SAVE
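
/*
 * The LOCAL_SAVE machinery above exists because the per-node dumps are
 * first attempted with M_NOWAIT while the bucket walk still holds locks;
 * nodes that cannot be queued that way are parked in local_entries[] (up
 * to LOCAL_SAVE of them) and retried with M_WAITOK once every lock has
 * been dropped.
 */
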
/*
 * Queue the log buffers for all sessions in a bucket for transmission via
 * the log buffer facility.
 *
 * NOTE: This should be called with a locked INP; however, the function
 * will drop the lock.
 */
void
tcp_log_dump_tp_bucket_logbufs(struct tcpcb *tp, char *reason)
{
	struct inpcb *inp = tptoinpcb(tp);
	struct tcp_log_id_bucket *tlb;
	int tree_locked;

	/* Figure out our bucket and lock it. */
	INP_WLOCK_ASSERT(inp);
	tlb = tp->t_lib;
	if (tlb == NULL) {
		/*
		 * No bucket; treat this like a request to dump a single
		 * session's traces.
		 */
		(void)tcp_log_dump_tp_logbuf(tp, reason, M_WAITOK, true);
		INP_WUNLOCK(inp);
		return;
	}
	TCPID_BUCKET_REF(tlb);
	INP_WUNLOCK(inp);
	TCPID_BUCKET_LOCK(tlb);

	/* If we are the last reference, we have nothing more to do here. */
	tree_locked = TREE_UNLOCKED;
	if (tcp_log_unref_bucket(tlb, &tree_locked, NULL)) {
		switch (tree_locked) {
		case TREE_WLOCKED:
			TCPID_TREE_WUNLOCK();
			break;
		case TREE_RLOCKED:
			TCPID_TREE_RUNLOCK();
			break;
		}
		return;
	}

	/* Turn this over to tcp_log_dumpbucketlogs() to finish the work. */
	tcp_log_dumpbucketlogs(tlb, reason);
}
/*
 * Mark the end of a flow with the current stack. A stack can add
 * stack-specific info to this trace event by overriding this
 * function (see bbr_log_flowend() for example).
 */
void
tcp_log_flowend(struct tcpcb *tp)
{
	if (tp->_t_logstate != TCP_LOG_STATE_OFF) {
		struct socket *so = tptosocket(tp);
		TCP_LOG_EVENT(tp, NULL, &so->so_rcv, &so->so_snd,
				TCP_LOG_FLOWEND, 0, 0, NULL, false);
	}
}
void
tcp_log_sendfile(struct socket *so, off_t offset, size_t nbytes, int flags)
{
	struct inpcb *inp;
	struct tcpcb *tp;
#ifdef TCP_REQUEST_TRK
	struct tcp_sendfile_track *ent;
	int i, fnd = 0;
#endif

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp_log_sendfile: inp == NULL"));

	/* Quick check to see if logging is enabled for this connection. */
	tp = intotcpcb(inp);
	if ((inp->inp_flags & INP_DROPPED) ||
	    (tp->_t_logstate == TCP_LOG_STATE_OFF)) {
		return;
	}

	INP_WLOCK(inp);
	/* Double-check the log state now that we have the lock. */
	if (inp->inp_flags & INP_DROPPED)
		goto done;
	if (tp->_t_logstate != TCP_LOG_STATE_OFF) {
		struct timeval tv;
		tcp_log_eventspecific_t log;

		microuptime(&tv);
		log.u_sf.offset = offset;
		log.u_sf.length = nbytes;
		log.u_sf.flags = flags;

		TCP_LOG_EVENTP(tp, NULL,
		    &tptosocket(tp)->so_rcv,
		    &tptosocket(tp)->so_snd,
		    TCP_LOG_SENDFILE, 0, 0, &log, false, &tv);
	}
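
	/*
	 * Below, under TCP_REQUEST_TRK, the sendfile range is matched
	 * against the connection's tracked requests (tp->t_tcpreq_info):
	 * closed-end requests are checked first, then open-ended ones.
	 */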
#ifdef TCP_REQUEST_TRK
	if (tp->t_tcpreq_req == 0) {
		/* No HTTP requests to track. */
		goto done;
	}
	if (tp->t_tcpreq_closed == 0) {
		/* No closed-end requests to track. */
		goto skip_closed_req;
	}
	for (i = 0; i < MAX_TCP_TRK_REQ; i++) {
		/* Let's see if this one can be found. */
		ent = &tp->t_tcpreq_info[i];
		if (ent->flags == TCP_TRK_TRACK_FLG_EMPTY) {
			/* Not used. */
			continue;
		}
		if (ent->flags & TCP_TRK_TRACK_FLG_OPEN) {
			/* This pass does not consider open requests. */
			continue;
		}
		if (ent->flags & TCP_TRK_TRACK_FLG_COMP) {
			/* Don't look at what we have completed. */
			continue;
		}
		/* If we reach here, it is an allocated closed-end request. */
		if ((ent->start == offset) ||
		    ((offset > ent->start) && (offset < ent->end))) {
			/* It falls within this request. */
			fnd = 1;
		}
		if (fnd) {
			/*
			 * It is at or past the end, so it is complete.
			 */
			ent->flags |= TCP_TRK_TRACK_FLG_SEQV;
			/*
			 * When an entry completes we can take (snd_una + sb_cc) and know where
			 * the end of the range really is. Note that this works since two
			 * requests must be sequential and sendfile now is complete for *this* request.
			 * We must use sb_ccc since the data may still be in-flight in TLS.
			 *
			 * We always cautiously move the end_seq only if our calculations
			 * show it happened (just in case sf has the call to here at the wrong
			 * place). When we go COMP we will stop coming here and hopefully be
			 * left with the correct end_seq.
			 */
			if (SEQ_GT((tp->snd_una + so->so_snd.sb_ccc), ent->end_seq))
				ent->end_seq = tp->snd_una + so->so_snd.sb_ccc;
			if ((offset + nbytes) >= ent->end) {
				ent->flags |= TCP_TRK_TRACK_FLG_COMP;
				tcp_req_log_req_info(tp, ent, i,
				    TCP_TRK_REQ_LOG_COMPLETE, offset, nbytes);
			} else {
				tcp_req_log_req_info(tp, ent, i,
				    TCP_TRK_REQ_LOG_MOREYET, offset, nbytes);
			}
			/* We assume that sendfile never sends overlapping requests. */
			goto done;
		}
	}
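
	/*
	 * Worked example of the end_seq update above (illustrative numbers):
	 * if snd_una is 1000 and 500 bytes are still sitting in so_snd
	 * (sb_ccc), everything handed to TCP so far ends at sequence 1500,
	 * so end_seq is advanced to 1500 only when that is beyond its
	 * current value.
	 */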
skip_closed_req:
	if (!fnd) {
		/* OK, now let's look for open requests. */
		for (i = 0; i < MAX_TCP_TRK_REQ; i++) {
			ent = &tp->t_tcpreq_info[i];
			if (ent->flags == TCP_TRK_TRACK_FLG_EMPTY) {
				/* Not used. */
				continue;
			}
			if ((ent->flags & TCP_TRK_TRACK_FLG_OPEN) == 0)
				continue;
			/* If we reach here, it is an allocated open request. */
			if (ent->start == offset) {
				/* It begins this request. */
				ent->start_seq = tp->snd_una +
				    tptosocket(tp)->so_snd.sb_ccc;
				ent->flags |= TCP_TRK_TRACK_FLG_SEQV;
				break;
			} else if (offset > ent->start) {
				ent->flags |= TCP_TRK_TRACK_FLG_SEQV;