4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2016 Joyent, Inc.
24 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
25 * Copyright (c) 2013 by Delphix. All rights reserved.
27 /* Copyright (c) 1990 Mentat Inc. */
30 #include <inet/tcp_impl.h>
31 #include <sys/multidata.h>
32 #include <sys/sunddi.h>
34 /* Max size IP datagram is 64k - 1 */
/*
 * Per-family MSS ceilings: IP_MAXPACKET minus sizeof the IP header structure
 * (ipha_t for the _IPV4 macro, ip6_t for the _IPV6 macro) and sizeof tcpha_t.
 */
35 #define TCP_MSS_MAX_IPV4 (IP_MAXPACKET - (sizeof (ipha_t) + sizeof (tcpha_t)))
36 #define TCP_MSS_MAX_IPV6 (IP_MAXPACKET - (sizeof (ip6_t) + sizeof (tcpha_t)))
38 /* Max of the above */
/* NOTE(review): holds only while sizeof (ipha_t) <= sizeof (ip6_t) -- confirm. */
39 #define TCP_MSS_MAX TCP_MSS_MAX_IPV4
42 * Returns the current list of listener limit configurations.
/*
 * Getter for the listener limit configuration property.
 *
 * With tcps_listener_conf_lock held, walks the tcps_listener_conf list of the
 * TCP stack reached through stack->netstack_tcp and formats each entry with
 * snprintf(): "%d:%d" in one branch and ",%d:%d" (leading comma) in the
 * other, the first number being the entry's tl_port.
 *
 * stack - netstack whose netstack_tcp holds the listener configuration.
 * psize - limit compared against the running total tbytes; reaching it is
 *         treated as a buffer overflow and copying stops.
 * flags - tested against MOD_PROP_DEFAULT|MOD_PROP_PERM|MOD_PROP_POSSIBLE
 *         before the list is walked.
 *
 * NOTE(review): val is presumably the destination buffer that pval walks
 * through, and the second "%d" is presumably tl_ratio -- confirm.
 */
46 tcp_listener_conf_get(netstack_t
*stack
, mod_prop_info_t
*pinfo
,
47 const char *ifname
, void *val
, uint_t psize
, uint_t flags
)
49 tcp_stack_t
*tcps
= stack
->netstack_tcp
;
52 size_t nbytes
= 0, tbytes
= 0;
/* Default/permission/possible-value queries are special-cased up front. */
59 if (flags
& (MOD_PROP_DEFAULT
|MOD_PROP_PERM
|MOD_PROP_POSSIBLE
))
/* The list is only touched while the configuration lock is held. */
62 mutex_enter(&tcps
->tcps_listener_conf_lock
);
63 for (tl
= list_head(&tcps
->tcps_listener_conf
); tl
!= NULL
;
64 tl
= list_next(&tcps
->tcps_listener_conf
, tl
)) {
/*
 * One of the two formats below is used per entry; the comma-prefixed form
 * presumably separates an entry from the one before it -- confirm.
 */
66 nbytes
= snprintf(pval
, size
, "%d:%d", tl
->tl_port
,
69 nbytes
= snprintf(pval
, size
, ",%d:%d", tl
->tl_port
,
/* tbytes is the total produced so far; psize is the caller's limit. */
74 if (tbytes
>= psize
) {
75 /* Buffer overflow, stop copying information */
81 mutex_exit(&tcps
->tcps_listener_conf_lock
);
86 * Adds a new listener limit configuration, or updates the ratio of an existing entry for the same port.
/*
 * Setter that adds a listener limit entry, or updates an existing one.
 *
 * pval is parsed as "<port>:<ratio>" in base 10 with ddi_strtol(). The
 * checks flag a conversion failure, a port outside 1..USHRT_MAX, a character
 * other than ':' after the port, and a ratio that fails to convert or is
 * <= 0. NOTE(review): the flagged cases presumably return an error --
 * confirm.
 *
 * Under tcps_listener_conf_lock the tcps_listener_conf list is searched for
 * an entry with the same tl_port; a match has its tl_ratio overwritten.
 * Otherwise a new tcp_listener_t is obtained with kmem_alloc(KM_NOSLEEP),
 * filled in with the port and ratio, and appended with list_insert_tail().
 *
 * The lock is released after updating a match, on the allocation-check
 * path, and after inserting the new entry.
 */
90 tcp_listener_conf_add(netstack_t
*stack
, cred_t
*cr
, mod_prop_info_t
*pinfo
,
91 const char *ifname
, const void* pval
, uint_t flags
)
93 tcp_listener_t
*new_tl
;
98 tcp_stack_t
*tcps
= stack
->netstack_tcp
;
/* Requests carrying MOD_PROP_DEFAULT are special-cased before parsing. */
100 if (flags
& MOD_PROP_DEFAULT
)
/* Port: base-10 number in 1..USHRT_MAX, terminated by ':'. */
103 if (ddi_strtol(pval
, &colon
, 10, &lport
) != 0 || lport
<= 0 ||
104 lport
> USHRT_MAX
|| *colon
!= ':') {
/* Ratio: base-10 number right after the colon, must be positive. */
107 if (ddi_strtol(colon
+ 1, NULL
, 10, &ratio
) != 0 || ratio
<= 0)
110 mutex_enter(&tcps
->tcps_listener_conf_lock
);
111 for (tl
= list_head(&tcps
->tcps_listener_conf
); tl
!= NULL
;
112 tl
= list_next(&tcps
->tcps_listener_conf
, tl
)) {
113 /* There is an existing entry, so update its ratio value. */
114 if (tl
->tl_port
== lport
) {
115 tl
->tl_ratio
= ratio
;
116 mutex_exit(&tcps
->tcps_listener_conf_lock
);
/*
 * No entry for this port yet. KM_NOSLEEP is used while the mutex is held;
 * the comparison below is presumably against NULL (allocation failure) --
 * confirm.
 */
121 if ((new_tl
= kmem_alloc(sizeof (tcp_listener_t
), KM_NOSLEEP
)) ==
123 mutex_exit(&tcps
->tcps_listener_conf_lock
);
127 new_tl
->tl_port
= lport
;
128 new_tl
->tl_ratio
= ratio
;
129 list_insert_tail(&tcps
->tcps_listener_conf
, new_tl
);
130 mutex_exit(&tcps
->tcps_listener_conf_lock
);
135 * Removes the listener limit configuration for the given port.
/*
 * Setter that removes the listener limit entry for a port.
 *
 * pval is parsed as a base-10 port number with ddi_strtol(); a conversion
 * failure or a value <= 0 is flagged (the rest of that check is not shown
 * here). Under tcps_listener_conf_lock the tcps_listener_conf list is
 * searched for an entry whose tl_port equals the parsed port; the match is
 * unlinked with list_remove(), the lock is dropped, and only then is the
 * entry released with kmem_free().
 */
139 tcp_listener_conf_del(netstack_t
*stack
, cred_t
*cr
, mod_prop_info_t
*pinfo
,
140 const char *ifname
, const void* pval
, uint_t flags
)
144 tcp_stack_t
*tcps
= stack
->netstack_tcp
;
/* Requests carrying MOD_PROP_DEFAULT are special-cased before parsing. */
146 if (flags
& MOD_PROP_DEFAULT
)
149 if (ddi_strtol(pval
, NULL
, 10, &lport
) != 0 || lport
<= 0 ||
153 mutex_enter(&tcps
->tcps_listener_conf_lock
);
154 for (tl
= list_head(&tcps
->tcps_listener_conf
); tl
!= NULL
;
155 tl
= list_next(&tcps
->tcps_listener_conf
, tl
)) {
156 if (tl
->tl_port
== lport
) {
/* Unlink under the lock; free after the lock has been released. */
157 list_remove(&tcps
->tcps_listener_conf
, tl
);
158 mutex_exit(&tcps
->tcps_listener_conf_lock
);
159 kmem_free(tl
, sizeof (tcp_listener_t
));
/* Presumably reached when no entry matched the port -- confirm. */
163 mutex_exit(&tcps
->tcps_listener_conf_lock
);
/*
 * Setter wrapper for buffer-size properties.
 *
 * Forwards every argument to mod_set_buf_prop(), adding the TCP stack's own
 * property table (stack->netstack_tcp->tcps_propinfo_tbl) as the first
 * argument, and returns whatever mod_set_buf_prop() returns.
 */
168 tcp_set_buf_prop(netstack_t
*stack
, cred_t
*cr
, mod_prop_info_t
*pinfo
,
169 const char *ifname
, const void *pval
, uint_t flags
)
171 return (mod_set_buf_prop(stack
->netstack_tcp
->tcps_propinfo_tbl
, stack
,
172 cr
, pinfo
, ifname
, pval
, flags
));
/*
 * Getter wrapper for buffer-size properties.
 *
 * Forwards every argument to mod_get_buf_prop(), adding the TCP stack's own
 * property table (stack->netstack_tcp->tcps_propinfo_tbl) as the first
 * argument, and returns whatever mod_get_buf_prop() returns.
 */
176 tcp_get_buf_prop(netstack_t
*stack
, mod_prop_info_t
*pinfo
, const char *ifname
,
177 void *val
, uint_t psize
, uint_t flags
)
179 return (mod_get_buf_prop(stack
->netstack_tcp
->tcps_propinfo_tbl
, stack
,
180 pinfo
, ifname
, val
, psize
, flags
));
184 * Special checkers for smallest/largest anonymous port so they don't
185 * ever happen to be (largest < smallest).
/*
 * Setter for the smallest anonymous port.
 *
 * Converts pval with mod_uint32_value() (its result is kept in err and
 * tested for non-zero), compares the converted value against the stack's
 * tcps_largest_anon_port, and stores it in pinfo->prop_cur_uval.
 *
 * NOTE(review): the "new_value > tcps_largest_anon_port" test presumably
 * rejects the update so that smallest never exceeds largest -- confirm.
 */
189 tcp_smallest_anon_set(netstack_t
*stack
, cred_t
*cr
, mod_prop_info_t
*pinfo
,
190 const char *ifname
, const void *pval
, uint_t flags
)
192 unsigned long new_value
;
193 tcp_stack_t
*tcps
= stack
->netstack_tcp
;
196 if ((err
= mod_uint32_value(pval
, pinfo
, flags
, &new_value
)) != 0)
198 /* mod_uint32_value() + pinfo guarantees we're in TCP port range. */
/* Cross-check against the other end of the anonymous port range. */
199 if ((uint32_t)new_value
> tcps
->tcps_largest_anon_port
)
201 pinfo
->prop_cur_uval
= (uint32_t)new_value
;
/*
 * Setter for the largest anonymous port.
 *
 * Converts pval with mod_uint32_value() (its result is kept in err and
 * tested for non-zero), compares the converted value against the stack's
 * tcps_smallest_anon_port, and stores it in pinfo->prop_cur_uval.
 *
 * NOTE(review): the "new_value < tcps_smallest_anon_port" test presumably
 * rejects the update so that largest never drops below smallest -- confirm.
 */
207 tcp_largest_anon_set(netstack_t
*stack
, cred_t
*cr
, mod_prop_info_t
*pinfo
,
208 const char *ifname
, const void *pval
, uint_t flags
)
210 unsigned long new_value
;
211 tcp_stack_t
*tcps
= stack
->netstack_tcp
;
214 if ((err
= mod_uint32_value(pval
, pinfo
, flags
, &new_value
)) != 0)
216 /* mod_uint32_value() + pinfo guarantees we're in TCP port range. */
/* Cross-check against the other end of the anonymous port range. */
217 if ((uint32_t)new_value
< tcps
->tcps_smallest_anon_port
)
219 pinfo
->prop_cur_uval
= (uint32_t)new_value
;
224 * All of these are alterable, within the min/max values given, at run time.
226 * Note: All those tunables which do not start with "_" are Committed and
227 * therefore are public. See PSARC 2010/080.
/*
 * TCP tunable property table.
 *
 * Each entry lists, in order: the property name, MOD_PROTO_TCP, the setter
 * callback, the getter callback, and two brace-enclosed value groups.
 * NOTE(review): for the numeric entries the first group reads as
 * {min, max, default} and the second as {current value}; boolean entries
 * carry a single value in each group -- confirm against the
 * mod_prop_info_t definition.
 *
 * Entries with a NULL setter or getter supply only the other callback.
 * The last entry has a NULL name and NULL callbacks.
 */
229 mod_prop_info_t tcp_propinfo_tbl
[] = {
231 { "_time_wait_interval", MOD_PROTO_TCP
,
232 mod_set_uint32
, mod_get_uint32
,
233 {1*SECONDS
, TCP_TIME_WAIT_MAX
, 1*MINUTES
}, {1*MINUTES
} },
235 { "_conn_req_max_q", MOD_PROTO_TCP
,
236 mod_set_uint32
, mod_get_uint32
,
237 {1, UINT32_MAX
, 128}, {128} },
239 { "_conn_req_max_q0", MOD_PROTO_TCP
,
240 mod_set_uint32
, mod_get_uint32
,
241 {0, UINT32_MAX
, 1024}, {1024} },
243 { "_conn_req_min", MOD_PROTO_TCP
,
244 mod_set_uint32
, mod_get_uint32
,
247 { "_conn_grace_period", MOD_PROTO_TCP
,
248 mod_set_uint32
, mod_get_uint32
,
249 {0*MS
, 20*SECONDS
, 0*MS
}, {0*MS
} },
251 { "_cwnd_max", MOD_PROTO_TCP
,
252 mod_set_uint32
, mod_get_uint32
,
253 {128, ULP_MAX_BUF
, 1024*1024}, {1024*1024} },
255 { "_debug", MOD_PROTO_TCP
,
256 mod_set_uint32
, mod_get_uint32
,
259 { "smallest_nonpriv_port", MOD_PROTO_TCP
,
260 mod_set_uint32
, mod_get_uint32
,
261 {1024, (32*1024), 1024}, {1024} },
263 { "_ip_abort_cinterval", MOD_PROTO_TCP
,
264 mod_set_uint32
, mod_get_uint32
,
265 {1*SECONDS
, UINT32_MAX
, 3*MINUTES
}, {3*MINUTES
} },
267 { "_ip_abort_linterval", MOD_PROTO_TCP
,
268 mod_set_uint32
, mod_get_uint32
,
269 {1*SECONDS
, UINT32_MAX
, 3*MINUTES
}, {3*MINUTES
} },
272 { "_ip_abort_interval", MOD_PROTO_TCP
,
273 mod_set_uint32
, mod_get_uint32
,
274 {500*MS
, UINT32_MAX
, 5*MINUTES
}, {5*MINUTES
} },
276 { "_ip_notify_cinterval", MOD_PROTO_TCP
,
277 mod_set_uint32
, mod_get_uint32
,
278 {1*SECONDS
, UINT32_MAX
, 10*SECONDS
},
281 { "_ip_notify_interval", MOD_PROTO_TCP
,
282 mod_set_uint32
, mod_get_uint32
,
283 {500*MS
, UINT32_MAX
, 10*SECONDS
}, {10*SECONDS
} },
285 { "_ipv4_ttl", MOD_PROTO_TCP
,
286 mod_set_uint32
, mod_get_uint32
,
287 {1, 255, 64}, {64} },
289 { "_keepalive_interval", MOD_PROTO_TCP
,
290 mod_set_uint32
, mod_get_uint32
,
291 {1*SECONDS
, 10*DAYS
, 2*HOURS
}, {2*HOURS
} },
293 { "_maxpsz_multiplier", MOD_PROTO_TCP
,
294 mod_set_uint32
, mod_get_uint32
,
295 {0, 100, 10}, {10} },
297 { "_mss_def_ipv4", MOD_PROTO_TCP
,
298 mod_set_uint32
, mod_get_uint32
,
299 {1, TCP_MSS_MAX_IPV4
, 536}, {536} },
301 { "_mss_max_ipv4", MOD_PROTO_TCP
,
302 mod_set_uint32
, mod_get_uint32
,
303 {1, TCP_MSS_MAX_IPV4
, TCP_MSS_MAX_IPV4
},
304 {TCP_MSS_MAX_IPV4
} },
306 { "_mss_min", MOD_PROTO_TCP
,
307 mod_set_uint32
, mod_get_uint32
,
308 {1, TCP_MSS_MAX
, 108}, {108} },
310 { "_naglim_def", MOD_PROTO_TCP
,
311 mod_set_uint32
, mod_get_uint32
,
312 {1, (64*1024)-1, (4*1024)-1}, {(4*1024)-1} },
315 { "_rexmit_interval_initial", MOD_PROTO_TCP
,
316 mod_set_uint32
, mod_get_uint32
,
317 {1*MS
, 20*SECONDS
, 1*SECONDS
}, {1*SECONDS
} },
319 { "_rexmit_interval_max", MOD_PROTO_TCP
,
320 mod_set_uint32
, mod_get_uint32
,
321 {1*MS
, 2*HOURS
, 60*SECONDS
}, {60*SECONDS
} },
323 { "_rexmit_interval_min", MOD_PROTO_TCP
,
324 mod_set_uint32
, mod_get_uint32
,
325 {1*MS
, 2*HOURS
, 400*MS
}, {400*MS
} },
327 { "_deferred_ack_interval", MOD_PROTO_TCP
,
328 mod_set_uint32
, mod_get_uint32
,
329 {1*MS
, 1*MINUTES
, 100*MS
}, {100*MS
} },
331 { "_snd_lowat_fraction", MOD_PROTO_TCP
,
332 mod_set_uint32
, mod_get_uint32
,
335 { "dupack_fast_retrans", MOD_PROTO_TCP
,
336 mod_set_uint32
, mod_get_uint32
,
337 {1, 10000, 3}, {3} },
339 { "_ignore_path_mtu", MOD_PROTO_TCP
,
340 mod_set_boolean
, mod_get_boolean
,
341 {B_FALSE
}, {B_FALSE
} },
/* The two anonymous-port bounds use dedicated setters instead of mod_set_uint32. */
343 { "smallest_anon_port", MOD_PROTO_TCP
,
344 tcp_smallest_anon_set
, mod_get_uint32
,
345 {1024, ULP_MAX_PORT
, 32*1024}, {32*1024} },
347 { "largest_anon_port", MOD_PROTO_TCP
,
348 tcp_largest_anon_set
, mod_get_uint32
,
349 {1024, ULP_MAX_PORT
, ULP_MAX_PORT
},
/* send_buf and recv_buf go through the tcp_set_buf_prop/tcp_get_buf_prop wrappers. */
352 { "send_buf", MOD_PROTO_TCP
,
353 tcp_set_buf_prop
, tcp_get_buf_prop
,
354 {TCP_XMIT_LOWATER
, ULP_MAX_BUF
, TCP_XMIT_HIWATER
},
355 {TCP_XMIT_HIWATER
} },
358 { "_xmit_lowat", MOD_PROTO_TCP
,
359 mod_set_uint32
, mod_get_uint32
,
360 {TCP_XMIT_LOWATER
, ULP_MAX_BUF
, TCP_XMIT_LOWATER
},
361 {TCP_XMIT_LOWATER
} },
363 { "recv_buf", MOD_PROTO_TCP
,
364 tcp_set_buf_prop
, tcp_get_buf_prop
,
365 {TCP_RECV_LOWATER
, ULP_MAX_BUF
, TCP_RECV_HIWATER
},
366 {TCP_RECV_HIWATER
} },
368 { "_recv_hiwat_minmss", MOD_PROTO_TCP
,
369 mod_set_uint32
, mod_get_uint32
,
370 {1, 65536, 4}, {4} },
372 { "_fin_wait_2_flush_interval", MOD_PROTO_TCP
,
373 mod_set_uint32
, mod_get_uint32
,
374 {1*SECONDS
, 2*HOURS
, 60*SECONDS
},
377 { "max_buf", MOD_PROTO_TCP
,
378 mod_set_uint32
, mod_get_uint32
,
379 {8192, ULP_MAX_BUF
, 1024*1024}, {1024*1024} },
381 { "_rtt_updates", MOD_PROTO_TCP
,
382 mod_set_uint32
, mod_get_uint32
,
383 {0, 65536, 20}, {20} },
385 { "_wscale_always", MOD_PROTO_TCP
,
386 mod_set_boolean
, mod_get_boolean
,
387 {B_TRUE
}, {B_TRUE
} },
389 { "_tstamp_always", MOD_PROTO_TCP
,
390 mod_set_boolean
, mod_get_boolean
,
391 {B_FALSE
}, {B_FALSE
} },
393 { "_tstamp_if_wscale", MOD_PROTO_TCP
,
394 mod_set_boolean
, mod_get_boolean
,
395 {B_TRUE
}, {B_TRUE
} },
398 { "_rexmit_interval_extra", MOD_PROTO_TCP
,
399 mod_set_uint32
, mod_get_uint32
,
400 {0*MS
, 2*HOURS
, 0*MS
}, {0*MS
} },
402 { "_deferred_acks_max", MOD_PROTO_TCP
,
403 mod_set_uint32
, mod_get_uint32
,
406 { "_slow_start_after_idle", MOD_PROTO_TCP
,
407 mod_set_uint32
, mod_get_uint32
,
408 {0, 16384, 0}, {0} },
410 { "_slow_start_initial", MOD_PROTO_TCP
,
411 mod_set_uint32
, mod_get_uint32
,
414 { "sack", MOD_PROTO_TCP
,
415 mod_set_uint32
, mod_get_uint32
,
418 { "_ipv6_hoplimit", MOD_PROTO_TCP
,
419 mod_set_uint32
, mod_get_uint32
,
420 {0, IPV6_MAX_HOPS
, IPV6_DEFAULT_HOPS
},
421 {IPV6_DEFAULT_HOPS
} },
423 { "_mss_def_ipv6", MOD_PROTO_TCP
,
424 mod_set_uint32
, mod_get_uint32
,
425 {1, TCP_MSS_MAX_IPV6
, 1220}, {1220} },
427 { "_mss_max_ipv6", MOD_PROTO_TCP
,
428 mod_set_uint32
, mod_get_uint32
,
429 {1, TCP_MSS_MAX_IPV6
, TCP_MSS_MAX_IPV6
},
430 {TCP_MSS_MAX_IPV6
} },
432 { "_rev_src_routes", MOD_PROTO_TCP
,
433 mod_set_boolean
, mod_get_boolean
,
434 {B_FALSE
}, {B_FALSE
} },
436 { "_local_dack_interval", MOD_PROTO_TCP
,
437 mod_set_uint32
, mod_get_uint32
,
438 {10*MS
, 500*MS
, 50*MS
}, {50*MS
} },
441 { "_local_dacks_max", MOD_PROTO_TCP
,
442 mod_set_uint32
, mod_get_uint32
,
445 { "ecn", MOD_PROTO_TCP
,
446 mod_set_uint32
, mod_get_uint32
,
449 { "_rst_sent_rate_enabled", MOD_PROTO_TCP
,
450 mod_set_boolean
, mod_get_boolean
,
451 {B_TRUE
}, {B_TRUE
} },
453 { "_rst_sent_rate", MOD_PROTO_TCP
,
454 mod_set_uint32
, mod_get_uint32
,
455 {0, UINT32_MAX
, 40}, {40} },
457 { "_push_timer_interval", MOD_PROTO_TCP
,
458 mod_set_uint32
, mod_get_uint32
,
459 {0, 100*MS
, 50*MS
}, {50*MS
} },
461 { "_use_smss_as_mss_opt", MOD_PROTO_TCP
,
462 mod_set_boolean
, mod_get_boolean
,
463 {B_FALSE
}, {B_FALSE
} },
465 { "_keepalive_abort_interval", MOD_PROTO_TCP
,
466 mod_set_uint32
, mod_get_uint32
,
467 {0, UINT32_MAX
, 8*MINUTES
}, {8*MINUTES
} },
470 * tcp_wroff_xtra is the extra space in front of TCP/IP header for link
471 * layer header. It has to be a multiple of 8.
473 { "_wroff_xtra", MOD_PROTO_TCP
,
474 mod_set_aligned
, mod_get_uint32
,
475 {0, 256, 32}, {32} },
477 { "_dev_flow_ctl", MOD_PROTO_TCP
,
478 mod_set_boolean
, mod_get_boolean
,
479 {B_FALSE
}, {B_FALSE
} },
481 { "_reass_timeout", MOD_PROTO_TCP
,
482 mod_set_uint32
, mod_get_uint32
,
483 {0, UINT32_MAX
, 100*SECONDS
}, {100*SECONDS
} },
486 { "extra_priv_ports", MOD_PROTO_TCP
,
487 mod_set_extra_privports
, mod_get_extra_privports
,
488 {1, ULP_MAX_PORT
, 0}, {0} },
/*
 * Listener limit list: one getter-only entry plus separate setter-only
 * entries for adding and deleting.
 */
490 { "_listener_limit_conf", MOD_PROTO_TCP
,
491 NULL
, tcp_listener_conf_get
, {0}, {0} },
493 { "_listener_limit_conf_add", MOD_PROTO_TCP
,
494 tcp_listener_conf_add
, NULL
, {0}, {0} },
496 { "_listener_limit_conf_del", MOD_PROTO_TCP
,
497 tcp_listener_conf_del
, NULL
, {0}, {0} },
/* Getter-only "?" entry served by mod_get_allprop. */
499 { "?", MOD_PROTO_TCP
, NULL
, mod_get_allprop
, {0}, {0} },
/* Final entry: NULL name, no callbacks, zeroed values. */
501 { NULL
, 0, NULL
, NULL
, {0}, {0} }
/*
 * Entry count of tcp_propinfo_tbl as computed by A_CNT().
 * NOTE(review): if A_CNT() is a plain element count, this includes the
 * final NULL-named entry -- confirm.
 */
504 int tcp_propinfo_count
= A_CNT(tcp_propinfo_tbl
);