// SPDX-License-Identifier: GPL-2.0

/* net/sched/sch_etf.c  Earliest TxTime First queueing discipline.
 *
 * Authors:	Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
 *		Vinicius Costa Gomes <vinicius.gomes@intel.com>
 */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/errqueue.h>
#include <linux/rbtree.h>
#include <linux/skbuff.h>
#include <linux/posix-timers.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/sock.h>
#define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON)
#define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON)
#define SKIP_SOCK_CHECK_IS_SET(x) ((x)->flags & TC_ETF_SKIP_SOCK_CHECK)
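
/* ETF keeps skbs sorted by their requested transmission time (skb->tstamp)
 * in an rbtree and only hands each one to the netdevice inside the
 * [txtime - delta, txtime] window (or right away, in deadline mode).
 * A typical configuration from userspace looks roughly like:
 *
 *	tc qdisc replace dev eth0 parent 100:1 etf \
 *		clockid CLOCK_TAI delta 300000 offload
 *
 * The values above are only illustrative, and "offload" requires driver
 * support.
 */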

struct etf_sched_data {
	bool offload;
	bool deadline_mode;
	bool skip_sock_check;
	int clockid;
	int queue;
	s32 delta; /* in ns */
	ktime_t last; /* The txtime of the last skb sent to the netdevice. */
	struct rb_root_cached head;
	struct qdisc_watchdog watchdog;
	ktime_t (*get_time)(void);
};

static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
	[TCA_ETF_PARMS]	= { .len = sizeof(struct tc_etf_qopt) },
};

static inline int validate_input_params(struct tc_etf_qopt *qopt,
					struct netlink_ext_ack *extack)
{
	/* Check if params comply to the following rules:
	 *	* Clockid and delta must be valid.
	 *
	 *	* Dynamic clockids are not supported.
	 *
	 *	* Delta must be a positive integer.
	 *
	 * Also note that for the HW offload case, we must
	 * expect that system clocks have been synchronized to PHC.
	 */
	if (qopt->clockid < 0) {
		NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
		return -ENOTSUPP;
	}

	if (qopt->clockid != CLOCK_TAI) {
		NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
		return -EINVAL;
	}

	if (qopt->delta < 0) {
		NL_SET_ERR_MSG(extack, "Delta must be positive");
		return -EINVAL;
	}

	return 0;
}

static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	ktime_t txtime = nskb->tstamp;
	struct sock *sk = nskb->sk;
	ktime_t now;

	if (q->skip_sock_check)
		goto skip;

	if (!sk || !sk_fullsock(sk))
		return false;

	if (!sock_flag(sk, SOCK_TXTIME))
		return false;

	/* We don't perform crosstimestamping.
	 * Drop if packet's clockid differs from qdisc's.
	 */
	if (sk->sk_clockid != q->clockid)
		return false;

	if (sk->sk_txtime_deadline_mode != q->deadline_mode)
		return false;

skip:
	now = q->get_time();
	if (ktime_before(txtime, now) || ktime_before(txtime, q->last))
		return false;

	return true;
}

static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct rb_node *p;

	p = rb_first_cached(&q->head);
	if (!p)
		return NULL;

	return rb_to_skb(p);
}

static void reset_watchdog(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb = etf_peek_timesortedlist(sch);
	ktime_t next;

	if (!skb) {
		qdisc_watchdog_cancel(&q->watchdog);
		return;
	}

	next = ktime_sub_ns(skb->tstamp, q->delta);
	qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
}

static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *clone;
	ktime_t txtime = skb->tstamp;
	struct sock *sk = skb->sk;

	if (!sk || !sk_fullsock(sk) || !(sk->sk_txtime_report_errors))
		return;

	clone = skb_clone(skb, GFP_ATOMIC);
	if (!clone)
		return;

	serr = SKB_EXT_ERR(clone);
	serr->ee.ee_errno = err;
	serr->ee.ee_origin = SO_EE_ORIGIN_TXTIME;
	serr->ee.ee_type = 0;
	serr->ee.ee_code = code;
	serr->ee.ee_pad = 0;
	serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */
	serr->ee.ee_info = txtime; /* low part of tstamp */

	if (sock_queue_err_skb(sk, clone))
		kfree_skb(clone);
}
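
/* Applications that set SOF_TXTIME_REPORT_ERRORS can read these reports
 * from the socket error queue (recvmsg() with MSG_ERRQUEUE); the cmsg
 * carries a struct sock_extended_err with ee_origin == SO_EE_ORIGIN_TXTIME,
 * and the affected packet's txtime can be rebuilt as roughly
 * ((__u64)ee_data << 32) | ee_info.
 */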

static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
				      struct sk_buff **to_free)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct rb_node **p = &q->head.rb_root.rb_node, *parent = NULL;
	ktime_t txtime = nskb->tstamp;
	bool leftmost = true;

	if (!is_packet_valid(sch, nskb)) {
		report_sock_error(nskb, EINVAL,
				  SO_EE_CODE_TXTIME_INVALID_PARAM);
		return qdisc_drop(nskb, sch, to_free);
	}

	while (*p) {
		struct sk_buff *skb;

		parent = *p;
		skb = rb_to_skb(parent);
		if (ktime_compare(txtime, skb->tstamp) >= 0) {
			p = &parent->rb_right;
			leftmost = false;
		} else {
			p = &parent->rb_left;
		}
	}
	rb_link_node(&nskb->rbnode, parent, p);
	rb_insert_color_cached(&nskb->rbnode, &q->head, leftmost);

	qdisc_qstats_backlog_inc(sch, nskb);
	sch->q.qlen++;

	/* Now we may need to re-arm the qdisc watchdog for the next packet. */
	reset_watchdog(sch);

	return NET_XMIT_SUCCESS;
}
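
/* Note on the insertion above: a packet whose txtime compares >= an existing
 * node descends to the right, so packets sharing the same txtime stay in
 * enqueue order, and the cached leftmost node is always the next packet due
 * for transmission.
 */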

static void timesortedlist_drop(struct Qdisc *sch, struct sk_buff *skb,
				ktime_t now)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *to_free = NULL;
	struct sk_buff *tmp = NULL;

	skb_rbtree_walk_from_safe(skb, tmp) {
		if (ktime_after(skb->tstamp, now))
			break;

		rb_erase_cached(&skb->rbnode, &q->head);

		/* The rbnode field in the skb re-uses these fields, now that
		 * we are done with the rbnode, reset them.
		 */
		skb->next = NULL;
		skb->prev = NULL;
		skb->dev = qdisc_dev(sch);

		report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED);

		qdisc_qstats_backlog_dec(sch, skb);
		qdisc_drop(skb, sch, &to_free);
		qdisc_qstats_overlimit(sch);
		sch->q.qlen--;
	}

	kfree_skb_list(to_free);
}

static void timesortedlist_remove(struct Qdisc *sch, struct sk_buff *skb)
{
	struct etf_sched_data *q = qdisc_priv(sch);

	rb_erase_cached(&skb->rbnode, &q->head);

	/* The rbnode field in the skb re-uses these fields, now that
	 * we are done with the rbnode, reset them.
	 */
	skb->next = NULL;
	skb->prev = NULL;
	skb->dev = qdisc_dev(sch);

	qdisc_qstats_backlog_dec(sch, skb);

	qdisc_bstats_update(sch, skb);

	q->last = skb->tstamp;

	sch->q.qlen--;
}

static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	ktime_t now, next;

	skb = etf_peek_timesortedlist(sch);
	if (!skb)
		return NULL;

	now = q->get_time();

	/* Drop if packet has expired while in queue. */
	if (ktime_before(skb->tstamp, now)) {
		timesortedlist_drop(sch, skb, now);
		skb = NULL;
		goto out;
	}

	/* When in deadline mode, dequeue as soon as possible and change the
	 * txtime from deadline to (now + delta).
	 */
	if (q->deadline_mode) {
		timesortedlist_remove(sch, skb);
		skb->tstamp = now;
		goto out;
	}

	next = ktime_sub_ns(skb->tstamp, q->delta);

	/* Dequeue only if now is within the [txtime - delta, txtime] range. */
	if (ktime_after(now, next))
		timesortedlist_remove(sch, skb);
	else
		skb = NULL;

out:
	/* Now we may need to re-arm the qdisc watchdog for the next packet. */
	reset_watchdog(sch);

	return skb;
}
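
/* Since the dequeue window opens at (txtime - delta), delta bounds how early
 * a packet may leave this qdisc: it should be large enough to cover the time
 * the stack and driver need between dequeue and actual transmission (and,
 * with offload, for the frame to reach the NIC before its launch time).
 * Suitable values are system dependent.
 */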

static void etf_disable_offload(struct net_device *dev,
				struct etf_sched_data *q)
{
	struct tc_etf_qopt_offload etf = { };
	const struct net_device_ops *ops;
	int err;

	if (!q->offload)
		return;

	ops = dev->netdev_ops;
	if (!ops->ndo_setup_tc)
		return;

	etf.queue = q->queue;
	etf.enable = 0;

	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
	if (err < 0)
		pr_warn("Couldn't disable ETF offload for queue %d\n",
			etf.queue);
}

static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q,
			      struct netlink_ext_ack *extack)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	struct tc_etf_qopt_offload etf = { };
	int err;

	if (q->offload)
		return 0;

	if (!ops->ndo_setup_tc) {
		NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload");
		return -EOPNOTSUPP;
	}

	etf.queue = q->queue;
	etf.enable = 1;

	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
	if (err < 0) {
		NL_SET_ERR_MSG(extack, "Specified device failed to setup ETF hardware offload");
		return err;
	}

	return 0;
}

static int etf_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct nlattr *tb[TCA_ETF_MAX + 1];
	struct tc_etf_qopt *qopt;
	int err;

	if (!opt) {
		NL_SET_ERR_MSG(extack,
			       "Missing ETF qdisc options which are mandatory");
		return -EINVAL;
	}

	err = nla_parse_nested_deprecated(tb, TCA_ETF_MAX, opt, etf_policy,
					  NULL);
	if (err < 0)
		return err;

	if (!tb[TCA_ETF_PARMS]) {
		NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters");
		return -EINVAL;
	}

	qopt = nla_data(tb[TCA_ETF_PARMS]);

	pr_debug("delta %d clockid %d offload %s deadline %s\n",
		 qopt->delta, qopt->clockid,
		 OFFLOAD_IS_ON(qopt) ? "on" : "off",
		 DEADLINE_MODE_IS_ON(qopt) ? "on" : "off");

	err = validate_input_params(qopt, extack);
	if (err < 0)
		return err;

	q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);

	if (OFFLOAD_IS_ON(qopt)) {
		err = etf_enable_offload(dev, q, extack);
		if (err < 0)
			return err;
	}

	/* Everything went OK, save the parameters used. */
	q->delta = qopt->delta;
	q->clockid = qopt->clockid;
	q->offload = OFFLOAD_IS_ON(qopt);
	q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);
	q->skip_sock_check = SKIP_SOCK_CHECK_IS_SET(qopt);

	switch (q->clockid) {
	case CLOCK_REALTIME:
		q->get_time = ktime_get_real;
		break;
	case CLOCK_MONOTONIC:
		q->get_time = ktime_get;
		break;
	case CLOCK_BOOTTIME:
		q->get_time = ktime_get_boottime;
		break;
	case CLOCK_TAI:
		q->get_time = ktime_get_clocktai;
		break;
	default:
		NL_SET_ERR_MSG(extack, "Clockid is not supported");
		return -ENOTSUPP;
	}

	qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid);

	return 0;
}

static void timesortedlist_clear(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct rb_node *p = rb_first_cached(&q->head);

	while (p) {
		struct sk_buff *skb = rb_to_skb(p);

		p = rb_next(p);

		rb_erase_cached(&skb->rbnode, &q->head);
		rtnl_kfree_skbs(skb, skb);
		sch->q.qlen--;
	}
}

static void etf_reset(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);

	/* Only cancel watchdog if it's been initialized. */
	if (q->watchdog.qdisc == sch)
		qdisc_watchdog_cancel(&q->watchdog);

	/* No matter which mode we are on, it's safe to clear both lists. */
	timesortedlist_clear(sch);
	__qdisc_reset_queue(&sch->q);

	sch->qstats.backlog = 0;
	sch->q.qlen = 0;

	q->last = 0;
}

static void etf_destroy(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);

	/* Only cancel watchdog if it's been initialized. */
	if (q->watchdog.qdisc == sch)
		qdisc_watchdog_cancel(&q->watchdog);

	etf_disable_offload(dev, q);
}

static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct tc_etf_qopt opt = { };
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (!nest)
		goto nla_put_failure;

	opt.delta = q->delta;
	opt.clockid = q->clockid;
	if (q->offload)
		opt.flags |= TC_ETF_OFFLOAD_ON;

	if (q->deadline_mode)
		opt.flags |= TC_ETF_DEADLINE_MODE_ON;

	if (q->skip_sock_check)
		opt.flags |= TC_ETF_SKIP_SOCK_CHECK;

	if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;

	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static struct Qdisc_ops etf_qdisc_ops __read_mostly = {
	.id		=	"etf",
	.priv_size	=	sizeof(struct etf_sched_data),
	.enqueue	=	etf_enqueue_timesortedlist,
	.dequeue	=	etf_dequeue_timesortedlist,
	.peek		=	etf_peek_timesortedlist,
	.init		=	etf_init,
	.reset		=	etf_reset,
	.destroy	=	etf_destroy,
	.dump		=	etf_dump,
	.owner		=	THIS_MODULE,
};

static int __init etf_module_init(void)
{
	return register_qdisc(&etf_qdisc_ops);
}

static void __exit etf_module_exit(void)
{
	unregister_qdisc(&etf_qdisc_ops);
}
module_init(etf_module_init)
module_exit(etf_module_exit)
MODULE_LICENSE("GPL");