// SPDX-License-Identifier: GPL-2.0

/* net/sched/sch_etf.c  Earliest TxTime First queueing discipline.
 *
 * Authors:	Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
 *		Vinicius Costa Gomes <vinicius.gomes@intel.com>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/errqueue.h>
#include <linux/rbtree.h>
#include <linux/skbuff.h>
#include <linux/posix-timers.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/sock.h>

#define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON)
#define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON)
struct etf_sched_data {
        bool offload;
        bool deadline_mode;
        int clockid;
        int queue;
        s32 delta; /* in ns */
        ktime_t last; /* The txtime of the last skb sent to the netdevice. */
        struct rb_root head;
        struct qdisc_watchdog watchdog;
        ktime_t (*get_time)(void);
};
static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
        [TCA_ETF_PARMS] = { .len = sizeof(struct tc_etf_qopt) },
};
static inline int validate_input_params(struct tc_etf_qopt *qopt,
                                        struct netlink_ext_ack *extack)
{
        /* Check if params comply to the following rules:
         *      * Clockid and delta must be valid.
         *
         *      * Dynamic clockids are not supported.
         *
         *      * Delta must be a positive integer.
         *
         * Also note that for the HW offload case, we must
         * expect that system clocks have been synchronized to PHC.
         */
        if (qopt->clockid < 0) {
                NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
                return -ENOTSUPP;
        }

        if (qopt->clockid != CLOCK_TAI) {
                NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
                return -EINVAL;
        }

        if (qopt->delta < 0) {
                NL_SET_ERR_MSG(extack, "Delta must be positive");
                return -EINVAL;
        }

        return 0;
}
static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
{
        struct etf_sched_data *q = qdisc_priv(sch);
        ktime_t txtime = nskb->tstamp;
        struct sock *sk = nskb->sk;
        ktime_t now;

        if (!sk)
                return false;

        if (!sock_flag(sk, SOCK_TXTIME))
                return false;

        /* We don't perform crosstimestamping.
         * Drop if packet's clockid differs from qdisc's.
         */
        if (sk->sk_clockid != q->clockid)
                return false;

        if (sk->sk_txtime_deadline_mode != q->deadline_mode)
                return false;

        now = q->get_time();
        if (ktime_before(txtime, now) || ktime_before(txtime, q->last))
                return false;

        return true;
}
static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch)
{
        struct etf_sched_data *q = qdisc_priv(sch);
        struct rb_node *p;

        p = rb_first(&q->head);
        if (!p)
                return NULL;

        return rb_to_skb(p);
}
static void reset_watchdog(struct Qdisc *sch)
{
        struct etf_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb = etf_peek_timesortedlist(sch);
        ktime_t next;

        if (!skb)
                return;

        next = ktime_sub_ns(skb->tstamp, q->delta);
        qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
}
static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
{
        struct sock_exterr_skb *serr;
        struct sk_buff *clone;
        ktime_t txtime = skb->tstamp;

        if (!skb->sk || !(skb->sk->sk_txtime_report_errors))
                return;

        clone = skb_clone(skb, GFP_ATOMIC);
        if (!clone)
                return;

        serr = SKB_EXT_ERR(clone);
        serr->ee.ee_errno = err;
        serr->ee.ee_origin = SO_EE_ORIGIN_TXTIME;
        serr->ee.ee_type = 0;
        serr->ee.ee_code = code;
        serr->ee.ee_pad = 0;
        serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */
        serr->ee.ee_info = txtime; /* low part of tstamp */

        if (sock_queue_err_skb(skb->sk, clone))
                kfree_skb(clone);
}
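/* Illustrative note: when the socket was configured with
 * SOF_TXTIME_REPORT_ERRORS, user space can retrieve the error queued above
 * with recvmsg() and MSG_ERRQUEUE. The extended error carries
 * SO_EE_ORIGIN_TXTIME as its origin, one of the SO_EE_CODE_TXTIME_* values
 * in ee_code, and the offending txtime split across ee_data (high 32 bits)
 * and ee_info (low 32 bits), i.e. ((__u64)ee_data << 32) | ee_info.
 */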
static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
                                      struct sk_buff **to_free)
{
        struct etf_sched_data *q = qdisc_priv(sch);
        struct rb_node **p = &q->head.rb_node, *parent = NULL;
        ktime_t txtime = nskb->tstamp;

        if (!is_packet_valid(sch, nskb)) {
                report_sock_error(nskb, EINVAL,
                                  SO_EE_CODE_TXTIME_INVALID_PARAM);
                return qdisc_drop(nskb, sch, to_free);
        }

        /* Insert ordered by txtime: later txtimes go to the right subtree. */
        while (*p) {
                struct sk_buff *skb;

                parent = *p;
                skb = rb_to_skb(parent);
                if (ktime_after(txtime, skb->tstamp))
                        p = &parent->rb_right;
                else
                        p = &parent->rb_left;
        }
        rb_link_node(&nskb->rbnode, parent, p);
        rb_insert_color(&nskb->rbnode, &q->head);

        qdisc_qstats_backlog_inc(sch, nskb);
        sch->q.qlen++;

        /* Now we may need to re-arm the qdisc watchdog for the next packet. */
        reset_watchdog(sch);

        return NET_XMIT_SUCCESS;
}
static void timesortedlist_erase(struct Qdisc *sch, struct sk_buff *skb,
                                 bool drop)
{
        struct etf_sched_data *q = qdisc_priv(sch);

        rb_erase(&skb->rbnode, &q->head);

        /* The rbnode field in the skb re-uses these fields, now that
         * we are done with the rbnode, reset them.
         */
        skb->next = NULL;
        skb->prev = NULL;
        skb->dev = qdisc_dev(sch);

        qdisc_qstats_backlog_dec(sch, skb);

        if (drop) {
                struct sk_buff *to_free = NULL;

                report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED);

                qdisc_drop(skb, sch, &to_free);
                kfree_skb_list(to_free);
                qdisc_qstats_overlimit(sch);
        } else {
                qdisc_bstats_update(sch, skb);

                q->last = skb->tstamp;
        }

        sch->q.qlen--;
}
static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
{
        struct etf_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;
        ktime_t now, next;

        skb = etf_peek_timesortedlist(sch);
        if (!skb)
                return NULL;

        now = q->get_time();

        /* Drop if packet has expired while in queue. */
        if (ktime_before(skb->tstamp, now)) {
                timesortedlist_erase(sch, skb, true);
                skb = NULL;
                goto out;
        }

        /* When in deadline mode, dequeue as soon as possible and change the
         * txtime from deadline to (now + delta).
         */
        if (q->deadline_mode) {
                timesortedlist_erase(sch, skb, false);
                skb->tstamp = now;
                goto out;
        }

        next = ktime_sub_ns(skb->tstamp, q->delta);

        /* Dequeue only if now is within the [txtime - delta, txtime] range. */
        if (ktime_after(now, next))
                timesortedlist_erase(sch, skb, false);
        else
                skb = NULL;

out:
        /* Now we may need to re-arm the qdisc watchdog for the next packet. */
        reset_watchdog(sch);

        return skb;
}
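/* Worked example (numbers are illustrative, not from the source): with
 * delta = 300000 ns, a packet whose txtime is T becomes eligible for
 * dequeue at T - 300us. Until then dequeue returns NULL and reset_watchdog()
 * keeps the qdisc watchdog armed for that instant, so the queue is polled
 * again exactly when the [txtime - delta, txtime] window opens.
 */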
static void etf_disable_offload(struct net_device *dev,
                                struct etf_sched_data *q)
{
        struct tc_etf_qopt_offload etf = { };
        const struct net_device_ops *ops;
        int err;

        if (!q->offload)
                return;

        ops = dev->netdev_ops;
        if (!ops->ndo_setup_tc)
                return;

        etf.queue = q->queue;
        etf.enable = 0;

        err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
        if (err < 0)
                pr_warn("Couldn't disable ETF offload for queue %d\n",
                        etf.queue);
}
static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q,
                              struct netlink_ext_ack *extack)
{
        const struct net_device_ops *ops = dev->netdev_ops;
        struct tc_etf_qopt_offload etf = { };
        int err;

        if (q->offload)
                return 0;

        if (!ops->ndo_setup_tc) {
                NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload");
                return -EOPNOTSUPP;
        }

        etf.queue = q->queue;
        etf.enable = 1;

        err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
        if (err < 0) {
                NL_SET_ERR_MSG(extack, "Specified device failed to setup ETF hardware offload");
                return err;
        }

        return 0;
}
static int etf_init(struct Qdisc *sch, struct nlattr *opt,
                    struct netlink_ext_ack *extack)
{
        struct etf_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct nlattr *tb[TCA_ETF_MAX + 1];
        struct tc_etf_qopt *qopt;
        int err;

        if (!opt) {
                NL_SET_ERR_MSG(extack,
                               "Missing ETF qdisc options which are mandatory");
                return -EINVAL;
        }

        err = nla_parse_nested(tb, TCA_ETF_MAX, opt, etf_policy, extack);
        if (err < 0)
                return err;

        if (!tb[TCA_ETF_PARMS]) {
                NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters");
                return -EINVAL;
        }

        qopt = nla_data(tb[TCA_ETF_PARMS]);

        pr_debug("delta %d clockid %d offload %s deadline %s\n",
                 qopt->delta, qopt->clockid,
                 OFFLOAD_IS_ON(qopt) ? "on" : "off",
                 DEADLINE_MODE_IS_ON(qopt) ? "on" : "off");

        err = validate_input_params(qopt, extack);
        if (err < 0)
                return err;

        q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);

        if (OFFLOAD_IS_ON(qopt)) {
                err = etf_enable_offload(dev, q, extack);
                if (err < 0)
                        return err;
        }

        /* Everything went OK, save the parameters used. */
        q->delta = qopt->delta;
        q->clockid = qopt->clockid;
        q->offload = OFFLOAD_IS_ON(qopt);
        q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);

        switch (q->clockid) {
        case CLOCK_REALTIME:
                q->get_time = ktime_get_real;
                break;
        case CLOCK_MONOTONIC:
                q->get_time = ktime_get;
                break;
        case CLOCK_BOOTTIME:
                q->get_time = ktime_get_boottime;
                break;
        case CLOCK_TAI:
                q->get_time = ktime_get_clocktai;
                break;
        default:
                NL_SET_ERR_MSG(extack, "Clockid is not supported");
                return -ENOTSUPP;
        }

        qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid);

        return 0;
}
static void timesortedlist_clear(struct Qdisc *sch)
{
        struct etf_sched_data *q = qdisc_priv(sch);
        struct rb_node *p = rb_first(&q->head);

        while (p) {
                struct sk_buff *skb = rb_to_skb(p);

                p = rb_next(p);

                rb_erase(&skb->rbnode, &q->head);
                rtnl_kfree_skbs(skb, skb);
                sch->q.qlen--;
        }
}
static void etf_reset(struct Qdisc *sch)
{
        struct etf_sched_data *q = qdisc_priv(sch);

        /* Only cancel watchdog if it's been initialized. */
        if (q->watchdog.qdisc == sch)
                qdisc_watchdog_cancel(&q->watchdog);

        /* No matter which mode we are on, it's safe to clear both lists. */
        timesortedlist_clear(sch);
        __qdisc_reset_queue(&sch->q);

        sch->qstats.backlog = 0;
        sch->q.qlen = 0;

        q->last = 0;
}
static void etf_destroy(struct Qdisc *sch)
{
        struct etf_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);

        /* Only cancel watchdog if it's been initialized. */
        if (q->watchdog.qdisc == sch)
                qdisc_watchdog_cancel(&q->watchdog);

        etf_disable_offload(dev, q);
}
static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct etf_sched_data *q = qdisc_priv(sch);
        struct tc_etf_qopt opt = { };
        struct nlattr *nest;

        nest = nla_nest_start(skb, TCA_OPTIONS);
        if (!nest)
                goto nla_put_failure;

        opt.delta = q->delta;
        opt.clockid = q->clockid;
        if (q->offload)
                opt.flags |= TC_ETF_OFFLOAD_ON;

        if (q->deadline_mode)
                opt.flags |= TC_ETF_DEADLINE_MODE_ON;

        if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
                goto nla_put_failure;

        return nla_nest_end(skb, nest);

nla_put_failure:
        nla_nest_cancel(skb, nest);
        return -1;
}
static struct Qdisc_ops etf_qdisc_ops __read_mostly = {
        .id             = "etf",
        .priv_size      = sizeof(struct etf_sched_data),
        .enqueue        = etf_enqueue_timesortedlist,
        .dequeue        = etf_dequeue_timesortedlist,
        .peek           = etf_peek_timesortedlist,
        .init           = etf_init,
        .reset          = etf_reset,
        .destroy        = etf_destroy,
        .dump           = etf_dump,
        .owner          = THIS_MODULE,
};

static int __init etf_module_init(void)
{
        return register_qdisc(&etf_qdisc_ops);
}

static void __exit etf_module_exit(void)
{
        unregister_qdisc(&etf_qdisc_ops);
}
module_init(etf_module_init)
module_exit(etf_module_exit)
MODULE_LICENSE("GPL");
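/* Example configuration (illustrative; device name, parent handle and delta
 * are placeholders). ETF is normally attached to a single hardware TX queue,
 * e.g. as a child of mqprio, with delta given in nanoseconds:
 *
 *      tc qdisc replace dev eth0 parent 100:1 etf \
 *              clockid CLOCK_TAI delta 300000 offload
 *
 * Adding "deadline_mode" switches the qdisc to the deadline semantics
 * handled in etf_dequeue_timesortedlist() above.
 */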