// SPDX-License-Identifier: GPL-2.0

/* net/sched/sch_etf.c  Earliest TxTime First queueing discipline.
 *
 * Authors:	Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
 *		Vinicius Costa Gomes <vinicius.gomes@intel.com>
 */

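/* Illustrative usage (not part of the original file): ETF is meant to be
 * attached to a single hardware tx queue, typically under a multiqueue
 * root such as mqprio. With iproute2, a configuration along these lines
 * selects CLOCK_TAI, a 300 us fudge factor and hardware offload; the
 * device name and parent handle below are placeholders:
 *
 *	tc qdisc replace dev eth0 parent 100:1 etf \
 *		clockid CLOCK_TAI delta 300000 offload
 */
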
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/errqueue.h>
#include <linux/rbtree.h>
#include <linux/skbuff.h>
#include <linux/posix-timers.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/sock.h>

#define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON)
#define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON)

struct etf_sched_data {
	bool offload;
	bool deadline_mode;
	int clockid;
	int queue;
	s32 delta; /* in ns */
	ktime_t last; /* The txtime of the last skb sent to the netdevice. */
	struct rb_root head;
	struct qdisc_watchdog watchdog;
	ktime_t (*get_time)(void);
};

static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
	[TCA_ETF_PARMS]	= { .len = sizeof(struct tc_etf_qopt) },
};

static inline int validate_input_params(struct tc_etf_qopt *qopt,
					struct netlink_ext_ack *extack)
{
	/* Check if params comply to the following rules:
	 *	* Clockid and delta must be valid.
	 *
	 *	* Dynamic clockids are not supported.
	 *
	 *	* Delta must be a positive integer.
	 *
	 * Also note that for the HW offload case, we must
	 * expect that system clocks have been synchronized to PHC.
	 */
	if (qopt->clockid < 0) {
		NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
		return -ENOTSUPP;
	}

	if (qopt->clockid != CLOCK_TAI) {
		NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
		return -EINVAL;
	}

	if (qopt->delta < 0) {
		NL_SET_ERR_MSG(extack, "Delta must be positive");
		return -EINVAL;
	}

	return 0;
}

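/* Sanity-check a packet at enqueue time: it must come from a full socket
 * with SO_TXTIME enabled, match the qdisc's clockid and deadline mode,
 * and carry a txtime that is neither in the past nor before the txtime
 * of the last packet handed to the netdevice.
 */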
static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	ktime_t txtime = nskb->tstamp;
	struct sock *sk = nskb->sk;
	ktime_t now;

	if (!sk || !sk_fullsock(sk))
		return false;

	if (!sock_flag(sk, SOCK_TXTIME))
		return false;

	/* We don't perform crosstimestamping.
	 * Drop if packet's clockid differs from qdisc's.
	 */
	if (sk->sk_clockid != q->clockid)
		return false;

	if (sk->sk_txtime_deadline_mode != q->deadline_mode)
		return false;

	now = q->get_time();
	if (ktime_before(txtime, now) || ktime_before(txtime, q->last))
		return false;

	return true;
}

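/* Return the packet with the earliest txtime, i.e. the leftmost node of
 * the rbtree, without removing it from the queue.
 */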
static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct rb_node *p;

	p = rb_first(&q->head);
	if (!p)
		return NULL;

	return rb_to_skb(p);
}

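/* Re-arm the qdisc watchdog so that it fires 'delta' ns before the
 * earliest queued txtime, i.e. when the head packet becomes eligible
 * for dequeue.
 */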
static void reset_watchdog(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb = etf_peek_timesortedlist(sch);
	ktime_t next;

	if (!skb)
		return;

	next = ktime_sub_ns(skb->tstamp, q->delta);
	qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
}

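/* Report a txtime error back to the owning socket's error queue
 * (SO_EE_ORIGIN_TXTIME), encoding the offending txtime in
 * ee_data/ee_info.
 */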
static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *clone;
	ktime_t txtime = skb->tstamp;
	struct sock *sk = skb->sk;

	if (!sk || !sk_fullsock(sk) || !(sk->sk_txtime_report_errors))
		return;

	clone = skb_clone(skb, GFP_ATOMIC);
	if (!clone)
		return;

	serr = SKB_EXT_ERR(clone);
	serr->ee.ee_errno = err;
	serr->ee.ee_origin = SO_EE_ORIGIN_TXTIME;
	serr->ee.ee_type = 0;
	serr->ee.ee_code = code;
	serr->ee.ee_pad = 0;
	serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */
	serr->ee.ee_info = txtime; /* low part of tstamp */

	if (sock_queue_err_skb(sk, clone))
		kfree_skb(clone);
}

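/* Enqueue: validate the packet, insert it into the rbtree ordered by
 * txtime, and re-arm the watchdog in case the new packet became the
 * earliest one.
 */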
static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
				      struct sk_buff **to_free)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct rb_node **p = &q->head.rb_node, *parent = NULL;
	ktime_t txtime = nskb->tstamp;

	if (!is_packet_valid(sch, nskb)) {
		report_sock_error(nskb, EINVAL,
				  SO_EE_CODE_TXTIME_INVALID_PARAM);
		return qdisc_drop(nskb, sch, to_free);
	}

	while (*p) {
		struct sk_buff *skb;

		parent = *p;
		skb = rb_to_skb(parent);
		if (ktime_after(txtime, skb->tstamp))
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&nskb->rbnode, parent, p);
	rb_insert_color(&nskb->rbnode, &q->head);

	qdisc_qstats_backlog_inc(sch, nskb);
	sch->q.qlen++;

	/* Now we may need to re-arm the qdisc watchdog for the next packet. */
	reset_watchdog(sch);

	return NET_XMIT_SUCCESS;
}

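/* Remove an skb from the rbtree. When 'drop' is true the packet missed
 * its deadline: the error is reported to the socket and the skb is
 * dropped; otherwise it is accounted as transmitted and its txtime is
 * remembered in q->last.
 */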
static void timesortedlist_erase(struct Qdisc *sch, struct sk_buff *skb,
				 bool drop)
{
	struct etf_sched_data *q = qdisc_priv(sch);

	rb_erase(&skb->rbnode, &q->head);

	/* The rbnode field in the skb re-uses these fields, now that
	 * we are done with the rbnode, reset them.
	 */
	skb->next = NULL;
	skb->prev = NULL;
	skb->dev = qdisc_dev(sch);

	qdisc_qstats_backlog_dec(sch, skb);

	if (drop) {
		struct sk_buff *to_free = NULL;

		report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED);

		qdisc_drop(skb, sch, &to_free);
		kfree_skb_list(to_free);
		qdisc_qstats_overlimit(sch);
	} else {
		qdisc_bstats_update(sch, skb);

		q->last = skb->tstamp;
	}

	sch->q.qlen--;
}

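/* Dequeue policy: drop packets that expired while queued, release
 * packets immediately in deadline mode, and otherwise hand a packet
 * out only once the current time is within [txtime - delta, txtime].
 */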
static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	ktime_t now, next;

	skb = etf_peek_timesortedlist(sch);
	if (!skb)
		return NULL;

	now = q->get_time();

	/* Drop if packet has expired while in queue. */
	if (ktime_before(skb->tstamp, now)) {
		timesortedlist_erase(sch, skb, true);
		skb = NULL;
		goto out;
	}

	/* When in deadline mode, dequeue as soon as possible and change the
	 * txtime from deadline to (now + delta).
	 */
	if (q->deadline_mode) {
		timesortedlist_erase(sch, skb, false);
		skb->tstamp = now;
		goto out;
	}

	next = ktime_sub_ns(skb->tstamp, q->delta);

	/* Dequeue only if now is within the [txtime - delta, txtime] range. */
	if (ktime_after(now, next))
		timesortedlist_erase(sch, skb, false);
	else
		skb = NULL;

out:
	/* Now we may need to re-arm the qdisc watchdog for the next packet. */
	reset_watchdog(sch);

	return skb;
}

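/* The two helpers below disable/enable the hardware ETF offload for this
 * tx queue through ndo_setup_tc(TC_SETUP_QDISC_ETF).
 */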
static void etf_disable_offload(struct net_device *dev,
				struct etf_sched_data *q)
{
	struct tc_etf_qopt_offload etf = { };
	const struct net_device_ops *ops;
	int err;

	if (!q->offload)
		return;

	ops = dev->netdev_ops;
	if (!ops->ndo_setup_tc)
		return;

	etf.queue = q->queue;
	etf.enable = 0;

	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
	if (err < 0)
		pr_warn("Couldn't disable ETF offload for queue %d\n",
			etf.queue);
}

static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q,
			      struct netlink_ext_ack *extack)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	struct tc_etf_qopt_offload etf = { };
	int err;

	if (q->offload)
		return 0;

	if (!ops->ndo_setup_tc) {
		NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload");
		return -EOPNOTSUPP;
	}

	etf.queue = q->queue;
	etf.enable = 1;

	err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
	if (err < 0) {
		NL_SET_ERR_MSG(extack, "Specified device failed to setup ETF hardware offload");
		return err;
	}

	return 0;
}

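/* Parse and validate the netlink parameters, record the tx queue index,
 * optionally enable hardware offload, and bind the qdisc watchdog to the
 * configured clockid.
 */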
static int etf_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct nlattr *tb[TCA_ETF_MAX + 1];
	struct tc_etf_qopt *qopt;
	int err;

	if (!opt) {
		NL_SET_ERR_MSG(extack,
			       "Missing ETF qdisc options which are mandatory");
		return -EINVAL;
	}

	err = nla_parse_nested(tb, TCA_ETF_MAX, opt, etf_policy, extack);
	if (err < 0)
		return err;

	if (!tb[TCA_ETF_PARMS]) {
		NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters");
		return -EINVAL;
	}

	qopt = nla_data(tb[TCA_ETF_PARMS]);

	pr_debug("delta %d clockid %d offload %s deadline %s\n",
		 qopt->delta, qopt->clockid,
		 OFFLOAD_IS_ON(qopt) ? "on" : "off",
		 DEADLINE_MODE_IS_ON(qopt) ? "on" : "off");

	err = validate_input_params(qopt, extack);
	if (err < 0)
		return err;

	q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);

	if (OFFLOAD_IS_ON(qopt)) {
		err = etf_enable_offload(dev, q, extack);
		if (err < 0)
			return err;
	}

	/* Everything went OK, save the parameters used. */
	q->delta = qopt->delta;
	q->clockid = qopt->clockid;
	q->offload = OFFLOAD_IS_ON(qopt);
	q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);

	switch (q->clockid) {
	case CLOCK_REALTIME:
		q->get_time = ktime_get_real;
		break;
	case CLOCK_MONOTONIC:
		q->get_time = ktime_get;
		break;
	case CLOCK_BOOTTIME:
		q->get_time = ktime_get_boottime;
		break;
	case CLOCK_TAI:
		q->get_time = ktime_get_clocktai;
		break;
	default:
		NL_SET_ERR_MSG(extack, "Clockid is not supported");
		return -ENOTSUPP;
	}

	qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid);

	return 0;
}

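/* Free every skb still sitting in the rbtree; used when the qdisc is
 * reset.
 */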
static void timesortedlist_clear(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct rb_node *p = rb_first(&q->head);

	while (p) {
		struct sk_buff *skb = rb_to_skb(p);

		p = rb_next(p);

		rb_erase(&skb->rbnode, &q->head);
		rtnl_kfree_skbs(skb, skb);
		sch->q.qlen--;
	}
}

static void etf_reset(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);

	/* Only cancel watchdog if it's been initialized. */
	if (q->watchdog.qdisc == sch)
		qdisc_watchdog_cancel(&q->watchdog);

	/* No matter which mode we are on, it's safe to clear both lists. */
	timesortedlist_clear(sch);
	__qdisc_reset_queue(&sch->q);

	sch->qstats.backlog = 0;
	sch->q.qlen = 0;

	q->last = 0;
}

static void etf_destroy(struct Qdisc *sch)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);

	/* Only cancel watchdog if it's been initialized. */
	if (q->watchdog.qdisc == sch)
		qdisc_watchdog_cancel(&q->watchdog);

	etf_disable_offload(dev, q);
}

static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct etf_sched_data *q = qdisc_priv(sch);
	struct tc_etf_qopt opt = { };
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (!nest)
		goto nla_put_failure;

	opt.delta = q->delta;
	opt.clockid = q->clockid;
	if (q->offload)
		opt.flags |= TC_ETF_OFFLOAD_ON;

	if (q->deadline_mode)
		opt.flags |= TC_ETF_DEADLINE_MODE_ON;

	if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;

	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static struct Qdisc_ops etf_qdisc_ops __read_mostly = {
	.id		= "etf",
	.priv_size	= sizeof(struct etf_sched_data),
	.enqueue	= etf_enqueue_timesortedlist,
	.dequeue	= etf_dequeue_timesortedlist,
	.peek		= etf_peek_timesortedlist,
	.init		= etf_init,
	.reset		= etf_reset,
	.destroy	= etf_destroy,
	.dump		= etf_dump,
	.owner		= THIS_MODULE,
};

static int __init etf_module_init(void)
{
	return register_qdisc(&etf_qdisc_ops);
}

static void __exit etf_module_exit(void)
{
	unregister_qdisc(&etf_qdisc_ops);
}
module_init(etf_module_init)
module_exit(etf_module_exit)
MODULE_LICENSE("GPL");