1 /* net/sched/sch_atm.c - ATM VC selection "queueing discipline" */
3 /* Written 1998,1999 by Werner Almesberger, EPFL ICA */
6 #include <linux/config.h>
7 #include <linux/module.h>
8 #include <linux/skbuff.h>
9 #include <linux/atmdev.h>
10 #include <linux/atmclip.h>
11 #include <linux/netdevice.h>
12 #include <linux/rtnetlink.h>
13 #include <linux/file.h> /* for fput */
14 #include <net/pkt_sched.h>
/*
 * sockfd_lookup() is declared by hand here rather than taken from a header
 * (the @@@ marks this as something that still needs a proper fix).
 */
extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */

/* Release the file reference held through a struct socket. */
#define sockfd_put(sock) fput((sock)->file)	/* @@@ copied because it's
						   __inline__ in socket.c */

/*
 * DPRINTK: debug output used by the class/control operations.
 * Compiled out by default; switch the "#if 0" to "#if 1" to enable it.
 */
#if 0 /* control */
#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
#else
#define DPRINTK(format,args...)
#endif

/*
 * D2PRINTK: debug output used on the enqueue/dequeue path.
 * Compiled out by default; switch the "#if 0" to "#if 1" to enable it.
 */
#if 0 /* data */
#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
#else
#define D2PRINTK(format,args...)
#endif
/*
 * The ATM queuing discipline provides a framework for invoking classifiers
 * (aka "filters"), which in turn select classes of this queuing discipline.
 * Each class maps the flow(s) it is handling to a given VC. Multiple classes
 * may share the same VC.
 *
 * When creating a class, VCs are specified by passing the number of the open
 * socket descriptor by which the calling process references the VC. The kernel
 * keeps the VC open at least until all classes using it are removed.
 *
 * In this file, most functions are named atm_tc_* to avoid confusion with all
 * the atm_* in net/atm. This naming convention differs from what's used in the
 * rest of net/sched.
 *
 * Known bugs:
 *  - sometimes messes up the IP stack
 *  - any manipulations besides the few operations described in the README are
 *    untested and likely to crash the system
 *  - should lock the flow while there is data in the queue (?)
 */
/* Private per-qdisc data of this discipline, stored in sch->data. */
#define PRIV(sch) ((struct atm_qdisc_data *) (sch)->data)
61 struct atm_flow_data
{
62 struct Qdisc
*q
; /* FIFO, TBF, etc. */
63 struct tcf_proto
*filter_list
;
64 struct atm_vcc
*vcc
; /* VCC; NULL if VCC is closed */
65 struct socket
*sock
; /* for closing */
66 u32 classid
; /* x:y type ID */
67 int ref
; /* reference count */
68 struct tc_stats stats
;
69 struct atm_flow_data
*next
;
70 struct atm_flow_data
*excess
; /* flow for excess traffic;
71 NULL to set CLP instead */
73 unsigned char hdr
[0]; /* header data; MUST BE LAST */
76 struct atm_qdisc_data
{
77 struct atm_flow_data link
; /* unclassified skbs go here */
78 struct atm_flow_data
*flows
; /* NB: "link" is also on this
83 /* ------------------------- Class/flow operations ------------------------- */
86 static int find_flow(struct atm_qdisc_data
*qdisc
,struct atm_flow_data
*flow
)
88 struct atm_flow_data
*walk
;
90 DPRINTK("find_flow(qdisc %p,flow %p)\n",qdisc
,flow
);
91 for (walk
= qdisc
->flows
; walk
; walk
= walk
->next
)
92 if (walk
== flow
) return 1;
93 DPRINTK("find_flow: not found\n");
98 static __inline__
struct atm_flow_data
*lookup_flow(struct Qdisc
*sch
,
101 struct atm_flow_data
*flow
;
103 for (flow
= PRIV(sch
)->flows
; flow
; flow
= flow
->next
)
104 if (flow
->classid
== classid
) break;
109 static int atm_tc_graft(struct Qdisc
*sch
,unsigned long arg
,
110 struct Qdisc
*new,struct Qdisc
**old
)
112 struct atm_qdisc_data
*p
= PRIV(sch
);
113 struct atm_flow_data
*flow
= (struct atm_flow_data
*) arg
;
115 DPRINTK("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n",sch
,
117 if (!find_flow(p
,flow
)) return -EINVAL
;
118 if (!new) new = &noop_qdisc
;
119 *old
= xchg(&flow
->q
,new);
120 if (*old
) qdisc_reset(*old
);
125 static struct Qdisc
*atm_tc_leaf(struct Qdisc
*sch
,unsigned long cl
)
127 struct atm_flow_data
*flow
= (struct atm_flow_data
*) cl
;
129 DPRINTK("atm_tc_leaf(sch %p,flow %p)\n",sch
,flow
);
130 return flow
? flow
->q
: NULL
;
134 static unsigned long atm_tc_get(struct Qdisc
*sch
,u32 classid
)
136 struct atm_qdisc_data
*p
= PRIV(sch
);
137 struct atm_flow_data
*flow
;
139 DPRINTK("atm_tc_get(sch %p,[qdisc %p],classid %x)\n",sch
,p
,classid
);
140 flow
= lookup_flow(sch
,classid
);
141 if (flow
) flow
->ref
++;
142 DPRINTK("atm_tc_get: flow %p\n",flow
);
143 return (unsigned long) flow
;
147 static unsigned long atm_tc_bind_filter(struct Qdisc
*sch
,
148 unsigned long parent
, u32 classid
)
150 return atm_tc_get(sch
,classid
);
155 * atm_tc_put handles all destructions, including the ones that are explicitly
156 * requested (atm_tc_destroy, etc.). The assumption here is that we never drop
157 * anything that still seems to be in use.
160 static void atm_tc_put(struct Qdisc
*sch
, unsigned long cl
)
162 struct atm_qdisc_data
*p
= PRIV(sch
);
163 struct atm_flow_data
*flow
= (struct atm_flow_data
*) cl
;
164 struct atm_flow_data
**prev
;
165 struct tcf_proto
*filter
;
167 DPRINTK("atm_tc_put(sch %p,[qdisc %p],flow %p)\n",sch
,p
,flow
);
168 if (--flow
->ref
) return;
169 DPRINTK("atm_tc_put: destroying\n");
170 for (prev
= &p
->flows
; *prev
; prev
= &(*prev
)->next
)
171 if (*prev
== flow
) break;
173 printk(KERN_CRIT
"atm_tc_put: class %p not found\n",flow
);
177 DPRINTK("atm_tc_put: qdisc %p\n",flow
->q
);
178 qdisc_destroy(flow
->q
);
179 while ((filter
= flow
->filter_list
)) {
180 DPRINTK("atm_tc_put: destroying filter %p\n",filter
);
181 flow
->filter_list
= filter
->next
;
182 DPRINTK("atm_tc_put: filter %p\n",filter
);
183 filter
->ops
->destroy(filter
);
186 DPRINTK("atm_tc_put: f_count %d\n",file_count(flow
->sock
->file
));
187 sockfd_put(flow
->sock
);
189 if (flow
->excess
) atm_tc_put(sch
,(unsigned long) flow
->excess
);
190 if (flow
!= &p
->link
) kfree(flow
);
192 * If flow == &p->link, the qdisc no longer works at this point and
193 * needs to be removed. (By the caller of atm_tc_put.)
198 static int atm_tc_change(struct Qdisc
*sch
, u32 classid
, u32 parent
,
199 struct rtattr
**tca
, unsigned long *arg
)
201 struct atm_qdisc_data
*p
= PRIV(sch
);
202 struct atm_flow_data
*flow
= (struct atm_flow_data
*) *arg
;
203 struct atm_flow_data
*excess
= NULL
;
204 struct rtattr
*opt
= tca
[TCA_OPTIONS
-1];
205 struct rtattr
*tb
[TCA_ATM_MAX
];
207 int fd
,error
,hdr_len
;
210 DPRINTK("atm_tc_change(sch %p,[qdisc %p],classid %x,parent %x,"
211 "flow %p,opt %p)\n",sch
,p
,classid
,parent
,flow
,opt
);
213 * The concept of parents doesn't apply for this qdisc.
215 if (parent
&& parent
!= TC_H_ROOT
&& parent
!= sch
->handle
)
218 * ATM classes cannot be changed. In order to change properties of the
219 * ATM connection, that socket needs to be modified directly (via the
220 * native ATM API. In order to send a flow to a different VC, the old
221 * class needs to be removed and a new one added. (This may be changed
224 if (flow
) return -EBUSY
;
225 if (opt
== NULL
|| rtattr_parse(tb
,TCA_ATM_MAX
,RTA_DATA(opt
),
226 RTA_PAYLOAD(opt
))) return -EINVAL
;
227 if (!tb
[TCA_ATM_FD
-1] || RTA_PAYLOAD(tb
[TCA_ATM_FD
-1]) < sizeof(fd
))
229 fd
= *(int *) RTA_DATA(tb
[TCA_ATM_FD
-1]);
230 DPRINTK("atm_tc_change: fd %d\n",fd
);
231 if (tb
[TCA_ATM_HDR
-1]) {
232 hdr_len
= RTA_PAYLOAD(tb
[TCA_ATM_HDR
-1]);
233 hdr
= RTA_DATA(tb
[TCA_ATM_HDR
-1]);
236 hdr_len
= RFC1483LLC_LEN
;
237 hdr
= NULL
; /* default LLC/SNAP for IP */
239 if (!tb
[TCA_ATM_EXCESS
-1]) excess
= NULL
;
241 if (RTA_PAYLOAD(tb
[TCA_ATM_EXCESS
-1]) != sizeof(u32
))
243 excess
= (struct atm_flow_data
*) atm_tc_get(sch
,
244 *(u32
*) RTA_DATA(tb
[TCA_ATM_EXCESS
-1]));
245 if (!excess
) return -ENOENT
;
247 DPRINTK("atm_tc_change: type %d, payload %d, hdr_len %d\n",
248 opt
->rta_type
,RTA_PAYLOAD(opt
),hdr_len
);
249 if (!(sock
= sockfd_lookup(fd
,&error
))) return error
; /* f_count++ */
250 DPRINTK("atm_tc_change: f_count %d\n",file_count(sock
->file
));
251 if (sock
->ops
->family
!= PF_ATMSVC
&& sock
->ops
->family
!= PF_ATMPVC
) {
255 /* @@@ should check if the socket is really operational or we'll crash
256 on vcc->dev->ops->send */
258 if (TC_H_MAJ(classid
^ sch
->handle
)) {
259 DPRINTK("atm_tc_change: classid mismatch\n");
263 if (find_flow(p
,flow
)) {
272 for (i
= 1; i
< 0x8000; i
++) {
273 classid
= TC_H_MAKE(sch
->handle
,0x8000 | i
);
274 if (!(cl
= atm_tc_get(sch
,classid
))) break;
278 DPRINTK("atm_tc_change: new id %x\n",classid
);
279 flow
= kmalloc(sizeof(struct atm_flow_data
)+hdr_len
,GFP_KERNEL
);
280 DPRINTK("atm_tc_change: flow %p\n",flow
);
285 memset(flow
,0,sizeof(*flow
));
286 flow
->filter_list
= NULL
;
287 if (!(flow
->q
= qdisc_create_dflt(sch
->dev
,&pfifo_qdisc_ops
)))
288 flow
->q
= &noop_qdisc
;
289 DPRINTK("atm_tc_change: qdisc %p\n",flow
->q
);
291 flow
->vcc
= ATM_SD(sock
); /* speedup */
292 DPRINTK("atm_tc_change: vcc %p\n",flow
->vcc
);
293 flow
->classid
= classid
;
295 flow
->excess
= excess
;
296 flow
->next
= p
->link
.next
;
298 flow
->hdr_len
= hdr_len
;
299 if (hdr
) memcpy(flow
->hdr
,hdr
,hdr_len
);
301 memcpy(flow
->hdr
,llc_oui
,sizeof(llc_oui
));
302 ((u16
*) flow
->hdr
)[3] = htons(ETH_P_IP
);
304 *arg
= (unsigned long) flow
;
307 if (excess
) atm_tc_put(sch
,(unsigned long) excess
);
313 static int atm_tc_delete(struct Qdisc
*sch
,unsigned long arg
)
315 struct atm_qdisc_data
*p
= PRIV(sch
);
316 struct atm_flow_data
*flow
= (struct atm_flow_data
*) arg
;
318 DPRINTK("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n",sch
,p
,flow
);
319 if (!find_flow(PRIV(sch
),flow
)) return -EINVAL
;
320 if (flow
->filter_list
|| flow
== &p
->link
) return -EBUSY
;
322 * Reference count must be 2: one for "keepalive" (set at class
323 * creation), and one for the reference held when calling delete.
326 printk(KERN_ERR
"atm_tc_delete: flow->ref == %d\n",flow
->ref
);
329 if (flow
->ref
> 2) return -EBUSY
; /* catch references via excess, etc.*/
335 static void atm_tc_walk(struct Qdisc
*sch
,struct qdisc_walker
*walker
)
337 struct atm_qdisc_data
*p
= PRIV(sch
);
338 struct atm_flow_data
*flow
;
340 DPRINTK("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n",sch
,p
,walker
);
341 if (walker
->stop
) return;
342 for (flow
= p
->flows
; flow
; flow
= flow
->next
) {
343 if (walker
->count
>= walker
->skip
)
344 if (walker
->fn(sch
,(unsigned long) flow
,walker
) < 0) {
353 static struct tcf_proto
**atm_tc_find_tcf(struct Qdisc
*sch
,unsigned long cl
)
355 struct atm_qdisc_data
*p
= PRIV(sch
);
356 struct atm_flow_data
*flow
= (struct atm_flow_data
*) cl
;
358 DPRINTK("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n",sch
,p
,flow
);
359 return flow
? &flow
->filter_list
: &p
->link
.filter_list
;
363 /* --------------------------- Qdisc operations ---------------------------- */
366 static int atm_tc_enqueue(struct sk_buff
*skb
,struct Qdisc
*sch
)
368 struct atm_qdisc_data
*p
= PRIV(sch
);
369 struct atm_flow_data
*flow
= NULL
; /* @@@ */
370 struct tcf_result res
;
372 int ret
= NET_XMIT_POLICED
;
374 D2PRINTK("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n",skb
,sch
,p
);
375 result
= TC_POLICE_OK
; /* be nice to gcc */
376 if (TC_H_MAJ(skb
->priority
) != sch
->handle
||
377 !(flow
= (struct atm_flow_data
*) atm_tc_get(sch
,skb
->priority
)))
378 for (flow
= p
->flows
; flow
; flow
= flow
->next
)
379 if (flow
->filter_list
) {
380 result
= tc_classify(skb
,flow
->filter_list
,
382 if (result
< 0) continue;
383 flow
= (struct atm_flow_data
*) res
.class;
384 if (!flow
) flow
= lookup_flow(sch
,res
.classid
);
387 if (!flow
) flow
= &p
->link
;
390 ATM_SKB(skb
)->atm_options
= flow
->vcc
->atm_options
;
391 /*@@@ looks good ... but it's not supposed to work :-)*/
392 #ifdef CONFIG_NET_CLS_POLICE
397 case TC_POLICE_RECLASSIFY
:
398 if (flow
->excess
) flow
= flow
->excess
;
400 ATM_SKB(skb
)->atm_options
|=
413 #ifdef CONFIG_NET_CLS_POLICE
414 result
== TC_POLICE_SHOT
||
416 (ret
= flow
->q
->enqueue(skb
,flow
->q
)) != 0) {
418 if (flow
) flow
->stats
.drops
++;
421 sch
->stats
.bytes
+= skb
->len
;
422 sch
->stats
.packets
++;
423 flow
->stats
.bytes
+= skb
->len
;
424 flow
->stats
.packets
++;
430 static struct sk_buff
*atm_tc_dequeue(struct Qdisc
*sch
)
432 struct atm_qdisc_data
*p
= PRIV(sch
);
433 struct atm_flow_data
*flow
;
436 D2PRINTK("atm_tc_dequeue(sch %p,[qdisc %p])\n",sch
,p
);
437 for (flow
= p
->link
.next
; flow
; flow
= flow
->next
)
439 * If traffic is properly shaped, this won't generate nasty
440 * little bursts. Otherwise, it may ... @@@
442 while ((skb
= flow
->q
->dequeue(flow
->q
))) {
444 D2PRINTK("atm_tc_deqeueue: sending on class %p\n",flow
);
445 /* remove any LL header somebody else has attached */
446 skb_pull(skb
,(char *) skb
->nh
.iph
-(char *) skb
->data
);
447 if (skb_headroom(skb
) < flow
->hdr_len
) {
450 new = skb_realloc_headroom(skb
,flow
->hdr_len
);
455 D2PRINTK("atm_tc_dequeue: ip %p, data %p\n",
456 skb
->nh
.iph
,skb
->data
);
457 ATM_SKB(skb
)->vcc
= flow
->vcc
;
458 memcpy(skb_push(skb
,flow
->hdr_len
),flow
->hdr
,
460 atomic_add(skb
->truesize
,&flow
->vcc
->tx_inuse
);
461 ATM_SKB(skb
)->iovcnt
= 0;
462 /* atm.atm_options are already set by atm_tc_enqueue */
463 (void) flow
->vcc
->dev
->ops
->send(flow
->vcc
,skb
);
465 skb
= p
->link
.q
->dequeue(p
->link
.q
);
466 if (skb
) sch
->q
.qlen
--;
471 static int atm_tc_drop(struct Qdisc
*sch
)
473 struct atm_qdisc_data
*p
= PRIV(sch
);
474 struct atm_flow_data
*flow
;
476 DPRINTK("atm_tc_drop(sch %p,[qdisc %p])\n",sch
,p
);
477 for (flow
= p
->flows
; flow
; flow
= flow
->next
)
478 if (flow
->q
->ops
->drop
&& flow
->q
->ops
->drop(flow
->q
))
484 static int atm_tc_init(struct Qdisc
*sch
,struct rtattr
*opt
)
486 struct atm_qdisc_data
*p
= PRIV(sch
);
488 DPRINTK("atm_tc_init(sch %p,[qdisc %p],opt %p)\n",sch
,p
,opt
);
489 memset(p
,0,sizeof(*p
));
491 if(!(p
->link
.q
= qdisc_create_dflt(sch
->dev
,&pfifo_qdisc_ops
)))
492 p
->link
.q
= &noop_qdisc
;
493 DPRINTK("atm_tc_init: link (%p) qdisc %p\n",&p
->link
,p
->link
.q
);
494 p
->link
.filter_list
= NULL
;
497 p
->link
.classid
= sch
->handle
;
505 static void atm_tc_reset(struct Qdisc
*sch
)
507 struct atm_qdisc_data
*p
= PRIV(sch
);
508 struct atm_flow_data
*flow
;
510 DPRINTK("atm_tc_reset(sch %p,[qdisc %p])\n",sch
,p
);
511 for (flow
= p
->flows
; flow
; flow
= flow
->next
) qdisc_reset(flow
->q
);
516 static void atm_tc_destroy(struct Qdisc
*sch
)
518 struct atm_qdisc_data
*p
= PRIV(sch
);
519 struct atm_flow_data
*flow
;
521 DPRINTK("atm_tc_destroy(sch %p,[qdisc %p])\n",sch
,p
);
523 while ((flow
= p
->flows
)) {
525 printk(KERN_ERR
"atm_destroy: %p->ref = %d\n",flow
,
527 atm_tc_put(sch
,(unsigned long) flow
);
528 if (p
->flows
== flow
) {
529 printk(KERN_ERR
"atm_destroy: putting flow %p didn't "
531 p
->flows
= flow
->next
; /* brute force */
539 #ifdef CONFIG_RTNETLINK
541 static int atm_tc_dump_class(struct Qdisc
*sch
, unsigned long cl
,
542 struct sk_buff
*skb
, struct tcmsg
*tcm
)
544 struct atm_qdisc_data
*p
= PRIV(sch
);
545 struct atm_flow_data
*flow
= (struct atm_flow_data
*) cl
;
546 unsigned char *b
= skb
->tail
;
549 DPRINTK("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n",
551 if (!find_flow(p
,flow
)) return -EINVAL
;
552 tcm
->tcm_handle
= flow
->classid
;
553 rta
= (struct rtattr
*) b
;
554 RTA_PUT(skb
,TCA_OPTIONS
,0,NULL
);
555 RTA_PUT(skb
,TCA_ATM_HDR
,flow
->hdr_len
,flow
->hdr
);
557 struct sockaddr_atmpvc pvc
;
560 pvc
.sap_family
= AF_ATMPVC
;
561 pvc
.sap_addr
.itf
= flow
->vcc
->dev
? flow
->vcc
->dev
->number
: -1;
562 pvc
.sap_addr
.vpi
= flow
->vcc
->vpi
;
563 pvc
.sap_addr
.vci
= flow
->vcc
->vci
;
564 RTA_PUT(skb
,TCA_ATM_ADDR
,sizeof(pvc
),&pvc
);
565 state
= ATM_VF2VS(flow
->vcc
->flags
);
566 RTA_PUT(skb
,TCA_ATM_STATE
,sizeof(state
),&state
);
569 RTA_PUT(skb
,TCA_ATM_EXCESS
,sizeof(u32
),&flow
->classid
);
573 RTA_PUT(skb
,TCA_ATM_EXCESS
,sizeof(zero
),&zero
);
575 rta
->rta_len
= skb
->tail
-b
;
579 skb_trim(skb
,b
-skb
->data
);
583 static int atm_tc_dump(struct Qdisc
*sch
, struct sk_buff
*skb
)
591 static struct Qdisc_class_ops atm_class_ops
=
593 atm_tc_graft
, /* graft */
594 atm_tc_leaf
, /* leaf */
595 atm_tc_get
, /* get */
596 atm_tc_put
, /* put */
597 atm_tc_change
, /* change */
598 atm_tc_delete
, /* delete */
599 atm_tc_walk
, /* walk */
601 atm_tc_find_tcf
, /* tcf_chain */
602 atm_tc_bind_filter
, /* bind_tcf */
603 atm_tc_put
, /* unbind_tcf */
605 #ifdef CONFIG_RTNETLINK
606 atm_tc_dump_class
, /* dump */
610 struct Qdisc_ops atm_qdisc_ops
=
613 &atm_class_ops
, /* cl_ops */
615 sizeof(struct atm_qdisc_data
),
617 atm_tc_enqueue
, /* enqueue */
618 atm_tc_dequeue
, /* dequeue */
619 atm_tc_enqueue
, /* requeue; we're cheating a little */
620 atm_tc_drop
, /* drop */
622 atm_tc_init
, /* init */
623 atm_tc_reset
, /* reset */
624 atm_tc_destroy
, /* destroy */
627 #ifdef CONFIG_RTNETLINK
628 atm_tc_dump
/* dump */
/*
 * Module entry points: register the ATM qdisc on load and unregister it on
 * unload.
 *
 * NOTE(review): the #ifdef MODULE wrapper was re-created; confirm against
 * the upstream file.
 */
#ifdef MODULE
int init_module(void)
{
	int err = register_qdisc(&atm_qdisc_ops);

	return err;
}


void cleanup_module(void)
{
	unregister_qdisc(&atm_qdisc_ops);
}
#endif