/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		PACKET - implements raw packet sockets.
 *
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 *		Alan Cox	:	verify_area() now used correctly
 *		Alan Cox	:	new skbuff lists, look ma no backlogs!
 *		Alan Cox	:	tidied skbuff lists.
 *		Alan Cox	:	Now uses generic datagram routines I
 *					added. Also fixed the peek/read crash
 *					from all old Linux datagram code.
 *		Alan Cox	:	Uses the improved datagram code.
 *		Alan Cox	:	Added NULL's for socket options.
 *		Alan Cox	:	Re-commented the code.
 *		Alan Cox	:	Use new kernel side addressing
 *		Rob Janssen	:	Correct MTU usage.
 *		Dave Platt	:	Counter leaks caused by incorrect
 *					interrupt locking and some slightly
 *					dubious gcc output. Can you read
 *					compiler: it said _VOLATILE_
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	New buffers. Use sk->mac.raw.
 *		Alan Cox	:	sendmsg/recvmsg support.
 *		Alan Cox	:	Protocol setting support
 *		Alexey Kuznetsov:	Untied from IPv4 stack.
 *		Cyrus Durgin	:	Fixed kerneld for kmod.
 *		Michal Ostrowski:	Module initialization cleanup.
 *		Ulises Alonso	:	Frame number limit removal and
 *					packet_set_ring memory leak.
 *		Eric Biederman	:	Allow for > 8 byte hardware addresses.
 *					The convention is that longer addresses
 *					will simply extend the hardware address
 *					byte arrays at the end of sockaddr_ll
 *		Johann Baudy	:	Added TX RING.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/cacheflush.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>
#include <net/inet_common.h>
/*
   Assumptions:
   - if a device has no dev->hard_header routine, it adds and removes the ll
     header inside itself. In this case the ll header is invisible outside of
     the device, but higher levels still should reserve dev->hard_header_len.
     Some devices are clever enough to reallocate the skb when the header
     will not fit into the reserved space (tunnels); others are silly.
   - a packet socket receives packets with the ll header pulled,
     so SOCK_RAW should push it back.

   Incoming, dev->hard_header != NULL
     mac_header -> ll header

   Outgoing, dev->hard_header != NULL
     mac_header -> ll header

   Incoming, dev->hard_header == NULL
     mac_header -> UNKNOWN position. It is very likely that it points to the
		   ll header. PPP does this, which is wrong, because it
		   introduces asymmetry between the rx and tx paths.

   Outgoing, dev->hard_header == NULL
     mac_header -> data. The ll header is still not built!

   If dev->hard_header == NULL we are unlikely to restore a sensible ll header.

   On transmit:

   dev->hard_header != NULL
     mac_header -> ll header

   dev->hard_header == NULL (ll header is added by the device, we cannot
   control it)

   We should set nh.raw on output to the correct position;
   the packet classifier depends on it.
 */
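/*
 * Illustrative user-space sketch (not part of this file) of the visibility
 * rules described above: with SOCK_RAW the ll header is part of the data the
 * application sees, with SOCK_DGRAM it is stripped and only described via
 * sockaddr_ll. Assumes an Ethernet device and <sys/socket.h>,
 * <linux/if_packet.h>, <net/ethernet.h>.
 *
 *	int raw   = socket(AF_PACKET, SOCK_RAW,   htons(ETH_P_ALL));
 *	int dgram = socket(AF_PACKET, SOCK_DGRAM, htons(ETH_P_ALL));
 *	// read() on 'raw' starts at the Ethernet header (mac_header),
 *	// read() on 'dgram' starts at the network header.
 */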
/* Private packet socket structures. */

struct packet_mclist {
	struct packet_mclist	*next;
	unsigned char		addr[MAX_ADDR_LEN];
};

/* identical to struct packet_mreq except it has
 * a longer address field.
 */
struct packet_mreq_max {
	unsigned short	mr_type;
	unsigned short	mr_alen;
	unsigned char	mr_address[MAX_ADDR_LEN];
};

static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
		int closing, int tx_ring);
#define PGV_FROM_VMALLOC 1

struct packet_ring_buffer {
	unsigned int		frames_per_block;
	unsigned int		frame_size;
	unsigned int		frame_max;

	unsigned int		pg_vec_order;
	unsigned int		pg_vec_pages;
	unsigned int		pg_vec_len;
};

static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
static void packet_flush_mclist(struct sock *sk);
/* struct sock has to be the first member of packet_sock */
struct packet_sock {
	struct tpacket_stats		stats;
	struct packet_ring_buffer	rx_ring;
	struct packet_ring_buffer	tx_ring;
	spinlock_t			bind_lock;
	struct mutex			pg_vec_lock;
	unsigned int			running:1,	/* prot_hook is attached */
	int				ifindex;	/* bound device */
	struct packet_mclist		*mclist;
	enum tpacket_versions		tp_version;
	unsigned int			tp_hdrlen;
	unsigned int			tp_reserve;
	unsigned int			tp_loss:1;
	unsigned int			tp_tstamp;
	struct packet_type		prot_hook ____cacheline_aligned_in_smp;
};

struct packet_skb_cb {
	unsigned int origlen;
	union {
		struct sockaddr_pkt pkt;
		struct sockaddr_ll ll;
	} sa;
};

#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
static inline __pure struct page *pgv_to_page(void *addr)
{
	if (is_vmalloc_addr(addr))
		return vmalloc_to_page(addr);
	return virt_to_page(addr);
}

static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		break;
	case TPACKET_V2:
		h.h2->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		break;
	default:
		pr_err("TPACKET version not supported\n");
	}
}

static int __packet_get_status(struct packet_sock *po, void *frame)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		return h.h1->tp_status;
	case TPACKET_V2:
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		return h.h2->tp_status;
	default:
		pr_err("TPACKET version not supported\n");
	}
}
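/*
 * The tp_status word in each ring frame is the handshake between kernel and
 * user space: the kernel only touches frames it finds in TP_STATUS_KERNEL
 * (rx) or TP_STATUS_SEND_REQUEST (tx) state and flips them to TP_STATUS_USER
 * or TP_STATUS_SENDING/TP_STATUS_AVAILABLE when done. A hedged user-space
 * sketch of the rx side of that handshake (assumes the ring is already
 * mapped at 'ring' with frame size 'framesz'; names are illustrative):
 *
 *	struct tpacket_hdr *hdr = (struct tpacket_hdr *)(ring + i * framesz);
 *	while (!(hdr->tp_status & TP_STATUS_USER))
 *		poll(&pfd, 1, -1);		// wait for the kernel
 *	// ... consume the frame ...
 *	hdr->tp_status = TP_STATUS_KERNEL;	// hand the slot back
 */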
static void *packet_lookup_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		unsigned int position,
		int status)
{
	unsigned int pg_vec_pos, frame_offset;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	pg_vec_pos = position / rb->frames_per_block;
	frame_offset = position % rb->frames_per_block;

	h.raw = rb->pg_vec[pg_vec_pos].buffer +
		(frame_offset * rb->frame_size);

	if (status != __packet_get_status(po, h.raw))
		return NULL;

	return h.raw;
}

static inline void *packet_current_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status)
{
	return packet_lookup_frame(po, rb, rb->head, status);
}

static inline void *packet_previous_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status)
{
	unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
	return packet_lookup_frame(po, rb, previous, status);
}

static inline void packet_increment_head(struct packet_ring_buffer *buff)
{
	buff->head = buff->head != buff->frame_max ? buff->head + 1 : 0;
}

static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	return (struct packet_sock *)sk;
}
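/*
 * Worked example of the lookup arithmetic above (illustrative numbers): with
 * tp_block_size = 4096 and tp_frame_size = 2048 we get frames_per_block = 2,
 * so frame number 5 lives in block 5 / 2 = 2 at offset (5 % 2) * 2048 = 2048
 * bytes into that block's buffer. frame_max is tp_frame_nr - 1, which is why
 * packet_increment_head() wraps back to 0 after reaching it.
 */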
static void packet_sock_destruct(struct sock *sk)
{
	skb_queue_purge(&sk->sk_error_queue);

	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
	WARN_ON(atomic_read(&sk->sk_wmem_alloc));

	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_err("Attempt to release alive packet socket: %p\n", sk);
		return;
	}

	sk_refcnt_debug_dec(sk);
}

static const struct proto_ops packet_ops;

static const struct proto_ops packet_ops_spkt;
static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
			   struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_pkt *spkt;

	/*
	 *	When we registered the protocol we saved the socket in the data
	 *	field for just this event.
	 */

	sk = pt->af_packet_priv;

	/*
	 *	Yank back the headers [hope the device set this
	 *	right or kerboom...]
	 *
	 *	Incoming packets have ll header pulled,
	 *	For outgoing ones skb->data == skb_mac_header(skb)
	 *	so that this procedure is noop.
	 */

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto out;

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto out;

	skb = skb_share_check(skb, GFP_ATOMIC);

	/* drop any routing info */

	/* drop conntrack reference */

	spkt = &PACKET_SKB_CB(skb)->sa.pkt;

	skb_push(skb, skb->data - skb_mac_header(skb));

	/*
	 *	The SOCK_PACKET socket receives _all_ frames.
	 */

	spkt->spkt_family = dev->type;
	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
	spkt->spkt_protocol = skb->protocol;

	/*
	 *	Charge the memory to the socket. This is done specifically
	 *	to prevent sockets using all the memory up.
	 */

	if (sock_queue_rcv_skb(sk, skb) == 0)
		return 0;
out:
	return 0;
}
/*
 *	Output a raw packet to a device layer. This bypasses all the other
 *	protocol layers and you must therefore supply it with a complete frame
 */

static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
	struct sk_buff *skb = NULL;
	struct net_device *dev;

	/*
	 *	Get and verify the address.
	 */

	if (msg->msg_namelen < sizeof(struct sockaddr))
		return -EINVAL;
	if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
		proto = saddr->spkt_protocol;
	else
		return -ENOTCONN;	/* SOCK_PACKET must be sent giving an address */

	/*
	 *	Find the device first to size check it
	 */

	saddr->spkt_device[13] = 0;
	dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);

	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	/*
	 * You may not queue a frame bigger than the mtu. This is the lowest level
	 * raw protocol and you must do your own fragmentation at this level.
	 */

	if (len > dev->mtu + dev->hard_header_len)
		goto out_unlock;

	size_t reserved = LL_RESERVED_SPACE(dev);
	unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;

	skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL);

	/* FIXME: Save some space for broken drivers that write a hard
	 * header at transmission time by themselves. PPP is the notable
	 * one here. This should really be fixed at the driver level.
	 */
	skb_reserve(skb, reserved);
	skb_reset_network_header(skb);

	/* Try to align data part correctly */
	skb_reset_network_header(skb);

	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);

	skb->protocol = proto;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;
	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);

out_unlock:
	return err;
}
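/*
 * Hedged user-space sketch of the (legacy) SOCK_PACKET interface this
 * function serves: the caller names the device in a sockaddr_pkt and must
 * supply a complete link-layer frame, already within dev->mtu +
 * hard_header_len. The identifiers "frame", "frame_len" and "eth0" below are
 * illustrative only.
 *
 *	int fd = socket(AF_PACKET, SOCK_PACKET, htons(ETH_P_ALL));
 *	struct sockaddr_pkt spkt = { .spkt_family = AF_PACKET };
 *	strncpy((char *)spkt.spkt_device, "eth0", sizeof(spkt.spkt_device));
 *	spkt.spkt_protocol = htons(ETH_P_IP);
 *	sendto(fd, frame, frame_len, 0,
 *	       (struct sockaddr *)&spkt, sizeof(spkt));
 */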
static inline unsigned int run_filter(const struct sk_buff *skb,
				      const struct sock *sk,
				      unsigned int res)
{
	struct sk_filter *filter;

	filter = rcu_dereference_bh(sk->sk_filter);
	if (filter != NULL)
		res = sk_run_filter(skb, filter->insns);
	rcu_read_unlock_bh();

	return res;
}

/*
 * This function makes lazy skb cloning in hope that most of packets
 * are discarded by BPF.
 *
 * Note tricky part: we DO mangle shared skb! skb->data, skb->len
 * and skb->cb are mangled. It works because (and until) packets
 * falling here are owned by current CPU. Output packets are cloned
 * by dev_queue_xmit_nit(), input packets are processed by net_bh
 * sequentially, so that if we return skb to original state on exit,
 * we will not harm anyone.
 */
static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
		      struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_sock *po;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	if (dev->header_ops) {
		/* The device has an explicit notion of ll header,
		 * exported to higher levels.
		 *
		 * Otherwise, the device hides details of its frame
		 * structure, so that corresponding packet head is
		 * never delivered to user.
		 */
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	res = run_filter(skb, sk, snaplen);

	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf)
		goto drop_n_acct;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);

		if (skb_head != skb->data) {
			skb->data = skb_head;
		}
	}

	BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
		     sizeof(skb->cb));

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);

	PACKET_SKB_CB(skb)->origlen = skb->len;

	if (pskb_trim(skb, snaplen))
		goto drop_n_acct;

	skb_set_owner_r(skb, sk);

	/* drop conntrack reference */

	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_packets++;
	skb->dropcount = atomic_read(&sk->sk_drops);
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	spin_unlock(&sk->sk_receive_queue.lock);
	sk->sk_data_ready(sk, skb->len);
	return 0;

drop_n_acct:
	po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);

	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
	}
drop:
	return 0;
}
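/*
 * What packet_rcv() prepares is what recvfrom() on a non-mmap packet socket
 * eventually returns: the (possibly truncated) frame plus a sockaddr_ll
 * describing where it came from. A hedged user-space sketch:
 *
 *	struct sockaddr_ll from;
 *	socklen_t fromlen = sizeof(from);
 *	unsigned char buf[2048];
 *	ssize_t n = recvfrom(fd, buf, sizeof(buf), 0,
 *			     (struct sockaddr *)&from, &fromlen);
 *	// from.sll_ifindex, sll_protocol, sll_pkttype and sll_addr are the
 *	// fields filled in from the skb and the device above.
 */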
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct packet_sock *po;
	struct sockaddr_ll *sll;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;
	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
	unsigned short macoff, netoff, hdrlen;
	struct sk_buff *copy_skb = NULL;
	struct timeval tv;
	struct timespec ts;
	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	if (dev->header_ops) {
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		status |= TP_STATUS_CSUMNOTREADY;

	res = run_filter(skb, sk, snaplen);

	if (sk->sk_type == SOCK_DGRAM) {
		macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
				  po->tp_reserve;
	} else {
		unsigned maclen = skb_network_offset(skb);
		netoff = TPACKET_ALIGN(po->tp_hdrlen +
				       (maclen < 16 ? 16 : maclen)) +
			 po->tp_reserve;
		macoff = netoff - maclen;
	}

	if (macoff + snaplen > po->rx_ring.frame_size) {
		if (po->copy_thresh &&
		    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
		    (unsigned)sk->sk_rcvbuf) {
			if (skb_shared(skb)) {
				copy_skb = skb_clone(skb, GFP_ATOMIC);
			} else {
				copy_skb = skb_get(skb);
				skb_head = skb->data;
			}
			if (copy_skb)
				skb_set_owner_r(copy_skb, sk);
		}
		snaplen = po->rx_ring.frame_size - macoff;
		if ((int)snaplen < 0)
			snaplen = 0;
	}

	spin_lock(&sk->sk_receive_queue.lock);
	h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
	if (!h.raw)
		goto ring_is_full;
	packet_increment_head(&po->rx_ring);
	po->stats.tp_packets++;
	if (copy_skb) {
		status |= TP_STATUS_COPY;
		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
	}
	if (!po->stats.tp_drops)
		status &= ~TP_STATUS_LOSING;
	spin_unlock(&sk->sk_receive_queue.lock);

	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);

	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_len = skb->len;
		h.h1->tp_snaplen = snaplen;
		h.h1->tp_mac = macoff;
		h.h1->tp_net = netoff;
		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
				&& shhwtstamps->syststamp.tv64)
			tv = ktime_to_timeval(shhwtstamps->syststamp);
		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
				&& shhwtstamps->hwtstamp.tv64)
			tv = ktime_to_timeval(shhwtstamps->hwtstamp);
		else if (skb->tstamp.tv64)
			tv = ktime_to_timeval(skb->tstamp);
		else
			do_gettimeofday(&tv);
		h.h1->tp_sec = tv.tv_sec;
		h.h1->tp_usec = tv.tv_usec;
		hdrlen = sizeof(*h.h1);
		break;
	case TPACKET_V2:
		h.h2->tp_len = skb->len;
		h.h2->tp_snaplen = snaplen;
		h.h2->tp_mac = macoff;
		h.h2->tp_net = netoff;
		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
				&& shhwtstamps->syststamp.tv64)
			ts = ktime_to_timespec(shhwtstamps->syststamp);
		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
				&& shhwtstamps->hwtstamp.tv64)
			ts = ktime_to_timespec(shhwtstamps->hwtstamp);
		else if (skb->tstamp.tv64)
			ts = ktime_to_timespec(skb->tstamp);
		else
			getnstimeofday(&ts);
		h.h2->tp_sec = ts.tv_sec;
		h.h2->tp_nsec = ts.tv_nsec;
		h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
		hdrlen = sizeof(*h.h2);
		break;
	default:
		BUG();
	}

	sll = h.raw + TPACKET_ALIGN(hdrlen);
	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	__packet_set_status(po, h.raw, status);

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen);
	for (start = h.raw; start < end; start += PAGE_SIZE)
		flush_dcache_page(pgv_to_page(start));
#endif

	sk->sk_data_ready(sk, 0);

	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
	}
drop:
	return 0;

ring_is_full:
	po->stats.tp_drops++;
	spin_unlock(&sk->sk_receive_queue.lock);

	sk->sk_data_ready(sk, 0);
	goto drop;
}
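/*
 * Hedged user-space sketch of the PACKET_RX_RING consumer that tpacket_rcv()
 * fills (TPACKET_V2 layout; sizes and the variable names are illustrative,
 * and PACKET_VERSION must be set to TPACKET_V2 before PACKET_RX_RING):
 *
 *	struct tpacket_req req = {
 *		.tp_block_size = 4096,
 *		.tp_frame_size = 2048,
 *		.tp_block_nr   = 64,
 *		.tp_frame_nr   = 128,
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 *	void *ring = mmap(NULL, req.tp_block_size * req.tp_block_nr,
 *			  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *	struct tpacket2_hdr *hdr = ring;		// frame 0
 *	if (hdr->tp_status & TP_STATUS_USER) {
 *		void *pkt = (char *)hdr + hdr->tp_mac;	// data starts at tp_mac
 *		// ... process tp_snaplen bytes ...
 *		hdr->tp_status = TP_STATUS_KERNEL;	// return the frame
 *	}
 */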
static void tpacket_destruct_skb(struct sk_buff *skb)
{
	struct packet_sock *po = pkt_sk(skb->sk);
	void *ph;

	if (likely(po->tx_ring.pg_vec)) {
		ph = skb_shinfo(skb)->destructor_arg;
		BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
		atomic_dec(&po->tx_ring.pending);
		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
	}
}
static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
		void *frame, struct net_device *dev, int size_max,
		__be16 proto, unsigned char *addr)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} ph;
	int to_write, offset, len, tp_len, nr_frags, len_max;
	struct socket *sock = po->sk.sk_socket;

	skb->protocol = proto;
	skb->priority = po->sk.sk_priority;
	skb->mark = po->sk.sk_mark;
	skb_shinfo(skb)->destructor_arg = ph.raw;

	switch (po->tp_version) {
	case TPACKET_V2:
		tp_len = ph.h2->tp_len;
		break;
	default:
		tp_len = ph.h1->tp_len;
		break;
	}
	if (unlikely(tp_len > size_max)) {
		pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
		return -EMSGSIZE;
	}

	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb_reset_network_header(skb);

	data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);

	if (sock->type == SOCK_DGRAM) {
		err = dev_hard_header(skb, dev, ntohs(proto), addr,
				      NULL, tp_len);
		if (unlikely(err < 0))
			return -EINVAL;
	} else if (dev->hard_header_len) {
		/* net device doesn't like empty head */
		if (unlikely(tp_len <= dev->hard_header_len)) {
			pr_err("packet size is too short (%d < %d)\n",
			       tp_len, dev->hard_header_len);
			return -EINVAL;
		}

		skb_push(skb, dev->hard_header_len);
		err = skb_store_bits(skb, 0, data,
				     dev->hard_header_len);

		data += dev->hard_header_len;
		to_write -= dev->hard_header_len;
	}

	offset = offset_in_page(data);
	len_max = PAGE_SIZE - offset;
	len = ((to_write > len_max) ? len_max : to_write);

	skb->data_len = to_write;
	skb->len += to_write;
	skb->truesize += to_write;
	atomic_add(to_write, &po->sk.sk_wmem_alloc);

	while (likely(to_write)) {
		nr_frags = skb_shinfo(skb)->nr_frags;

		if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
			pr_err("Packet exceed the number of skb frags(%lu)\n",
			       MAX_SKB_FRAGS);
			return -EFAULT;
		}

		page = pgv_to_page(data);
		flush_dcache_page(page);
		skb_fill_page_desc(skb, nr_frags, page, offset, len);

		len = ((to_write > len_max) ? len_max : to_write);
	}

	return tp_len;
}
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
	struct net_device *dev;
	int ifindex, err, reserve = 0;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	int tp_len, size_max;

	sock = po->sk.sk_socket;

	mutex_lock(&po->pg_vec_lock);

	if (saddr == NULL) {
		ifindex = po->ifindex;
	} else {
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen
					+ offsetof(struct sockaddr_ll,
						   sll_addr)))
			goto out;
		ifindex = saddr->sll_ifindex;
		proto = saddr->sll_protocol;
		addr = saddr->sll_addr;
	}

	dev = dev_get_by_index(sock_net(&po->sk), ifindex);
	if (unlikely(dev == NULL))
		goto out;

	reserve = dev->hard_header_len;

	if (unlikely(!(dev->flags & IFF_UP)))
		goto out_put;

	size_max = po->tx_ring.frame_size
		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));

	if (size_max > dev->mtu + reserve)
		size_max = dev->mtu + reserve;

	do {
		ph = packet_current_frame(po, &po->tx_ring,
					  TP_STATUS_SEND_REQUEST);

		if (unlikely(ph == NULL)) {
			schedule();
			continue;
		}

		status = TP_STATUS_SEND_REQUEST;
		skb = sock_alloc_send_skb(&po->sk,
				LL_ALLOCATED_SPACE(dev)
				+ sizeof(struct sockaddr_ll),
				0, &err);

		if (unlikely(skb == NULL))
			goto out_status;

		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
					  addr);

		if (unlikely(tp_len < 0)) {
			if (po->tp_loss) {
				__packet_set_status(po, ph,
						TP_STATUS_AVAILABLE);
				packet_increment_head(&po->tx_ring);
			} else {
				status = TP_STATUS_WRONG_FORMAT;
			}
		}

		skb->destructor = tpacket_destruct_skb;
		__packet_set_status(po, ph, TP_STATUS_SENDING);
		atomic_inc(&po->tx_ring.pending);

		status = TP_STATUS_SEND_REQUEST;
		err = dev_queue_xmit(skb);
		if (unlikely(err > 0)) {
			err = net_xmit_errno(err);
			if (err && __packet_get_status(po, ph) ==
				   TP_STATUS_AVAILABLE) {
				/* skb was destructed already */
			}
			/*
			 * skb was dropped but not destructed yet;
			 * let's treat it like congestion or err < 0
			 */
		}
		packet_increment_head(&po->tx_ring);
	} while (likely((ph != NULL) ||
			((!(msg->msg_flags & MSG_DONTWAIT)) &&
			 (atomic_read(&po->tx_ring.pending))))
		);

out_status:
	__packet_set_status(po, ph, status);
out_put:
	dev_put(dev);
out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
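/*
 * The producer side that drives tpacket_snd(): user space fills a frame in
 * the mapped TX ring, marks it TP_STATUS_SEND_REQUEST and kicks the kernel
 * with send(). A hedged sketch (TPACKET_V2; "tx_ring", "framesz", "frame"
 * and "frame_len" are illustrative names):
 *
 *	struct tpacket2_hdr *hdr = tx_ring + i * framesz;
 *	if (hdr->tp_status == TP_STATUS_AVAILABLE) {
 *		char *data = (char *)hdr + TPACKET2_HDRLEN -
 *			     sizeof(struct sockaddr_ll);
 *		memcpy(data, frame, frame_len);
 *		hdr->tp_len = frame_len;
 *		hdr->tp_status = TP_STATUS_SEND_REQUEST;
 *		send(fd, NULL, 0, 0);	// or MSG_DONTWAIT to avoid blocking
 *	}
 */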
static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
					       size_t reserve, size_t len,
					       size_t linear, int noblock,
					       int *err)
{
	struct sk_buff *skb;

	/* Under a page? Don't bother with paged skb. */
	if (prepad + len < PAGE_SIZE || !linear)
		linear = len;

	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
				   err);

	skb_reserve(skb, reserve);
	skb_put(skb, linear);
	skb->data_len = len - linear;
	skb->len += len - linear;

	return skb;
}
static int packet_snd(struct socket *sock,
		      struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	unsigned char *addr;
	int ifindex, err, reserve = 0;
	struct virtio_net_hdr vnet_hdr = { 0 };
	struct packet_sock *po = pkt_sk(sk);
	unsigned short gso_type = 0;

	/*
	 *	Get and verify the address.
	 */

	if (saddr == NULL) {
		ifindex = po->ifindex;
	} else {
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
			goto out;
		ifindex = saddr->sll_ifindex;
		proto = saddr->sll_protocol;
		addr = saddr->sll_addr;
	}

	dev = dev_get_by_index(sock_net(sk), ifindex);

	if (sock->type == SOCK_RAW)
		reserve = dev->hard_header_len;

	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	if (po->has_vnet_hdr) {
		vnet_hdr_len = sizeof(vnet_hdr);

		if (len < vnet_hdr_len)
			goto out_unlock;

		len -= vnet_hdr_len;

		err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov,
				       vnet_hdr_len);

		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
		    (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
		     vnet_hdr.hdr_len))
			vnet_hdr.hdr_len = vnet_hdr.csum_start +
					   vnet_hdr.csum_offset + 2;

		if (vnet_hdr.hdr_len > len)
			goto out_unlock;

		if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
			switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
			case VIRTIO_NET_HDR_GSO_TCPV4:
				gso_type = SKB_GSO_TCPV4;
				break;
			case VIRTIO_NET_HDR_GSO_TCPV6:
				gso_type = SKB_GSO_TCPV6;
				break;
			case VIRTIO_NET_HDR_GSO_UDP:
				gso_type = SKB_GSO_UDP;
				break;
			}

			if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
				gso_type |= SKB_GSO_TCP_ECN;

			if (vnet_hdr.gso_size == 0)
				goto out_unlock;
		}
	}

	if (!gso_type && (len > dev->mtu + reserve))
		goto out_unlock;

	skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev),
			       LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len,
			       msg->msg_flags & MSG_DONTWAIT, &err);

	skb_set_network_header(skb, reserve);

	if (sock->type == SOCK_DGRAM &&
	    (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
		goto out_free;

	/* Returns -EFAULT on error */
	err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);

	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);

	skb->protocol = proto;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	if (po->has_vnet_hdr) {
		if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
			if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
						  vnet_hdr.csum_offset)) {
				goto out_free;
			}
		}

		skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
		skb_shinfo(skb)->gso_type = gso_type;

		/* Header must be checked, and gso_segs computed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;

		len += vnet_hdr_len;
	}

	err = dev_queue_xmit(skb);
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
out:
	return err;
}
static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);

	if (po->tx_ring.pg_vec)
		return tpacket_snd(po, msg);
	else
		return packet_snd(sock, msg, len);
}
/*
 *	Close a PACKET socket. This is fairly simple. We immediately go
 *	to 'closed' state and remove our protocol entry in the device list.
 */

static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;
	struct net *net;
	struct tpacket_req req;

	spin_lock_bh(&net->packet.sklist_lock);
	sk_del_node_init_rcu(sk);
	sock_prot_inuse_add(net, sk->sk_prot, -1);
	spin_unlock_bh(&net->packet.sklist_lock);

	spin_lock(&po->bind_lock);

	/*
	 *	Remove from protocol table
	 */

	__dev_remove_pack(&po->prot_hook);

	spin_unlock(&po->bind_lock);

	packet_flush_mclist(sk);

	memset(&req, 0, sizeof(req));

	if (po->rx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 0);

	if (po->tx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 1);

	/*
	 *	Now the socket is dead. No more input will appear.
	 */

	skb_queue_purge(&sk->sk_receive_queue);
	sk_refcnt_debug_release(sk);

	return 0;
}
/*
 *	Attach a packet hook.
 */

static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
	struct packet_sock *po = pkt_sk(sk);

	/*
	 *	Detach an existing hook if present.
	 */

	spin_lock(&po->bind_lock);

	spin_unlock(&po->bind_lock);
	dev_remove_pack(&po->prot_hook);
	spin_lock(&po->bind_lock);

	po->prot_hook.type = protocol;
	po->prot_hook.dev = dev;

	po->ifindex = dev ? dev->ifindex : 0;

	if (!dev || (dev->flags & IFF_UP)) {
		dev_add_pack(&po->prot_hook);
	} else {
		sk->sk_err = ENETDOWN;
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_error_report(sk);
	}

	spin_unlock(&po->bind_lock);
	return 0;
}
);
1394 * Bind a packet socket to a device
1397 static int packet_bind_spkt(struct socket
*sock
, struct sockaddr
*uaddr
,
1400 struct sock
*sk
= sock
->sk
;
1402 struct net_device
*dev
;
1409 if (addr_len
!= sizeof(struct sockaddr
))
1411 strlcpy(name
, uaddr
->sa_data
, sizeof(name
));
1413 dev
= dev_get_by_name(sock_net(sk
), name
);
1415 err
= packet_do_bind(sk
, dev
, pkt_sk(sk
)->num
);
1421 static int packet_bind(struct socket
*sock
, struct sockaddr
*uaddr
, int addr_len
)
1423 struct sockaddr_ll
*sll
= (struct sockaddr_ll
*)uaddr
;
1424 struct sock
*sk
= sock
->sk
;
1425 struct net_device
*dev
= NULL
;
1433 if (addr_len
< sizeof(struct sockaddr_ll
))
1435 if (sll
->sll_family
!= AF_PACKET
)
1438 if (sll
->sll_ifindex
) {
1440 dev
= dev_get_by_index(sock_net(sk
), sll
->sll_ifindex
);
1444 err
= packet_do_bind(sk
, dev
, sll
->sll_protocol
? : pkt_sk(sk
)->num
);
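/*
 * A hedged user-space sketch of the bind() call handled above: only
 * sll_protocol and sll_ifindex matter here, everything else may be zero.
 * "eth0" is illustrative.
 *
 *	struct sockaddr_ll sll = {
 *		.sll_family   = AF_PACKET,
 *		.sll_protocol = htons(ETH_P_ALL),
 *		.sll_ifindex  = if_nametoindex("eth0"),
 *	};
 *	bind(fd, (struct sockaddr *)&sll, sizeof(sll));
 */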
static struct proto packet_proto = {
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct packet_sock),
};

/*
 *	Create a packet of type SOCK_PACKET.
 */

static int packet_create(struct net *net, struct socket *sock, int protocol,
			 int kern)
{
	struct sock *sk;
	struct packet_sock *po;
	__be16 proto = (__force __be16)protocol; /* weird, but documented */

	if (!capable(CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
	    sock->type != SOCK_PACKET)
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;

	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);

	sock->ops = &packet_ops;
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;

	sock_init_data(sock, sk);

	sk->sk_family = PF_PACKET;

	sk->sk_destruct = packet_sock_destruct;
	sk_refcnt_debug_inc(sk);

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&po->bind_lock);
	mutex_init(&po->pg_vec_lock);
	po->prot_hook.func = packet_rcv;

	if (sock->type == SOCK_PACKET)
		po->prot_hook.func = packet_rcv_spkt;

	po->prot_hook.af_packet_priv = sk;

	if (proto) {
		po->prot_hook.type = proto;
		dev_add_pack(&po->prot_hook);
	}

	spin_lock_bh(&net->packet.sklist_lock);
	sk_add_node_rcu(sk, &net->packet.sklist);
	sock_prot_inuse_add(net, &packet_proto, 1);
	spin_unlock_bh(&net->packet.sklist_lock);

	return 0;
}
static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *skb, *skb2;

	skb = skb_dequeue(&sk->sk_error_queue);

	msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);

	sock_recv_timestamp(msg, sk, skb);

	serr = SKB_EXT_ERR(skb);
	put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
		 sizeof(serr->ee), &serr->ee);

	msg->msg_flags |= MSG_ERRQUEUE;

	/* Reset and regenerate socket error */
	spin_lock_bh(&sk->sk_error_queue.lock);
	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
		spin_unlock_bh(&sk->sk_error_queue.lock);
		sk->sk_error_report(sk);
	} else
		spin_unlock_bh(&sk->sk_error_queue.lock);

	kfree_skb(skb);
	return err;
}
/*
 *	Pull a packet from our receive queue and hand it to the user.
 *	If necessary we block.
 */

static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len, int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	struct sockaddr_ll *sll;
	int vnet_hdr_len = 0;

	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
		goto out;

#if 0
	/* What error should we return now? EUNATTACH? */
	if (pkt_sk(sk)->ifindex < 0)
		return -ENODEV;
#endif

	if (flags & MSG_ERRQUEUE) {
		err = packet_recv_error(sk, msg, len);
		goto out;
	}

	/*
	 *	Call the generic datagram receiver. This handles all sorts
	 *	of horrible races and re-entrancy so we can forget about it
	 *	in the protocol layers.
	 *
	 *	Now it will return ENETDOWN, if device have just gone down,
	 *	but then it will block.
	 */

	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);

	/*
	 *	An error occurred so return it. Because skb_recv_datagram()
	 *	handles the blocking we don't see and worry about blocking
	 *	retries.
	 */

	if (pkt_sk(sk)->has_vnet_hdr) {
		struct virtio_net_hdr vnet_hdr = { 0 };

		vnet_hdr_len = sizeof(vnet_hdr);
		if (len < vnet_hdr_len)
			goto out_free;

		len -= vnet_hdr_len;

		if (skb_is_gso(skb)) {
			struct skb_shared_info *sinfo = skb_shinfo(skb);

			/* This is a hint as to how much should be linear. */
			vnet_hdr.hdr_len = skb_headlen(skb);
			vnet_hdr.gso_size = sinfo->gso_size;
			if (sinfo->gso_type & SKB_GSO_TCPV4)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
			else if (sinfo->gso_type & SKB_GSO_TCPV6)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
			else if (sinfo->gso_type & SKB_GSO_UDP)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
			else if (sinfo->gso_type & SKB_GSO_FCOE)
				goto out_free;

			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
				vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
		} else
			vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;

		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			vnet_hdr.csum_start = skb_checksum_start_offset(skb);
			vnet_hdr.csum_offset = skb->csum_offset;
		} /* else everything is zero */

		err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
				     vnet_hdr_len);
	}

	/*
	 *	If the address length field is there to be filled in, we fill
	 *	it in now.
	 */

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	if (sock->type == SOCK_PACKET)
		msg->msg_namelen = sizeof(struct sockaddr_pkt);
	else
		msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);

	/*
	 *	You lose any data beyond the buffer you gave. If it worries a
	 *	user program they can ask the device for its MTU anyway.
	 */

	msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);

	sock_recv_ts_and_drops(msg, sk, skb);

	if (msg->msg_name)
		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
		       msg->msg_namelen);

	if (pkt_sk(sk)->auxdata) {
		struct tpacket_auxdata aux;

		aux.tp_status = TP_STATUS_USER;
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
		aux.tp_len = PACKET_SKB_CB(skb)->origlen;
		aux.tp_snaplen = skb->len;
		aux.tp_net = skb_network_offset(skb);
		aux.tp_vlan_tci = vlan_tx_tag_get(skb);

		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
	}

	/*
	 *	Free or return the buffer as appropriate. Again this
	 *	hides all the races and re-entrancy issues from us.
	 */
	err = vnet_hdr_len + ((flags & MSG_TRUNC) ? skb->len : copied);

out_free:
	skb_free_datagram(sk, skb);
out:
	return err;
}
static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
			       int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;

	uaddr->sa_family = AF_PACKET;

	dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
	if (dev)
		strncpy(uaddr->sa_data, dev->name, 14);
	else
		memset(uaddr->sa_data, 0, 14);

	*uaddr_len = sizeof(*uaddr);
	return 0;
}

static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
			  int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);

	sll->sll_family = AF_PACKET;
	sll->sll_ifindex = po->ifindex;
	sll->sll_protocol = po->num;
	sll->sll_pkttype = 0;

	dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
	if (dev) {
		sll->sll_hatype = dev->type;
		sll->sll_halen = dev->addr_len;
		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
	} else {
		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
	}

	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
	return 0;
}
static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
			 int what)
{
	switch (i->type) {
	case PACKET_MR_MULTICAST:
		if (i->alen != dev->addr_len)
			return -EINVAL;
		if (what > 0)
			return dev_mc_add(dev, i->addr);
		else
			return dev_mc_del(dev, i->addr);

	case PACKET_MR_PROMISC:
		return dev_set_promiscuity(dev, what);

	case PACKET_MR_ALLMULTI:
		return dev_set_allmulti(dev, what);

	case PACKET_MR_UNICAST:
		if (i->alen != dev->addr_len)
			return -EINVAL;
		if (what > 0)
			return dev_uc_add(dev, i->addr);
		else
			return dev_uc_del(dev, i->addr);
	}
	return 0;
}

static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
{
	for ( ; i; i = i->next) {
		if (i->ifindex == dev->ifindex)
			packet_dev_mc(dev, i, what);
	}
}

static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml, *i;
	struct net_device *dev;

	dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);

	if (mreq->mr_alen > dev->addr_len)
		goto done;

	i = kmalloc(sizeof(*i), GFP_KERNEL);

	for (ml = po->mclist; ml; ml = ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			/* Free the new element ... */
			goto done;
		}
	}

	i->type = mreq->mr_type;
	i->ifindex = mreq->mr_ifindex;
	i->alen = mreq->mr_alen;
	memcpy(i->addr, mreq->mr_address, i->alen);

	i->next = po->mclist;
	err = packet_dev_mc(dev, i, 1);
	if (err)
		po->mclist = i->next;

done:
	return err;
}

static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_mclist *ml, **mlp;

	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			if (--ml->count == 0) {
				struct net_device *dev;

				dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
				if (dev)
					packet_dev_mc(dev, ml, -1);
			}
		}
	}
	return -EADDRNOTAVAIL;
}

static void packet_flush_mclist(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml;

	while ((ml = po->mclist) != NULL) {
		struct net_device *dev;

		po->mclist = ml->next;
		dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
		if (dev != NULL)
			packet_dev_mc(dev, ml, -1);
	}
}
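/*
 * Hedged user-space sketch of the setsockopt() interface the helpers above
 * implement; PACKET_MR_PROMISC needs no address, multicast/unicast entries
 * pass one in mr_address. "eth0" is illustrative.
 *
 *	struct packet_mreq mreq = {
 *		.mr_ifindex = if_nametoindex("eth0"),
 *		.mr_type    = PACKET_MR_PROMISC,
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
 *		   &mreq, sizeof(mreq));
 */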
static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	switch (optname) {
	case PACKET_ADD_MEMBERSHIP:
	case PACKET_DROP_MEMBERSHIP:
	{
		struct packet_mreq_max mreq;
		int len = optlen;
		memset(&mreq, 0, sizeof(mreq));
		if (len < sizeof(struct packet_mreq))
			return -EINVAL;
		if (len > sizeof(mreq))
			len = sizeof(mreq);
		if (copy_from_user(&mreq, optval, len))
			return -EFAULT;
		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
			return -EINVAL;
		if (optname == PACKET_ADD_MEMBERSHIP)
			ret = packet_mc_add(sk, &mreq);
		else
			ret = packet_mc_drop(sk, &mreq);
		return ret;
	}

	case PACKET_RX_RING:
	case PACKET_TX_RING:
	{
		struct tpacket_req req;

		if (optlen < sizeof(req))
			return -EINVAL;
		if (pkt_sk(sk)->has_vnet_hdr)
			return -EINVAL;
		if (copy_from_user(&req, optval, sizeof(req)))
			return -EFAULT;
		return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
	}
	case PACKET_COPY_THRESH:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		pkt_sk(sk)->copy_thresh = val;
		return 0;
	}
	case PACKET_VERSION:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_version = val;
		return 0;
	}
	case PACKET_RESERVE:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_reserve = val;
		return 0;
	}
	case PACKET_LOSS:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_loss = !!val;
		return 0;
	}
	case PACKET_AUXDATA:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->auxdata = !!val;
		return 0;
	}
	case PACKET_ORIGDEV:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->origdev = !!val;
		return 0;
	}
	case PACKET_VNET_HDR:
	{
		int val;

		if (sock->type != SOCK_RAW)
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->has_vnet_hdr = !!val;
		return 0;
	}
	case PACKET_TIMESTAMP:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->tp_tstamp = val;
		return 0;
	}
	default:
		return -ENOPROTOOPT;
	}
}
static int packet_getsockopt(struct socket *sock, int level, int optname,
			     char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	struct tpacket_stats st;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;

	switch (optname) {
	case PACKET_STATISTICS:
		if (len > sizeof(struct tpacket_stats))
			len = sizeof(struct tpacket_stats);
		spin_lock_bh(&sk->sk_receive_queue.lock);
		memset(&po->stats, 0, sizeof(st));
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		st.tp_packets += st.tp_drops;
		break;
	case PACKET_AUXDATA:
		if (len > sizeof(int))
			len = sizeof(int);
		break;
	case PACKET_ORIGDEV:
		if (len > sizeof(int))
			len = sizeof(int);
		break;
	case PACKET_VNET_HDR:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->has_vnet_hdr;
		break;
	case PACKET_VERSION:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->tp_version;
		break;
	case PACKET_HDRLEN:
		if (len > sizeof(int))
			len = sizeof(int);
		if (copy_from_user(&val, optval, len))
			return -EFAULT;
		val = sizeof(struct tpacket_hdr);
		val = sizeof(struct tpacket2_hdr);
		break;
	case PACKET_RESERVE:
		if (len > sizeof(unsigned int))
			len = sizeof(unsigned int);
		val = po->tp_reserve;
		break;
	case PACKET_LOSS:
		if (len > sizeof(unsigned int))
			len = sizeof(unsigned int);
		break;
	case PACKET_TIMESTAMP:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->tp_tstamp;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, data, len))
		return -EFAULT;
	return 0;
}
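/*
 * Hedged user-space sketch of the PACKET_STATISTICS query handled above;
 * note that reading the counters also resets them, and tp_packets is
 * returned including tp_drops:
 *
 *	struct tpacket_stats st;
 *	socklen_t len = sizeof(st);
 *	getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len);
 *	printf("%u packets, %u drops\n", st.tp_packets, st.tp_drops);
 */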
static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
	struct sock *sk;
	struct hlist_node *node;
	struct net_device *dev = data;
	struct net *net = dev_net(dev);

	sk_for_each_rcu(sk, node, &net->packet.sklist) {
		struct packet_sock *po = pkt_sk(sk);

		switch (msg) {
		case NETDEV_UNREGISTER:
			if (po->mclist)
				packet_dev_mclist(dev, po->mclist, -1);
			/* fallthrough */

		case NETDEV_DOWN:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				__dev_remove_pack(&po->prot_hook);

				sk->sk_err = ENETDOWN;
				if (!sock_flag(sk, SOCK_DEAD))
					sk->sk_error_report(sk);

				if (msg == NETDEV_UNREGISTER) {
					po->prot_hook.dev = NULL;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		case NETDEV_UP:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->num && !po->running) {
					dev_add_pack(&po->prot_hook);
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		}
	}
	return NOTIFY_DONE;
}
static int packet_ioctl(struct socket *sock, unsigned int cmd,
			unsigned long arg)
{
	struct sock *sk = sock->sk;

	switch (cmd) {
	case SIOCOUTQ:
	{
		int amount = sk_wmem_alloc_get(sk);

		return put_user(amount, (int __user *)arg);
	}
	case SIOCINQ:
	{
		struct sk_buff *skb;
		int amount = 0;

		spin_lock_bh(&sk->sk_receive_queue.lock);
		skb = skb_peek(&sk->sk_receive_queue);
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		return put_user(amount, (int __user *)arg);
	}
	case SIOCGSTAMP:
		return sock_get_timestamp(sk, (struct timeval __user *)arg);
	case SIOCGSTAMPNS:
		return sock_get_timestampns(sk, (struct timespec __user *)arg);

	case SIOCGIFBRDADDR:
	case SIOCSIFBRDADDR:
	case SIOCGIFNETMASK:
	case SIOCSIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCSIFDSTADDR:
		return inet_dgram_ops.ioctl(sock, cmd, arg);

	default:
		return -ENOIOCTLCMD;
	}
	return 0;
}
static unsigned int packet_poll(struct file *file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned int mask = datagram_poll(file, sock, wait);

	spin_lock_bh(&sk->sk_receive_queue.lock);
	if (po->rx_ring.pg_vec) {
		if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
			mask |= POLLIN | POLLRDNORM;
	}
	spin_unlock_bh(&sk->sk_receive_queue.lock);
	spin_lock_bh(&sk->sk_write_queue.lock);
	if (po->tx_ring.pg_vec) {
		if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
			mask |= POLLOUT | POLLWRNORM;
	}
	spin_unlock_bh(&sk->sk_write_queue.lock);
	return mask;
}
/* Dirty? Well, I still did not learn better way to account
 * for user mmaps.
 */

static void packet_mm_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_inc(&pkt_sk(sk)->mapped);
}

static void packet_mm_close(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_dec(&pkt_sk(sk)->mapped);
}

static const struct vm_operations_struct packet_mmap_ops = {
	.open	= packet_mm_open,
	.close	= packet_mm_close,
};
static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
			unsigned int len)
{
	int i;

	for (i = 0; i < len; i++) {
		if (likely(pg_vec[i].buffer)) {
			if (is_vmalloc_addr(pg_vec[i].buffer))
				vfree(pg_vec[i].buffer);
			else
				free_pages((unsigned long)pg_vec[i].buffer,
					   order);
			pg_vec[i].buffer = NULL;
		}
	}
}

static inline char *alloc_one_pg_vec_page(unsigned long order)
{
	char *buffer = NULL;
	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
			  __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;

	buffer = (char *) __get_free_pages(gfp_flags, order);

	/*
	 * __get_free_pages failed, fall back to vmalloc
	 */
	buffer = vzalloc((1 << order) * PAGE_SIZE);

	/*
	 * vmalloc failed, lets dig into swap here
	 */
	gfp_flags &= ~__GFP_NORETRY;
	buffer = (char *)__get_free_pages(gfp_flags, order);

	/*
	 * complete and utter failure
	 */
	return NULL;
}

static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
{
	unsigned int block_nr = req->tp_block_nr;
	struct pgv *pg_vec;
	int i;

	pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
	if (unlikely(!pg_vec))
		goto out;

	for (i = 0; i < block_nr; i++) {
		pg_vec[i].buffer = alloc_one_pg_vec_page(order);
		if (unlikely(!pg_vec[i].buffer))
			goto out_free_pgvec;
	}

out:
	return pg_vec;

out_free_pgvec:
	free_pg_vec(pg_vec, order, block_nr);
	goto out;
}
static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
		int closing, int tx_ring)
{
	struct pgv *pg_vec = NULL;
	struct packet_sock *po = pkt_sk(sk);
	int was_running, order = 0;
	struct packet_ring_buffer *rb;
	struct sk_buff_head *rb_queue;

	rb = tx_ring ? &po->tx_ring : &po->rx_ring;
	rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;

	if (atomic_read(&po->mapped))
		goto out;
	if (atomic_read(&rb->pending))
		goto out;

	if (req->tp_block_nr) {
		/* Sanity tests and some calculations */
		if (unlikely(rb->pg_vec))
			goto out;

		switch (po->tp_version) {
		case TPACKET_V1:
			po->tp_hdrlen = TPACKET_HDRLEN;
			break;
		case TPACKET_V2:
			po->tp_hdrlen = TPACKET2_HDRLEN;
			break;
		}

		if (unlikely((int)req->tp_block_size <= 0))
			goto out;
		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
			goto out;
		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
					po->tp_reserve))
			goto out;
		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
			goto out;

		rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
		if (unlikely(rb->frames_per_block <= 0))
			goto out;
		if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
					req->tp_frame_nr))
			goto out;

		order = get_order(req->tp_block_size);
		pg_vec = alloc_pg_vec(req, order);
		if (unlikely(!pg_vec))
			goto out;
	} else {
		if (unlikely(req->tp_frame_nr))
			goto out;
	}

	/* Detach socket from network */
	spin_lock(&po->bind_lock);
	was_running = po->running;
	__dev_remove_pack(&po->prot_hook);
	spin_unlock(&po->bind_lock);

	mutex_lock(&po->pg_vec_lock);
	if (closing || atomic_read(&po->mapped) == 0) {
		spin_lock_bh(&rb_queue->lock);
		swap(rb->pg_vec, pg_vec);
		rb->frame_max = (req->tp_frame_nr - 1);
		rb->frame_size = req->tp_frame_size;
		spin_unlock_bh(&rb_queue->lock);

		swap(rb->pg_vec_order, order);
		swap(rb->pg_vec_len, req->tp_block_nr);

		rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
		po->prot_hook.func = (po->rx_ring.pg_vec) ?
						tpacket_rcv : packet_rcv;
		skb_queue_purge(rb_queue);
		if (atomic_read(&po->mapped))
			pr_err("packet_mmap: vma is busy: %d\n",
			       atomic_read(&po->mapped));
	}
	mutex_unlock(&po->pg_vec_lock);

	spin_lock(&po->bind_lock);
	if (was_running && !po->running) {
		dev_add_pack(&po->prot_hook);
	}
	spin_unlock(&po->bind_lock);

	if (pg_vec)
		free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
	return err;
}
static int packet_mmap(struct file *file, struct socket *sock,
		struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size, expected_size;
	struct packet_ring_buffer *rb;
	unsigned long start;

	mutex_lock(&po->pg_vec_lock);

	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		expected_size += rb->pg_vec_len
					* rb->pg_vec_pages
					* PAGE_SIZE;
	}

	if (expected_size == 0)
		goto out;

	size = vma->vm_end - vma->vm_start;
	if (size != expected_size)
		goto out;

	start = vma->vm_start;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec == NULL)
			continue;

		for (i = 0; i < rb->pg_vec_len; i++) {
			void *kaddr = rb->pg_vec[i].buffer;

			for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
				page = pgv_to_page(kaddr);
				err = vm_insert_page(vma, start, page);
			}
		}
	}

	atomic_inc(&po->mapped);
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
static const struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner =	THIS_MODULE,
};

static struct notifier_block packet_netdev_notifier = {
	.notifier_call =	packet_notifier,
};
#ifdef CONFIG_PROC_FS

static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);

	return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
}

static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
}

static void packet_seq_stop(struct seq_file *seq, void *v)
{
}

static int packet_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
	else {
		struct sock *s = sk_entry(v);
		const struct packet_sock *po = pkt_sk(s);

		seq_printf(seq,
			   "%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
			   atomic_read(&s->sk_refcnt),
			   atomic_read(&s->sk_rmem_alloc));
	}
	return 0;
}

static const struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
};

static int packet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &packet_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations packet_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= packet_seq_open,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
packet_net_init(struct net
*net
)
2726 spin_lock_init(&net
->packet
.sklist_lock
);
2727 INIT_HLIST_HEAD(&net
->packet
.sklist
);
2729 if (!proc_net_fops_create(net
, "packet", 0, &packet_seq_fops
))
2735 static void __net_exit
packet_net_exit(struct net
*net
)
2737 proc_net_remove(net
, "packet");
2740 static struct pernet_operations packet_net_ops
= {
2741 .init
= packet_net_init
,
2742 .exit
= packet_net_exit
,
2746 static void __exit
packet_exit(void)
2748 unregister_netdevice_notifier(&packet_netdev_notifier
);
2749 unregister_pernet_subsys(&packet_net_ops
);
2750 sock_unregister(PF_PACKET
);
2751 proto_unregister(&packet_proto
);
2754 static int __init
packet_init(void)
2756 int rc
= proto_register(&packet_proto
, 0);
2761 sock_register(&packet_family_ops
);
2762 register_pernet_subsys(&packet_net_ops
);
2763 register_netdevice_notifier(&packet_netdev_notifier
);
2768 module_init(packet_init
);
2769 module_exit(packet_exit
);
2770 MODULE_LICENSE("GPL");
2771 MODULE_ALIAS_NETPROTO(PF_PACKET
);