1 // SPDX-License-Identifier: GPL-2.0
/* sunvnet.c: Sun LDOM Virtual Network Driver.
 *
 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
 * Copyright (C) 2016-2017 Oracle. All rights reserved.
 */
8 #include <linux/module.h>
9 #include <linux/kernel.h>
10 #include <linux/types.h>
11 #include <linux/slab.h>
12 #include <linux/delay.h>
13 #include <linux/init.h>
14 #include <linux/netdevice.h>
15 #include <linux/ethtool.h>
16 #include <linux/etherdevice.h>
17 #include <linux/mutex.h>
18 #include <linux/highmem.h>
19 #include <linux/if_vlan.h>
20 #define CREATE_TRACE_POINTS
21 #include <trace/events/sunvnet.h>
23 #if IS_ENABLED(CONFIG_IPV6)
24 #include <linux/icmpv6.h>
30 #include <net/route.h>
35 #include "sunvnet_common.h"
/* Heuristic for the number of times to exponentially backoff and
 * retry sending an LDC trigger when EAGAIN is encountered
 */
40 #define VNET_MAX_RETRIES 10
42 MODULE_AUTHOR("David S. Miller <davem@davemloft.net>");
43 MODULE_DESCRIPTION("Sun LDOM virtual network support library");
44 MODULE_LICENSE("GPL");
45 MODULE_VERSION("1.1");
47 static int __vnet_tx_trigger(struct vnet_port
*port
, u32 start
);
// Report how many TX descriptors are currently available in ring `dr`.
// Thin wrapper: forwards to vio_dring_avail() with the fixed ring size
// VNET_TX_RING_SIZE and returns its result.
49 static inline u32
vnet_tx_dring_avail(struct vio_dring_state
*dr
)
51 return vio_dring_avail(dr
, VNET_TX_RING_SIZE
);
// Handle a message whose tag the driver does not recognise.
// `arg` is interpreted as a struct vio_msg_tag; its type, stype, stype_env
// and sid are logged, and the port's LDC channel is then disconnected.
// NOTE(review): the return statement is not visible in this copy; confirm
// which error code callers receive.
54 static int vnet_handle_unknown(struct vnet_port
*port
, void *arg
)
56 struct vio_msg_tag
*pkt
= arg
;
58 pr_err("Received unknown msg [%02x:%02x:%04x:%08x]\n",
59 pkt
->type
, pkt
->stype
, pkt
->stype_env
, pkt
->sid
);
60 pr_err("Resetting connection\n");
62 ldc_disconnect(port
->vio
.lp
);
67 static int vnet_port_alloc_tx_ring(struct vnet_port
*port
);
// Build a VIO_ATTR_INFO control message describing this port and send it
// to the peer.
//
// Visible steps, in order:
//   - allocate the TX ring with vnet_port_alloc_tx_ring()
//   - zero the packet and fill the tag (CTRL / INFO / ATTR_INFO / send sid)
//   - choose xfer_mode from the negotiated VIO version
//   - pack the device MAC address into pkt.addr
//   - choose pkt.mtu (and port->rmtu) from the negotiated VIO version
//   - advertise IPv4 LSO when the peer is v1.7+ and port->tso is set
//   - send with vio_ldc_send()
//
// Returns the value of vio_ldc_send().
// NOTE(review): the check on `err` after the ring allocation is not visible
// in this copy; confirm the early-return behaviour on allocation failure.
69 int sunvnet_send_attr_common(struct vio_driver_state
*vio
)
71 struct vnet_port
*port
= to_vnet_port(vio
);
72 struct net_device
*dev
= VNET_PORT_TO_NET_DEVICE(port
);
73 struct vio_net_attr_info pkt
;
74 int framelen
= ETH_FRAME_LEN
;
77 err
= vnet_port_alloc_tx_ring(to_vnet_port(vio
));
81 memset(&pkt
, 0, sizeof(pkt
));
82 pkt
.tag
.type
= VIO_TYPE_CTRL
;
83 pkt
.tag
.stype
= VIO_SUBTYPE_INFO
;
84 pkt
.tag
.stype_env
= VIO_ATTR_INFO
;
85 pkt
.tag
.sid
= vio_send_sid(vio
);
// Versions before 1.2 are given VIO_DRING_MODE; the VIO_NEW_DRING_MODE
// assignment that follows is presumably the else branch (the joining line
// is not visible here).
86 if (vio_version_before(vio
, 1, 2))
87 pkt
.xfer_mode
= VIO_DRING_MODE
;
89 pkt
.xfer_mode
= VIO_NEW_DRING_MODE
;
90 pkt
.addr_type
= VNET_ADDR_ETHERMAC
;
// Pack the six MAC bytes into pkt.addr with dev_addr[0] shifted highest
// (by 40 bits) and dev_addr[5] in the lowest byte.
92 for (i
= 0; i
< 6; i
++)
93 pkt
.addr
|= (u64
)dev
->dev_addr
[i
] << ((5 - i
) * 8);
// MTU selection by version: after 1.3 the port's rmtu is used (capped at
// VNET_MAXPACKET, or VNET_MAXPACKET itself); the other visible assignment
// is framelen + VLAN_HLEN.
94 if (vio_version_after(vio
, 1, 3)) {
96 port
->rmtu
= min(VNET_MAXPACKET
, port
->rmtu
);
99 port
->rmtu
= VNET_MAXPACKET
;
100 pkt
.mtu
= port
->rmtu
;
102 if (vio_version_after_eq(vio
, 1, 6))
103 pkt
.options
= VIO_TX_DRING
;
104 } else if (vio_version_before(vio
, 1, 3)) {
107 pkt
.mtu
= framelen
+ VLAN_HLEN
;
// Advertise IPv4 LSO only to v1.7+ peers and only when TSO is enabled on
// this port.
111 if (vio_version_after_eq(vio
, 1, 7) && port
->tso
) {
112 pkt
.cflags
|= VNET_LSO_IPV4_CAPAB
;
114 port
->tsolen
= VNET_MAXTSO
;
115 pkt
.ipv4_lso_maxlen
= port
->tsolen
;
118 pkt
.plnk_updt
= PHYSLINK_UPDATE_NONE
;
120 viodbg(HS
, "SEND NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
121 "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
122 "cflags[0x%04x] lso_max[%u]\n",
123 pkt
.xfer_mode
, pkt
.addr_type
,
124 (unsigned long long)pkt
.addr
,
125 pkt
.ack_freq
, pkt
.plnk_updt
, pkt
.options
,
126 (unsigned long long)pkt
.mtu
, pkt
.cflags
, pkt
.ipv4_lso_maxlen
);
128 return vio_ldc_send(vio
, &pkt
, sizeof(pkt
));
130 EXPORT_SYMBOL_GPL(sunvnet_send_attr_common
);
// Process an attribute INFO message received from the peer and answer it.
//
// The incoming packet is edited in place and sent back with vio_ldc_send()
// as either a NACK or an ACK:
//   - pkt->tag.sid is replaced with our send sid
//   - the transfer mode is normalised for peers older than v1.2
//   - the MTU is negotiated by VIO version and stored in port->rmtu
//   - IPv4 LSO capability and maximum length are negotiated (v1.7+)
//   - for v1.6+ the reply carries VIO_NEW_DRING_MODE and VIO_TX_DRING
//
// Returns the value of vio_ldc_send() on the ACK path.
// NOTE(review): the return on the NACK path is not visible in this copy.
132 static int handle_attr_info(struct vio_driver_state
*vio
,
133 struct vio_net_attr_info
*pkt
)
135 struct vnet_port
*port
= to_vnet_port(vio
);
139 viodbg(HS
, "GOT NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
140 "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
141 " (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
142 pkt
->xfer_mode
, pkt
->addr_type
,
143 (unsigned long long)pkt
->addr
,
144 pkt
->ack_freq
, pkt
->plnk_updt
, pkt
->options
,
145 (unsigned long long)pkt
->mtu
, port
->rmtu
, pkt
->cflags
,
146 pkt
->ipv4_lso_maxlen
);
148 pkt
->tag
.sid
= vio_send_sid(vio
);
150 xfer_mode
= pkt
->xfer_mode
;
151 /* for version < 1.2, VIO_DRING_MODE = 0x3 and no bitmask */
152 if (vio_version_before(vio
, 1, 2) && xfer_mode
== VIO_DRING_MODE
)
153 xfer_mode
= VIO_NEW_DRING_MODE
;
156 * < v1.3 - ETH_FRAME_LEN exactly
157 * > v1.3 - MIN(pkt.mtu, VNET_MAXPACKET, port->rmtu) and change
159 * = v1.3 - ETH_FRAME_LEN + VLAN_HLEN exactly
161 if (vio_version_before(vio
, 1, 3)) {
162 localmtu
= ETH_FRAME_LEN
;
163 } else if (vio_version_after(vio
, 1, 3)) {
164 localmtu
= port
->rmtu
? port
->rmtu
: VNET_MAXPACKET
;
165 localmtu
= min(pkt
->mtu
, localmtu
);
168 localmtu
= ETH_FRAME_LEN
+ VLAN_HLEN
;
// The negotiated value becomes the port's MTU from here on.
170 port
->rmtu
= localmtu
;
172 /* LSO negotiation */
173 if (vio_version_after_eq(vio
, 1, 7))
174 port
->tso
&= !!(pkt
->cflags
& VNET_LSO_IPV4_CAPAB
);
179 port
->tsolen
= VNET_MAXTSO
;
180 port
->tsolen
= min(port
->tsolen
, pkt
->ipv4_lso_maxlen
);
181 if (port
->tsolen
< VNET_MINTSO
) {
184 pkt
->cflags
&= ~VNET_LSO_IPV4_CAPAB
;
186 pkt
->ipv4_lso_maxlen
= port
->tsolen
;
188 pkt
->cflags
&= ~VNET_LSO_IPV4_CAPAB
;
189 pkt
->ipv4_lso_maxlen
= 0;
193 /* for version >= 1.6, ACK packet mode we support */
194 if (vio_version_after_eq(vio
, 1, 6)) {
195 pkt
->xfer_mode
= VIO_NEW_DRING_MODE
;
196 pkt
->options
= VIO_TX_DRING
;
// NOTE(review): `xfer_mode | VIO_NEW_DRING_MODE` is non-zero whenever
// VIO_NEW_DRING_MODE is non-zero, so this first term can never be true and
// an unsupported transfer mode is not rejected here. A bitwise AND looks
// like the intent; confirm against the protocol before changing.
199 if (!(xfer_mode
| VIO_NEW_DRING_MODE
) ||
200 pkt
->addr_type
!= VNET_ADDR_ETHERMAC
||
201 pkt
->mtu
!= localmtu
) {
202 viodbg(HS
, "SEND NET ATTR NACK\n");
204 pkt
->tag
.stype
= VIO_SUBTYPE_NACK
;
// Send result is deliberately ignored on the NACK path (explicit void cast).
206 (void)vio_ldc_send(vio
, pkt
, sizeof(*pkt
));
211 viodbg(HS
, "SEND NET ATTR ACK xmode[0x%x] atype[0x%x] "
212 "addr[%llx] ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] "
213 "mtu[%llu] (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
214 pkt
->xfer_mode
, pkt
->addr_type
,
215 (unsigned long long)pkt
->addr
,
216 pkt
->ack_freq
, pkt
->plnk_updt
, pkt
->options
,
217 (unsigned long long)pkt
->mtu
, port
->rmtu
, pkt
->cflags
,
218 pkt
->ipv4_lso_maxlen
);
220 pkt
->tag
.stype
= VIO_SUBTYPE_ACK
;
222 return vio_ldc_send(vio
, pkt
, sizeof(*pkt
));
// Handle the peer's ACK of our attribute message.
// The only visible action is a handshake debug print.
// NOTE(review): the return statement is not visible in this copy.
225 static int handle_attr_ack(struct vio_driver_state
*vio
,
226 struct vio_net_attr_info
*pkt
)
228 viodbg(HS
, "GOT NET ATTR ACK\n");
// Handle the peer's NACK of our attribute message.
// The only visible action is a handshake debug print.
// NOTE(review): the return statement is not visible in this copy; confirm
// whether a NACK is reported to the caller as an error.
233 static int handle_attr_nack(struct vio_driver_state
*vio
,
234 struct vio_net_attr_info
*pkt
)
236 viodbg(HS
, "GOT NET ATTR NACK\n");
// Entry point for attribute messages: treats `arg` as a
// struct vio_net_attr_info and dispatches on pkt->tag.stype to
// handle_attr_info(), handle_attr_ack() or handle_attr_nack(), returning
// that handler's result.
// NOTE(review): the handling of any other subtype is not visible in this
// copy.
241 int sunvnet_handle_attr_common(struct vio_driver_state
*vio
, void *arg
)
243 struct vio_net_attr_info
*pkt
= arg
;
245 switch (pkt
->tag
.stype
) {
246 case VIO_SUBTYPE_INFO
:
247 return handle_attr_info(vio
, pkt
);
249 case VIO_SUBTYPE_ACK
:
250 return handle_attr_ack(vio
, pkt
);
252 case VIO_SUBTYPE_NACK
:
253 return handle_attr_nack(vio
, pkt
);
259 EXPORT_SYMBOL_GPL(sunvnet_handle_attr_common
);
// Called when the VIO handshake has completed.
// The visible code points `dr` first at the RX dring state and then at the
// TX dring state of `vio`.
// NOTE(review): the statements that act on each ring are not visible in
// this copy; confirm what is reset per ring.
261 void sunvnet_handshake_complete_common(struct vio_driver_state
*vio
)
263 struct vio_dring_state
*dr
;
265 dr
= &vio
->drings
[VIO_DRIVER_RX_RING
];
269 dr
= &vio
->drings
[VIO_DRIVER_TX_RING
];
273 EXPORT_SYMBOL_GPL(sunvnet_handshake_complete_common
);
/* The hypervisor interface that implements copying to/from imported
 * memory from another domain requires that copies are done to 8-byte
 * aligned buffers, and that the lengths of such copies are also 8-byte
 * multiples.
 *
 * So we align skb->data to an 8-byte multiple and pad-out the data
 * area so we can round the copy length up to the next multiple of
 * 8 for the copy.
 *
 * The transmitter puts the actual start of the packet 6 bytes into
 * the buffer it sends over, so that the IP headers after the ethernet
 * header are aligned properly. These 6 bytes are not in the descriptor
 * length, they are simply implied. This offset is represented using
 * the VNET_PACKET_SKIP macro.
 */
// Allocate an skb for `dev` whose data pointer is 8-byte aligned.
// The allocation asks for len + VNET_PACKET_SKIP + 16 bytes, then reserves
// `off` bytes, where `off` is the distance from skb->data up to the next
// multiple of 8 (0..7).
// NOTE(review): the `len` parameter declaration, the allocation-failure
// check and the return statement are not visible in this copy.
290 static struct sk_buff
*alloc_and_align_skb(struct net_device
*dev
,
294 unsigned long addr
, off
;
296 skb
= netdev_alloc_skb(dev
, len
+ VNET_PACKET_SKIP
+ 8 + 8);
300 addr
= (unsigned long)skb
->data
;
301 off
= ((addr
+ 7UL) & ~7UL) - addr
;
303 skb_reserve(skb
, off
);
// Compute the full TCP or UDP checksum of an IPv4 packet in software.
// Uses ip_hdr() and skb_transport_offset(), so the network and transport
// header offsets must already be set on `skb`.
// The payload from the transport offset to skb->len is summed with
// skb_checksum(), ip_summed is set to CHECKSUM_NONE, and the result of
// csum_tcpudp_magic() is stored in the TCP or UDP `check` field.
// NOTE(review): the statements under the two guard conditions (non-IPv4
// protocol, neither TCP nor UDP) are not visible here; presumably early
// returns.
308 static inline void vnet_fullcsum_ipv4(struct sk_buff
*skb
)
310 struct iphdr
*iph
= ip_hdr(skb
);
311 int offset
= skb_transport_offset(skb
);
313 if (skb
->protocol
!= htons(ETH_P_IP
))
315 if (iph
->protocol
!= IPPROTO_TCP
&&
316 iph
->protocol
!= IPPROTO_UDP
)
318 skb
->ip_summed
= CHECKSUM_NONE
;
321 if (iph
->protocol
== IPPROTO_TCP
) {
322 struct tcphdr
*ptcp
= tcp_hdr(skb
);
325 skb
->csum
= skb_checksum(skb
, offset
, skb
->len
- offset
, 0);
326 ptcp
->check
= csum_tcpudp_magic(iph
->saddr
, iph
->daddr
,
327 skb
->len
- offset
, IPPROTO_TCP
,
329 } else if (iph
->protocol
== IPPROTO_UDP
) {
330 struct udphdr
*pudp
= udp_hdr(skb
);
333 skb
->csum
= skb_checksum(skb
, offset
, skb
->len
- offset
, 0);
334 pudp
->check
= csum_tcpudp_magic(iph
->saddr
, iph
->daddr
,
335 skb
->len
- offset
, IPPROTO_UDP
,
340 #if IS_ENABLED(CONFIG_IPV6)
// IPv6 counterpart of vnet_fullcsum_ipv4(): compute the full TCP or UDP
// checksum in software.
// The next-header field of the fixed IPv6 header is tested directly for
// TCP/UDP; extension headers are not walked in the visible code.
// The payload from the transport offset to skb->len is summed with
// skb_checksum(), ip_summed is set to CHECKSUM_NONE, and the result of
// csum_ipv6_magic() is stored in the TCP or UDP `check` field.
// NOTE(review): the statements under the two guard conditions are not
// visible here; presumably early returns.
341 static inline void vnet_fullcsum_ipv6(struct sk_buff
*skb
)
343 struct ipv6hdr
*ip6h
= ipv6_hdr(skb
);
344 int offset
= skb_transport_offset(skb
);
346 if (skb
->protocol
!= htons(ETH_P_IPV6
))
348 if (ip6h
->nexthdr
!= IPPROTO_TCP
&&
349 ip6h
->nexthdr
!= IPPROTO_UDP
)
351 skb
->ip_summed
= CHECKSUM_NONE
;
354 if (ip6h
->nexthdr
== IPPROTO_TCP
) {
355 struct tcphdr
*ptcp
= tcp_hdr(skb
);
358 skb
->csum
= skb_checksum(skb
, offset
, skb
->len
- offset
, 0);
359 ptcp
->check
= csum_ipv6_magic(&ip6h
->saddr
, &ip6h
->daddr
,
360 skb
->len
- offset
, IPPROTO_TCP
,
362 } else if (ip6h
->nexthdr
== IPPROTO_UDP
) {
363 struct udphdr
*pudp
= udp_hdr(skb
);
366 skb
->csum
= skb_checksum(skb
, offset
, skb
->len
- offset
, 0);
367 pudp
->check
= csum_ipv6_magic(&ip6h
->saddr
, &ip6h
->daddr
,
368 skb
->len
- offset
, IPPROTO_UDP
,
// Receive the single packet described by `desc` and hand it to the stack.
//
// Visible steps:
//   - reject lengths below ETH_ZLEN or above maxlen (rx_length_errors)
//   - allocate an 8-byte aligned skb (rx_missed_errors on failure)
//   - copy the data in with ldc_copy() (rx_frame_errors on failure)
//   - drop the VNET_PACKET_SKIP prefix and set skb->protocol
//   - for VIO v1.8+ act on the checksum flags in the descriptor extension
//   - update device and port counters, then napi_gro_receive()
// A failure path increments rx_dropped.
// NOTE(review): the initial value of maxlen, the goto/label structure and
// the return values are not visible in this copy.
374 static int vnet_rx_one(struct vnet_port
*port
, struct vio_net_desc
*desc
)
376 struct net_device
*dev
= VNET_PORT_TO_NET_DEVICE(port
);
377 unsigned int len
= desc
->size
;
378 unsigned int copy_len
;
// With TSO negotiated, frames up to tsolen are accepted when that exceeds
// the port MTU.
384 if (port
->tso
&& port
->tsolen
> port
->rmtu
)
385 maxlen
= port
->tsolen
;
388 if (unlikely(len
< ETH_ZLEN
|| len
> maxlen
)) {
389 dev
->stats
.rx_length_errors
++;
393 skb
= alloc_and_align_skb(dev
, len
);
395 if (unlikely(!skb
)) {
396 dev
->stats
.rx_missed_errors
++;
// Copy length covers the implied VNET_PACKET_SKIP prefix plus the payload,
// rounded up to a multiple of 8.
400 copy_len
= (len
+ VNET_PACKET_SKIP
+ 7U) & ~7U;
401 skb_put(skb
, copy_len
);
402 err
= ldc_copy(port
->vio
.lp
, LDC_COPY_IN
,
403 skb
->data
, copy_len
, 0,
404 desc
->cookies
, desc
->ncookies
);
405 if (unlikely(err
< 0)) {
406 dev
->stats
.rx_frame_errors
++;
410 skb_pull(skb
, VNET_PACKET_SKIP
);
412 skb
->protocol
= eth_type_trans(skb
, dev
);
414 if (vio_version_after_eq(&port
->vio
, 1, 8)) {
415 struct vio_net_dext
*dext
= vio_net_ext(desc
);
417 skb_reset_network_header(skb
);
419 if (dext
->flags
& VNET_PKT_HCK_IPV4_HDRCKSUM
) {
// NOTE(review): this compares skb->protocol with ETH_P_IP directly, while
// the tests further down use htons(ETH_P_IP). The two forms only agree on
// a big-endian host; confirm whether htons() is wanted here too.
420 if (skb
->protocol
== ETH_P_IP
) {
421 struct iphdr
*iph
= ip_hdr(skb
);
427 if ((dext
->flags
& VNET_PKT_HCK_FULLCKSUM
) &&
428 skb
->ip_summed
== CHECKSUM_NONE
) {
429 if (skb
->protocol
== htons(ETH_P_IP
)) {
430 struct iphdr
*iph
= ip_hdr(skb
);
431 int ihl
= iph
->ihl
* 4;
433 skb_set_transport_header(skb
, ihl
);
434 vnet_fullcsum_ipv4(skb
);
435 #if IS_ENABLED(CONFIG_IPV6)
436 } else if (skb
->protocol
== htons(ETH_P_IPV6
)) {
437 skb_set_transport_header(skb
,
438 sizeof(struct ipv6hdr
));
439 vnet_fullcsum_ipv6(skb
);
443 if (dext
->flags
& VNET_PKT_HCK_IPV4_HDRCKSUM_OK
) {
444 skb
->ip_summed
= CHECKSUM_PARTIAL
;
446 if (dext
->flags
& VNET_PKT_HCK_FULLCKSUM_OK
)
451 skb
->ip_summed
= port
->switch_port
? CHECKSUM_NONE
: CHECKSUM_PARTIAL
;
453 if (unlikely(is_multicast_ether_addr(eth_hdr(skb
)->h_dest
)))
454 dev
->stats
.multicast
++;
455 dev
->stats
.rx_packets
++;
456 dev
->stats
.rx_bytes
+= len
;
457 port
->stats
.rx_packets
++;
458 port
->stats
.rx_bytes
+= len
;
459 napi_gro_receive(&port
->napi
, skb
);
466 dev
->stats
.rx_dropped
++;
// Send a VIO_DRING_DATA ACK for ring `dr` carrying the given dring state.
// The message sequence number is taken from dr->snd_nxt. The send is
// repeated while vio_ldc_send() returns -EAGAIN; the retry count is
// checked against VNET_MAX_RETRIES, and the peer MAC is logged with
// "ECONNRESET" when that limit is passed.
// Afterwards port->stop_rx / port->stop_rx_idx record whether a
// VIO_DRING_STOPPED ack failed to go out.
// NOTE(review): how `start` and `end` are placed in the header, the delay
// call, and the return statement are not visible in this copy.
470 static int vnet_send_ack(struct vnet_port
*port
, struct vio_dring_state
*dr
,
471 u32 start
, u32 end
, u8 vio_dring_state
)
473 struct vio_dring_data hdr
= {
475 .type
= VIO_TYPE_DATA
,
476 .stype
= VIO_SUBTYPE_ACK
,
477 .stype_env
= VIO_DRING_DATA
,
478 .sid
= vio_send_sid(&port
->vio
),
480 .dring_ident
= dr
->ident
,
483 .state
= vio_dring_state
,
488 hdr
.seq
= dr
->snd_nxt
;
491 err
= vio_ldc_send(&port
->vio
, &hdr
, sizeof(hdr
));
// The delay doubles on every pass; the statement taken when it exceeds 128
// is not visible here.
497 if ((delay
<<= 1) > 128)
499 if (retries
++ > VNET_MAX_RETRIES
) {
500 pr_info("ECONNRESET %x:%x:%x:%x:%x:%x\n",
501 port
->raddr
[0], port
->raddr
[1],
502 port
->raddr
[2], port
->raddr
[3],
503 port
->raddr
[4], port
->raddr
[5]);
506 } while (err
== -EAGAIN
);
// A STOPPED ack that did not go out (err <= 0) is remembered via stop_rx
// and stop_rx_idx; the two assignments after that clear the pending state
// (presumably the else branch).
508 if (err
<= 0 && vio_dring_state
== VIO_DRING_STOPPED
) {
509 port
->stop_rx_idx
= end
;
510 port
->stop_rx
= true;
512 port
->stop_rx_idx
= 0;
513 port
->stop_rx
= false;
// Fetch RX descriptor number `index` from the peer's ring into the port's
// local descriptor buffer (port->vio.desc_buf) using
// ldc_get_dring_entry(). The byte offset into the ring is
// index * dr->entry_size.
// NOTE(review): the `index` parameter declaration and the return statements
// are not visible in this copy. vnet_walk_rx_one() applies PTR_ERR() to the
// result, so failure is presumably reported as an ERR_PTR value.
519 static struct vio_net_desc
*get_rx_desc(struct vnet_port
*port
,
520 struct vio_dring_state
*dr
,
523 struct vio_net_desc
*desc
= port
->vio
.desc_buf
;
526 err
= ldc_get_dring_entry(port
->vio
.lp
, desc
, dr
->entry_size
,
527 (index
* dr
->entry_size
),
528 dr
->cookies
, dr
->ncookies
);
// Write descriptor `desc` back to slot `index` of the peer's ring using
// ldc_put_dring_entry(). The byte offset into the ring is
// index * dr->entry_size.
// NOTE(review): the `index` parameter declaration and the return statement
// are not visible in this copy.
535 static int put_rx_desc(struct vnet_port
*port
,
536 struct vio_dring_state
*dr
,
537 struct vio_net_desc
*desc
,
542 err
= ldc_put_dring_entry(port
->vio
.lp
, desc
, dr
->entry_size
,
543 (index
* dr
->entry_size
),
544 dr
->cookies
, dr
->ncookies
);
// Process one RX ring slot.
// Fetches the descriptor at `index`, checks that its state is
// VIO_DESC_READY, receives the packet with vnet_rx_one(), marks the
// descriptor VIO_DESC_DONE and writes it back with put_rx_desc().
// On the success path *needs_ack is set to the descriptor's ack field.
// NOTE(review): the statements taken when the descriptor is not READY or
// when -ECONNRESET is seen, and the final return, are not visible in this
// copy.
551 static int vnet_walk_rx_one(struct vnet_port
*port
,
552 struct vio_dring_state
*dr
,
553 u32 index
, int *needs_ack
)
555 struct vio_net_desc
*desc
= get_rx_desc(port
, dr
, index
);
556 struct vio_driver_state
*vio
= &port
->vio
;
561 return PTR_ERR(desc
);
563 if (desc
->hdr
.state
!= VIO_DESC_READY
)
568 viodbg(DATA
, "vio_walk_rx_one desc[%02x:%02x:%08x:%08x:%llx:%llx]\n",
569 desc
->hdr
.state
, desc
->hdr
.ack
,
570 desc
->size
, desc
->ncookies
,
571 desc
->cookies
[0].cookie_addr
,
572 desc
->cookies
[0].cookie_size
);
574 err
= vnet_rx_one(port
, desc
);
575 if (err
== -ECONNRESET
)
577 trace_vnet_rx_one(port
->vio
._local_sid
, port
->vio
._peer_sid
,
578 index
, desc
->hdr
.ack
);
579 desc
->hdr
.state
= VIO_DESC_DONE
;
580 err
= put_rx_desc(port
, dr
, desc
, index
);
583 *needs_ack
= desc
->hdr
.ack
;
// Walk RX ring slots from `start` up to `end`, receiving each one with
// vnet_walk_rx_one().
// An `end` of (u32)-1 is replaced by the slot before `start`; otherwise
// `end` is advanced by one so the loop bound is exclusive.
// When a descriptor requested an ack and the walk is not finished, an ack
// is sent mid-walk. The walk is cut short when *npkts reaches `budget`.
// At the end either a final ack is sent (napi_resume cleared) or the ack is
// deferred (napi_resume set and napi_stop_idx recorded).
// NOTE(review): the updates of *npkts, ack_start and ack_end, the `break`
// statements and the dring state passed to vnet_send_ack() are not visible
// in this copy.
587 static int vnet_walk_rx(struct vnet_port
*port
, struct vio_dring_state
*dr
,
588 u32 start
, u32 end
, int *npkts
, int budget
)
590 struct vio_driver_state
*vio
= &port
->vio
;
591 int ack_start
= -1, ack_end
= -1;
592 bool send_ack
= true;
594 end
= (end
== (u32
)-1) ? vio_dring_prev(dr
, start
)
595 : vio_dring_next(dr
, end
);
597 viodbg(DATA
, "vnet_walk_rx start[%08x] end[%08x]\n", start
, end
);
599 while (start
!= end
) {
600 int ack
= 0, err
= vnet_walk_rx_one(port
, dr
, start
, &ack
);
602 if (err
== -ECONNRESET
)
610 start
= vio_dring_next(dr
, start
);
611 if (ack
&& start
!= end
) {
612 err
= vnet_send_ack(port
, dr
, ack_start
, ack_end
,
614 if (err
== -ECONNRESET
)
618 if ((*npkts
) >= budget
) {
// Nothing was received in this walk: ack_end falls back to the slot before
// `start`.
623 if (unlikely(ack_start
== -1)) {
624 ack_end
= vio_dring_prev(dr
, start
);
628 port
->napi_resume
= false;
629 trace_vnet_tx_send_stopped_ack(port
->vio
._local_sid
,
632 return vnet_send_ack(port
, dr
, ack_start
, ack_end
,
635 trace_vnet_tx_defer_stopped_ack(port
->vio
._local_sid
,
638 port
->napi_resume
= true;
639 port
->napi_stop_idx
= ack_end
;
// Handle a dring DATA/INFO message: validate it and walk the RX ring.
// `msgbuf` is interpreted as a struct vio_dring_data. The message must have
// stype_env VIO_DRING_DATA and a sequence number equal to dr->rcv_nxt; an
// out-of-sequence message is logged. The ring is then walked from
// pkt->start_idx to pkt->end_idx via vnet_walk_rx(), whose result is
// returned.
// NOTE(review): the statements under the two validation checks and under
// the napi_resume test are not visible in this copy. The indices are used
// without validation, as the in-code XXX comment says.
644 static int vnet_rx(struct vnet_port
*port
, void *msgbuf
, int *npkts
,
647 struct vio_dring_data
*pkt
= msgbuf
;
648 struct vio_dring_state
*dr
= &port
->vio
.drings
[VIO_DRIVER_RX_RING
];
649 struct vio_driver_state
*vio
= &port
->vio
;
651 viodbg(DATA
, "vnet_rx stype_env[%04x] seq[%016llx] rcv_nxt[%016llx]\n",
652 pkt
->tag
.stype_env
, pkt
->seq
, dr
->rcv_nxt
);
654 if (unlikely(pkt
->tag
.stype_env
!= VIO_DRING_DATA
))
656 if (unlikely(pkt
->seq
!= dr
->rcv_nxt
)) {
657 pr_err("RX out of sequence seq[0x%llx] rcv_nxt[0x%llx]\n",
658 pkt
->seq
, dr
->rcv_nxt
);
662 if (!port
->napi_resume
)
665 /* XXX Validate pkt->start_idx and pkt->end_idx XXX */
667 return vnet_walk_rx(port
, dr
, pkt
->start_idx
, pkt
->end_idx
,
// Scan ring indices with vio_dring_next() until dr->prod is reached.
// vnet_ack() treats a zero result as "index `end` is not pending".
// NOTE(review): the starting index, the comparison against `end` and the
// return statements are not visible in this copy; presumably this reports
// whether `end` lies between the consumer and producer positions.
671 static int idx_is_pending(struct vio_dring_state
*dr
, u32 end
)
676 while (idx
!= dr
->prod
) {
681 idx
= vio_dring_next(dr
, idx
);
// Handle a dring ACK from the peer for our TX ring.
// After checking stype_env and that the acked index is pending, the
// consumer index dr->cons is moved to the slot after `end`. If that slot is
// already VIO_DESC_READY and no start trigger is outstanding
// (!port->start_cons), a trigger is sent with __vnet_tx_trigger(); a
// positive result clears start_cons, anything else sets it. In the other
// visible case start_cons is set.
// Finally the TX queue state and the free ring space are tested against
// VNET_TX_WAKEUP_THRESH.
// NOTE(review): the assignment of `end`, the matching netif_tx_lock(), the
// action taken by the final test and the return values are not visible in
// this copy.
686 static int vnet_ack(struct vnet_port
*port
, void *msgbuf
)
688 struct vio_dring_state
*dr
= &port
->vio
.drings
[VIO_DRIVER_TX_RING
];
689 struct vio_dring_data
*pkt
= msgbuf
;
690 struct net_device
*dev
;
692 struct vio_net_desc
*desc
;
693 struct netdev_queue
*txq
;
695 if (unlikely(pkt
->tag
.stype_env
!= VIO_DRING_DATA
))
699 dev
= VNET_PORT_TO_NET_DEVICE(port
);
701 if (unlikely(!idx_is_pending(dr
, end
))) {
702 netif_tx_unlock(dev
);
706 /* sync for race conditions with vnet_start_xmit() and tell xmit it
707 * is time to send a trigger.
709 trace_vnet_rx_stopped_ack(port
->vio
._local_sid
,
710 port
->vio
._peer_sid
, end
);
711 dr
->cons
= vio_dring_next(dr
, end
);
712 desc
= vio_dring_entry(dr
, dr
->cons
);
713 if (desc
->hdr
.state
== VIO_DESC_READY
&& !port
->start_cons
) {
714 /* vnet_start_xmit() just populated this dring but missed
715 * sending the "start" LDC message to the consumer.
716 * Send a "start" trigger on its behalf.
718 if (__vnet_tx_trigger(port
, dr
->cons
) > 0)
719 port
->start_cons
= false;
721 port
->start_cons
= true;
723 port
->start_cons
= true;
725 netif_tx_unlock(dev
);
727 txq
= netdev_get_tx_queue(dev
, port
->q_index
);
728 if (unlikely(netif_tx_queue_stopped(txq
) &&
729 vnet_tx_dring_avail(dr
) >= VNET_TX_WAKEUP_THRESH(dr
)))
// Handle a dring NACK from the peer.
// Only a placeholder comment is visible; no recovery action appears here.
// NOTE(review): the return statement is not visible in this copy.
735 static int vnet_nack(struct vnet_port
*port
, void *msgbuf
)
737 /* XXX just reset or similar XXX */
// Handle a multicast-info control reply.
// `msgbuf` is interpreted as a struct vio_net_mcast_info; any subtype other
// than VIO_SUBTYPE_ACK is logged as unexpected.
// NOTE(review): the pr_err() arguments and the return statement are not
// visible in this copy.
741 static int handle_mcast(struct vnet_port
*port
, void *msgbuf
)
743 struct vio_net_mcast_info
*pkt
= msgbuf
;
744 struct net_device
*dev
= VNET_PORT_TO_NET_DEVICE(port
);
746 if (pkt
->tag
.stype
!= VIO_SUBTYPE_ACK
)
747 pr_err("%s: Got unexpected MCAST reply [%02x:%02x:%04x:%08x]\n",
/* If the queue is stopped, wake it up so that we'll
 * send out another START message at the next TX.
 */
// Wake the port's TX queue if it is currently stopped.
// The queue's TX lock is taken with __netif_tx_lock() for the current CPU
// around the test and the wake-up.
// NOTE(review): the queue-index argument to netdev_get_tx_queue() is not
// visible in this copy.
760 static void maybe_tx_wakeup(struct vnet_port
*port
)
762 struct netdev_queue
*txq
;
764 txq
= netdev_get_tx_queue(VNET_PORT_TO_NET_DEVICE(port
),
766 __netif_tx_lock(txq
, smp_processor_id());
767 if (likely(netif_tx_queue_stopped(txq
)))
768 netif_tx_wake_queue(txq
);
769 __netif_tx_unlock(txq
);
// Report whether the port's VIO handshake has completed: true when the
// VIO_HS_COMPLETE bit is set in the port's hs_state, false otherwise.
772 bool sunvnet_port_is_up_common(struct vnet_port
*vnet
)
774 struct vio_driver_state
*vio
= &vnet
->vio
;
776 return !!(vio
->hs_state
& VIO_HS_COMPLETE
);
778 EXPORT_SYMBOL_GPL(sunvnet_port_is_up_common
);
// Process the pending LDC events of one port from NAPI context.
//
// Visible behaviour:
//   - LDC_EVENT_RESET is handled first: for a vsw port the queues are
//     stopped and the carrier dropped, the link state change is reported,
//     the port is reset, and a running device gets its TX queue woken.
//   - LDC_EVENT_UP: for a vsw port the carrier and queues are turned on and
//     the link state change is reported.
//   - Otherwise messages are obtained either from ldc_read() or, when
//     port->napi_resume is set, by synthesising a VIO_DRING_DATA INFO
//     message that resumes after port->napi_stop_idx.
//   - Each message is validated with vio_validate_sid() and dispatched on
//     its tag: DATA goes to vnet_rx(), vnet_ack() or vnet_nack(); CTRL goes
//     to handle_mcast() or vio_control_pkt_engine(); anything else goes to
//     vnet_handle_unknown().
//   - A TX wake-up is performed at the end when requested and no connection
//     reset occurred.
// NOTE(review): the loop structure, the `msgbuf` declaration, where
// `tx_wakeup` is set and the return value are not visible in this copy.
// sunvnet_poll_common() compares the result with the NAPI budget, so it is
// presumably the number of packets processed.
780 static int vnet_event_napi(struct vnet_port
*port
, int budget
)
782 struct net_device
*dev
= VNET_PORT_TO_NET_DEVICE(port
);
783 struct vio_driver_state
*vio
= &port
->vio
;
787 /* we don't expect any other bits */
788 BUG_ON(port
->rx_event
& ~(LDC_EVENT_DATA_READY
|
792 /* RESET takes precedent over any other event */
793 if (port
->rx_event
& LDC_EVENT_RESET
) {
794 /* a link went down */
796 if (port
->vsw
== 1) {
797 netif_tx_stop_all_queues(dev
);
798 netif_carrier_off(dev
);
801 vio_link_state_change(vio
, LDC_EVENT_RESET
);
802 vnet_port_reset(port
);
805 /* If the device is running but its tx queue was
806 * stopped (due to flow control), restart it.
807 * This is necessary since vnet_port_reset()
808 * clears the tx drings and thus we may never get
809 * back a VIO_TYPE_DATA ACK packet - which is
810 * the normal mechanism to restart the tx queue.
812 if (netif_running(dev
))
813 maybe_tx_wakeup(port
);
816 port
->stats
.event_reset
++;
820 if (port
->rx_event
& LDC_EVENT_UP
) {
823 if (port
->vsw
== 1) {
824 netif_carrier_on(port
->dev
);
825 netif_tx_start_all_queues(port
->dev
);
828 vio_link_state_change(vio
, LDC_EVENT_UP
);
830 port
->stats
.event_up
++;
838 struct vio_msg_tag tag
;
// Resuming after a budget-limited walk: build the dring message locally
// instead of reading one from the channel.
842 if (port
->napi_resume
) {
843 struct vio_dring_data
*pkt
=
844 (struct vio_dring_data
*)&msgbuf
;
845 struct vio_dring_state
*dr
=
846 &port
->vio
.drings
[VIO_DRIVER_RX_RING
];
848 pkt
->tag
.type
= VIO_TYPE_DATA
;
849 pkt
->tag
.stype
= VIO_SUBTYPE_INFO
;
850 pkt
->tag
.stype_env
= VIO_DRING_DATA
;
851 pkt
->seq
= dr
->rcv_nxt
;
852 pkt
->start_idx
= vio_dring_next(dr
,
853 port
->napi_stop_idx
);
856 err
= ldc_read(vio
->lp
, &msgbuf
, sizeof(msgbuf
));
857 if (unlikely(err
< 0)) {
858 if (err
== -ECONNRESET
)
864 viodbg(DATA
, "TAG [%02x:%02x:%04x:%08x]\n",
867 msgbuf
.tag
.stype_env
,
869 err
= vio_validate_sid(vio
, &msgbuf
.tag
);
874 if (likely(msgbuf
.tag
.type
== VIO_TYPE_DATA
)) {
875 if (msgbuf
.tag
.stype
== VIO_SUBTYPE_INFO
) {
876 if (!sunvnet_port_is_up_common(port
)) {
877 /* failures like handshake_failure()
878 * may have cleaned up dring, but
879 * NAPI polling may bring us here.
884 err
= vnet_rx(port
, &msgbuf
, &npkts
, budget
);
889 } else if (msgbuf
.tag
.stype
== VIO_SUBTYPE_ACK
) {
890 err
= vnet_ack(port
, &msgbuf
);
893 } else if (msgbuf
.tag
.stype
== VIO_SUBTYPE_NACK
) {
894 err
= vnet_nack(port
, &msgbuf
);
896 } else if (msgbuf
.tag
.type
== VIO_TYPE_CTRL
) {
897 if (msgbuf
.tag
.stype_env
== VNET_MCAST_INFO
)
898 err
= handle_mcast(port
, &msgbuf
);
900 err
= vio_control_pkt_engine(vio
, &msgbuf
);
904 err
= vnet_handle_unknown(port
, &msgbuf
);
906 if (err
== -ECONNRESET
)
909 if (unlikely(tx_wakeup
&& err
!= -ECONNRESET
))
910 maybe_tx_wakeup(port
);
// NAPI poll callback shared by the sunvnet drivers.
// Recovers the port from its embedded napi_struct and runs
// vnet_event_napi() with the given budget. When less than the budget was
// processed, polling is completed, the DATA_READY event bit is cleared and
// the RX interrupt is re-enabled.
// NOTE(review): the return statement is not visible in this copy;
// presumably `processed`.
914 int sunvnet_poll_common(struct napi_struct
*napi
, int budget
)
916 struct vnet_port
*port
= container_of(napi
, struct vnet_port
, napi
);
917 struct vio_driver_state
*vio
= &port
->vio
;
918 int processed
= vnet_event_napi(port
, budget
);
920 if (processed
< budget
) {
921 napi_complete_done(napi
, processed
);
922 port
->rx_event
&= ~LDC_EVENT_DATA_READY
;
923 vio_set_intr(vio
->vdev
->rx_ino
, HV_INTR_ENABLED
);
927 EXPORT_SYMBOL_GPL(sunvnet_poll_common
);
// LDC event callback: `arg` is the vnet_port, `event` the LDC event bits.
// The bits are OR-ed into port->rx_event, the RX interrupt is disabled, and
// NAPI polling is scheduled so the event is handled in
// sunvnet_poll_common().
929 void sunvnet_event_common(void *arg
, int event
)
931 struct vnet_port
*port
= arg
;
932 struct vio_driver_state
*vio
= &port
->vio
;
934 port
->rx_event
|= event
;
935 vio_set_intr(vio
->vdev
->rx_ino
, HV_INTR_DISABLED
);
936 napi_schedule(&port
->napi
);
938 EXPORT_SYMBOL_GPL(sunvnet_event_common
);
// Send a VIO_DRING_DATA INFO message for the TX ring to tell the peer that
// descriptors are ready.
// Before that, a previously failed STOPPED ack for the RX ring is re-sent
// with vnet_send_ack() using port->stop_rx_idx. The trigger takes its
// sequence number from dr->snd_nxt and is repeated while vio_ldc_send()
// returns -EAGAIN, with the retry count checked against VNET_MAX_RETRIES.
// The outcome is traced together with `start`.
// NOTE(review): the condition guarding the ack re-send, the placement of
// `start` in the header, the delay call and the return statement are not
// visible in this copy. vnet_ack() treats a result > 0 as success.
940 static int __vnet_tx_trigger(struct vnet_port
*port
, u32 start
)
942 struct vio_dring_state
*dr
= &port
->vio
.drings
[VIO_DRIVER_TX_RING
];
943 struct vio_dring_data hdr
= {
945 .type
= VIO_TYPE_DATA
,
946 .stype
= VIO_SUBTYPE_INFO
,
947 .stype_env
= VIO_DRING_DATA
,
948 .sid
= vio_send_sid(&port
->vio
),
950 .dring_ident
= dr
->ident
,
958 trace_vnet_tx_pending_stopped_ack(port
->vio
._local_sid
,
960 port
->stop_rx_idx
, -1);
961 err
= vnet_send_ack(port
,
962 &port
->vio
.drings
[VIO_DRIVER_RX_RING
],
963 port
->stop_rx_idx
, -1,
969 hdr
.seq
= dr
->snd_nxt
;
972 err
= vio_ldc_send(&port
->vio
, &hdr
, sizeof(hdr
));
978 if ((delay
<<= 1) > 128)
980 if (retries
++ > VNET_MAX_RETRIES
)
982 } while (err
== -EAGAIN
);
983 trace_vnet_tx_trigger(port
->vio
._local_sid
,
984 port
->vio
._peer_sid
, start
, err
);
// Reclaim completed TX ring entries.
// Visits up to VNET_TX_RING_SIZE slots, with the slot index wrapping to
// VNET_TX_RING_SIZE - 1. For a slot that still owns an skb, the skb is
// pushed onto a singly linked list through skb->next, its LDC cookies are
// unmapped, and the descriptor is set to VIO_DESC_FREE. A state other than
// VIO_DESC_DONE on such a slot is logged.
// NOTE(review): the second parameter, the starting index, the statements
// taken for READY and FREE descriptors and the return statement are not
// visible in this copy. Callers pass `&pending` and store the result as a
// list of skbs to free.
989 static struct sk_buff
*vnet_clean_tx_ring(struct vnet_port
*port
,
992 struct vio_dring_state
*dr
= &port
->vio
.drings
[VIO_DRIVER_TX_RING
];
993 struct sk_buff
*skb
= NULL
;
999 for (i
= 0; i
< VNET_TX_RING_SIZE
; ++i
) {
1000 struct vio_net_desc
*d
;
1004 txi
= VNET_TX_RING_SIZE
- 1;
1006 d
= vio_dring_entry(dr
, txi
);
1008 if (d
->hdr
.state
== VIO_DESC_READY
) {
1012 if (port
->tx_bufs
[txi
].skb
) {
1013 if (d
->hdr
.state
!= VIO_DESC_DONE
)
1014 pr_notice("invalid ring buffer state %d\n",
// The skb must not already be linked into another list.
1016 BUG_ON(port
->tx_bufs
[txi
].skb
->next
);
1018 port
->tx_bufs
[txi
].skb
->next
= skb
;
1019 skb
= port
->tx_bufs
[txi
].skb
;
1020 port
->tx_bufs
[txi
].skb
= NULL
;
1022 ldc_unmap(port
->vio
.lp
,
1023 port
->tx_bufs
[txi
].cookies
,
1024 port
->tx_bufs
[txi
].ncookies
);
1025 } else if (d
->hdr
.state
== VIO_DESC_FREE
) {
1028 d
->hdr
.state
= VIO_DESC_FREE
;
// Release a list of skbs.
// NOTE(review): only the declaration of `next` is visible in this copy.
// sunvnet_clean_timer_expire_common() passes in the list returned by
// vnet_clean_tx_ring(), so this presumably walks the skb->next chain and
// frees each entry; confirm.
1033 static inline void vnet_free_skbs(struct sk_buff
*skb
)
1035 struct sk_buff
*next
;
// Timer callback that reclaims completed TX buffers for a port.
// The ring is cleaned under netif_tx_lock(); the collected skbs are freed
// only after the lock is dropped. The timer is then either re-armed
// VNET_CLEAN_TIMEOUT jiffies ahead or deleted.
// NOTE(review): the declaration of `pending` and the condition choosing
// between re-arm and delete are not visible in this copy; presumably the
// timer is re-armed while descriptors are still pending.
1045 void sunvnet_clean_timer_expire_common(struct timer_list
*t
)
1047 struct vnet_port
*port
= from_timer(port
, t
, clean_timer
);
1048 struct sk_buff
*freeskbs
;
1051 netif_tx_lock(VNET_PORT_TO_NET_DEVICE(port
));
1052 freeskbs
= vnet_clean_tx_ring(port
, &pending
);
1053 netif_tx_unlock(VNET_PORT_TO_NET_DEVICE(port
));
1055 vnet_free_skbs(freeskbs
);
1058 (void)mod_timer(&port
->clean_timer
,
1059 jiffies
+ VNET_CLEAN_TIMEOUT
);
1061 del_timer(&port
->clean_timer
);
1063 EXPORT_SYMBOL_GPL(sunvnet_clean_timer_expire_common
);
// Map an skb into LDC transfer cookies.
// The linear part is mapped starting VNET_PACKET_SKIP bytes before
// skb->data. Each page fragment is then mapped in turn into the remaining
// cookie slots, with the page temporarily mapped through
// kmap_local_page(). Cookies mapped so far are released with ldc_unmap() on
// the failure path.
// Lengths are padded with `8 - (blen & 7)`, which always adds between 1 and
// 8 bytes: a length that is already a multiple of 8 grows by a full 8.
// NOTE(review): the handling of a short linear part (blen < ETH_ZLEN), the
// updates of `nc`, the error checks and the return values are not visible
// in this copy; sunvnet_start_xmit_common() stores the result as the
// cookie count.
1065 static inline int vnet_skb_map(struct ldc_channel
*lp
, struct sk_buff
*skb
,
1066 struct ldc_trans_cookie
*cookies
, int ncookies
,
1067 unsigned int map_perm
)
1069 int i
, nc
, err
, blen
;
1072 blen
= skb_headlen(skb
);
1073 if (blen
< ETH_ZLEN
)
1075 blen
+= VNET_PACKET_SKIP
;
1076 blen
+= 8 - (blen
& 7);
1078 err
= ldc_map_single(lp
, skb
->data
- VNET_PACKET_SKIP
, blen
, cookies
,
1079 ncookies
, map_perm
);
1084 for (i
= 0; i
< skb_shinfo(skb
)->nr_frags
; i
++) {
1085 skb_frag_t
*f
= &skb_shinfo(skb
)->frags
[i
];
1088 if (nc
< ncookies
) {
1089 vaddr
= kmap_local_page(skb_frag_page(f
));
1090 blen
= skb_frag_size(f
);
1091 blen
+= 8 - (blen
& 7);
1092 err
= ldc_map_single(lp
, vaddr
+ skb_frag_off(f
),
1093 blen
, cookies
+ nc
, ncookies
- nc
,
1095 kunmap_local(vaddr
);
1101 ldc_unmap(lp
, cookies
, nc
);
// Make sure an outgoing skb satisfies the layout needed for LDC mapping,
// copying it into a fresh aligned skb when it does not.
//
// A copy is made when any of these hold:
//   - the fragment count is >= ncookies, or a fragment offset is not
//     8-byte aligned
//   - (skb->data & 7) differs from VNET_PACKET_SKIP
//   - tailroom is smaller than the required padding
//   - headroom is smaller than VNET_PACKET_SKIP
// The copy is allocated with alloc_and_align_skb() and inherits protocol,
// header offsets, csum_offset, ip_summed, GSO size/type and queue mapping.
// For CHECKSUM_PARTIAL input the checksum is computed during the copy,
// folded with the IPv4 or IPv6 pseudo-header for TCP/UDP, stored in the new
// skb, and ip_summed becomes CHECKSUM_NONE.
// NOTE(review): the initial values of `len` and `pad`, the padding of the
// no-copy path, the freeing of the original skb and the return values are
// not visible in this copy.
1109 static inline struct sk_buff
*vnet_skb_shape(struct sk_buff
*skb
, int ncookies
)
1111 struct sk_buff
*nskb
;
1112 int i
, len
, pad
, docopy
;
1116 if (len
< ETH_ZLEN
) {
1117 pad
+= ETH_ZLEN
- skb
->len
;
1120 len
+= VNET_PACKET_SKIP
;
1121 pad
+= 8 - (len
& 7);
1123 /* make sure we have enough cookies and alignment in every frag */
1124 docopy
= skb_shinfo(skb
)->nr_frags
>= ncookies
;
1125 for (i
= 0; i
< skb_shinfo(skb
)->nr_frags
; i
++) {
1126 skb_frag_t
*f
= &skb_shinfo(skb
)->frags
[i
];
1128 docopy
|= skb_frag_off(f
) & 7;
1130 if (((unsigned long)skb
->data
& 7) != VNET_PACKET_SKIP
||
1131 skb_tailroom(skb
) < pad
||
1132 skb_headroom(skb
) < VNET_PACKET_SKIP
|| docopy
) {
1133 int start
= 0, offset
;
1136 len
= skb
->len
> ETH_ZLEN
? skb
->len
: ETH_ZLEN
;
1137 nskb
= alloc_and_align_skb(skb
->dev
, len
);
1142 skb_reserve(nskb
, VNET_PACKET_SKIP
);
1144 nskb
->protocol
= skb
->protocol
;
1145 offset
= skb_mac_header(skb
) - skb
->data
;
1146 skb_set_mac_header(nskb
, offset
);
1147 offset
= skb_network_offset(skb
);
1148 skb_set_network_header(nskb
, offset
);
1149 offset
= skb_transport_offset(skb
);
1150 skb_set_transport_header(nskb
, offset
);
1153 nskb
->csum_offset
= skb
->csum_offset
;
1154 nskb
->ip_summed
= skb
->ip_summed
;
1156 if (skb
->ip_summed
== CHECKSUM_PARTIAL
)
1157 start
= skb_checksum_start_offset(skb
);
1159 int offset
= start
+ nskb
->csum_offset
;
1161 /* copy the headers, no csum here */
1162 if (skb_copy_bits(skb
, 0, nskb
->data
, start
)) {
1163 dev_kfree_skb(nskb
);
1168 /* copy the rest, with csum calculation */
// The checksum field of the source skb is zeroed so it does not contribute
// to the sum computed during the copy.
1169 *(__sum16
*)(skb
->data
+ offset
) = 0;
1170 csum
= skb_copy_and_csum_bits(skb
, start
,
1174 /* add in the header checksums */
1175 if (skb
->protocol
== htons(ETH_P_IP
)) {
1176 struct iphdr
*iph
= ip_hdr(nskb
);
1178 if (iph
->protocol
== IPPROTO_TCP
||
1179 iph
->protocol
== IPPROTO_UDP
) {
1180 csum
= csum_tcpudp_magic(iph
->saddr
,
1186 } else if (skb
->protocol
== htons(ETH_P_IPV6
)) {
1187 struct ipv6hdr
*ip6h
= ipv6_hdr(nskb
);
1189 if (ip6h
->nexthdr
== IPPROTO_TCP
||
1190 ip6h
->nexthdr
== IPPROTO_UDP
) {
1191 csum
= csum_ipv6_magic(&ip6h
->saddr
,
1199 /* save the final result */
1200 *(__sum16
*)(nskb
->data
+ offset
) = csum
;
1202 nskb
->ip_summed
= CHECKSUM_NONE
;
1203 } else if (skb_copy_bits(skb
, 0, nskb
->data
, skb
->len
)) {
1204 dev_kfree_skb(nskb
);
1208 (void)skb_put(nskb
, skb
->len
);
1209 if (skb_is_gso(skb
)) {
1210 skb_shinfo(nskb
)->gso_size
= skb_shinfo(skb
)->gso_size
;
1211 skb_shinfo(nskb
)->gso_type
= skb_shinfo(skb
)->gso_type
;
1213 nskb
->queue_mapping
= skb
->queue_mapping
;
// Segment a GSO skb in software and transmit each resulting segment
// through sunvnet_start_xmit_common().
//
// Visible steps:
//   - compute the header length `hlen` (MAC + network + TCP or UDP header;
//     a fixed 128 is used for an unknown transport, which is also logged)
//   - datalen = port->tsolen - hlen is the payload per segment when the
//     peer supports TSO and gso_size is smaller than that
//   - stop the queue and return NETDEV_TX_BUSY when the ring lacks room
//   - pull the MAC header, segment with skb_gso_segment() (NETIF_F_TSO
//     masked out), then restore the MAC header on the original
//   - per segment: restore GSO fields when it is still larger than
//     dev->mtu and TSO is on, copy the MAC header from the original, set
//     csum_start / csum_offset, and transmit
//   - segments are freed once a transmit has failed; the original skb is
//     freed at the end
// The visible drop path counts tx_dropped, frees the skb and returns
// NETDEV_TX_OK.
// NOTE(review): the return-type line, the declaration of `status`, the
// error checks after skb_unclone() and skb_gso_segment() and the labels are
// not visible in this copy.
1221 vnet_handle_offloads(struct vnet_port
*port
, struct sk_buff
*skb
,
1222 struct vnet_port
*(*vnet_tx_port
)
1223 (struct sk_buff
*, struct net_device
*))
1225 struct net_device
*dev
= VNET_PORT_TO_NET_DEVICE(port
);
1226 struct vio_dring_state
*dr
= &port
->vio
.drings
[VIO_DRIVER_TX_RING
];
1227 struct sk_buff
*segs
, *curr
, *next
;
1228 int maclen
, datalen
;
1230 int gso_size
, gso_type
, gso_segs
;
1231 int hlen
= skb_transport_header(skb
) - skb_mac_header(skb
);
1232 int proto
= IPPROTO_IP
;
1234 if (skb
->protocol
== htons(ETH_P_IP
))
1235 proto
= ip_hdr(skb
)->protocol
;
1236 else if (skb
->protocol
== htons(ETH_P_IPV6
))
1237 proto
= ipv6_hdr(skb
)->nexthdr
;
1239 if (proto
== IPPROTO_TCP
) {
1240 hlen
+= tcp_hdr(skb
)->doff
* 4;
1241 } else if (proto
== IPPROTO_UDP
) {
1242 hlen
+= sizeof(struct udphdr
);
1244 pr_err("vnet_handle_offloads GSO with unknown transport "
1245 "protocol %d tproto %d\n", skb
->protocol
, proto
);
1246 hlen
= 128; /* XXX */
1248 datalen
= port
->tsolen
- hlen
;
1250 gso_size
= skb_shinfo(skb
)->gso_size
;
1251 gso_type
= skb_shinfo(skb
)->gso_type
;
1252 gso_segs
= skb_shinfo(skb
)->gso_segs
;
1254 if (port
->tso
&& gso_size
< datalen
)
1255 gso_segs
= DIV_ROUND_UP(skb
->len
- hlen
, datalen
);
// NOTE(review): the first test uses the local gso_segs (possibly recomputed
// just above) while the re-check after stopping the queue reads
// skb_shinfo(skb)->gso_segs; confirm the difference is intended.
1257 if (unlikely(vnet_tx_dring_avail(dr
) < gso_segs
)) {
1258 struct netdev_queue
*txq
;
1260 txq
= netdev_get_tx_queue(dev
, port
->q_index
);
1261 netif_tx_stop_queue(txq
);
1262 if (vnet_tx_dring_avail(dr
) < skb_shinfo(skb
)->gso_segs
)
1263 return NETDEV_TX_BUSY
;
1264 netif_tx_wake_queue(txq
);
1267 maclen
= skb_network_header(skb
) - skb_mac_header(skb
);
1268 skb_pull(skb
, maclen
);
1270 if (port
->tso
&& gso_size
< datalen
) {
1271 if (skb_unclone(skb
, GFP_ATOMIC
))
1274 /* segment to TSO size */
1275 skb_shinfo(skb
)->gso_size
= datalen
;
1276 skb_shinfo(skb
)->gso_segs
= gso_segs
;
1278 segs
= skb_gso_segment(skb
, dev
->features
& ~NETIF_F_TSO
);
1282 skb_push(skb
, maclen
);
1283 skb_reset_mac_header(skb
);
1286 skb_list_walk_safe(segs
, curr
, next
) {
1287 skb_mark_not_on_list(curr
);
1288 if (port
->tso
&& curr
->len
> dev
->mtu
) {
1289 skb_shinfo(curr
)->gso_size
= gso_size
;
1290 skb_shinfo(curr
)->gso_type
= gso_type
;
1291 skb_shinfo(curr
)->gso_segs
=
1292 DIV_ROUND_UP(curr
->len
- hlen
, gso_size
);
1294 skb_shinfo(curr
)->gso_size
= 0;
1297 skb_push(curr
, maclen
);
1298 skb_reset_mac_header(curr
);
1299 memcpy(skb_mac_header(curr
), skb_mac_header(skb
),
1301 curr
->csum_start
= skb_transport_header(curr
) - curr
->head
;
// NOTE(review): the transport protocol is read through ip_hdr() for every
// segment, although `proto` above distinguishes IPv4 from IPv6; confirm
// that IPv6 segments get the right csum_offset.
1302 if (ip_hdr(curr
)->protocol
== IPPROTO_TCP
)
1303 curr
->csum_offset
= offsetof(struct tcphdr
, check
);
1304 else if (ip_hdr(curr
)->protocol
== IPPROTO_UDP
)
1305 curr
->csum_offset
= offsetof(struct udphdr
, check
);
// Transmit only while no earlier segment has failed; once a failure bit is
// set in `status`, the remaining segments are freed instead.
1307 if (!(status
& NETDEV_TX_MASK
))
1308 status
= sunvnet_start_xmit_common(curr
, dev
,
1310 if (status
& NETDEV_TX_MASK
)
1311 dev_kfree_skb_any(curr
);
1314 if (!(status
& NETDEV_TX_MASK
))
1315 dev_kfree_skb_any(skb
);
1318 dev
->stats
.tx_dropped
++;
1319 dev_kfree_skb_any(skb
);
1320 return NETDEV_TX_OK
;
1324 sunvnet_start_xmit_common(struct sk_buff
*skb
, struct net_device
*dev
,
1325 struct vnet_port
*(*vnet_tx_port
)
1326 (struct sk_buff
*, struct net_device
*))
1328 struct vnet_port
*port
= NULL
;
1329 struct vio_dring_state
*dr
;
1330 struct vio_net_desc
*d
;
1332 struct sk_buff
*freeskbs
= NULL
;
1334 unsigned pending
= 0;
1335 struct netdev_queue
*txq
;
1338 port
= vnet_tx_port(skb
, dev
);
1339 if (unlikely(!port
))
1342 if (skb_is_gso(skb
) && skb
->len
> port
->tsolen
) {
1343 err
= vnet_handle_offloads(port
, skb
, vnet_tx_port
);
1348 if (!skb_is_gso(skb
) && skb
->len
> port
->rmtu
) {
1349 unsigned long localmtu
= port
->rmtu
- ETH_HLEN
;
1351 if (vio_version_after_eq(&port
->vio
, 1, 3))
1352 localmtu
-= VLAN_HLEN
;
1354 if (skb
->protocol
== htons(ETH_P_IP
))
1355 icmp_ndo_send(skb
, ICMP_DEST_UNREACH
, ICMP_FRAG_NEEDED
,
1357 #if IS_ENABLED(CONFIG_IPV6)
1358 else if (skb
->protocol
== htons(ETH_P_IPV6
))
1359 icmpv6_ndo_send(skb
, ICMPV6_PKT_TOOBIG
, 0, localmtu
);
1364 skb
= vnet_skb_shape(skb
, 2);
1369 if (skb
->ip_summed
== CHECKSUM_PARTIAL
) {
1370 if (skb
->protocol
== htons(ETH_P_IP
))
1371 vnet_fullcsum_ipv4(skb
);
1372 #if IS_ENABLED(CONFIG_IPV6)
1373 else if (skb
->protocol
== htons(ETH_P_IPV6
))
1374 vnet_fullcsum_ipv6(skb
);
1378 dr
= &port
->vio
.drings
[VIO_DRIVER_TX_RING
];
1379 i
= skb_get_queue_mapping(skb
);
1380 txq
= netdev_get_tx_queue(dev
, i
);
1381 if (unlikely(vnet_tx_dring_avail(dr
) < 1)) {
1382 if (!netif_tx_queue_stopped(txq
)) {
1383 netif_tx_stop_queue(txq
);
1385 /* This is a hard error, log it. */
1386 netdev_err(dev
, "BUG! Tx Ring full when queue awake!\n");
1387 dev
->stats
.tx_errors
++;
1390 return NETDEV_TX_BUSY
;
1393 d
= vio_dring_cur(dr
);
1397 freeskbs
= vnet_clean_tx_ring(port
, &pending
);
1399 BUG_ON(port
->tx_bufs
[txi
].skb
);
1405 err
= vnet_skb_map(port
->vio
.lp
, skb
, port
->tx_bufs
[txi
].cookies
, 2,
1406 (LDC_MAP_SHADOW
| LDC_MAP_DIRECT
| LDC_MAP_RW
));
1408 netdev_info(dev
, "tx buffer map error %d\n", err
);
1412 port
->tx_bufs
[txi
].skb
= skb
;
1414 port
->tx_bufs
[txi
].ncookies
= err
;
1416 /* We don't rely on the ACKs to free the skb in vnet_start_xmit(),
1417 * thus it is safe to not set VIO_ACK_ENABLE for each transmission:
1418 * the protocol itself does not require it as long as the peer
1419 * sends a VIO_SUBTYPE_ACK for VIO_DRING_STOPPED.
1421 * An ACK for every packet in the ring is expensive as the
1422 * sending of LDC messages is slow and affects performance.
1424 d
->hdr
.ack
= VIO_ACK_DISABLE
;
1426 d
->ncookies
= port
->tx_bufs
[txi
].ncookies
;
1427 for (i
= 0; i
< d
->ncookies
; i
++)
1428 d
->cookies
[i
] = port
->tx_bufs
[txi
].cookies
[i
];
1429 if (vio_version_after_eq(&port
->vio
, 1, 7)) {
1430 struct vio_net_dext
*dext
= vio_net_ext(d
);
1432 memset(dext
, 0, sizeof(*dext
));
1433 if (skb_is_gso(port
->tx_bufs
[txi
].skb
)) {
1434 dext
->ipv4_lso_mss
= skb_shinfo(port
->tx_bufs
[txi
].skb
)
1436 dext
->flags
|= VNET_PKT_IPV4_LSO
;
1438 if (vio_version_after_eq(&port
->vio
, 1, 8) &&
1439 !port
->switch_port
) {
1440 dext
->flags
|= VNET_PKT_HCK_IPV4_HDRCKSUM_OK
;
1441 dext
->flags
|= VNET_PKT_HCK_FULLCKSUM_OK
;
1445 /* This has to be a non-SMP write barrier because we are writing
1446 * to memory which is shared with the peer LDOM.
1450 d
->hdr
.state
= VIO_DESC_READY
;
1452 /* Exactly one ldc "start" trigger (for dr->cons) needs to be sent
1453 * to notify the consumer that some descriptors are READY.
1454 * After that "start" trigger, no additional triggers are needed until
1455 * a DRING_STOPPED is received from the consumer. The dr->cons field
1456 * (set up by vnet_ack()) has the value of the next dring index
1457 * that has not yet been ack-ed. We send a "start" trigger here
1458 * if, and only if, start_cons is true (reset it afterward). Conversely,
1459 * vnet_ack() should check if the dring corresponding to cons
1460 * is marked READY, but start_cons was false.
1461 * If so, vnet_ack() should send out the missed "start" trigger.
1463 * Note that the dma_wmb() above makes sure the cookies et al. are
1464 * not globally visible before the VIO_DESC_READY, and that the
1465 * stores are ordered correctly by the compiler. The consumer will
1466 * not proceed until the VIO_DESC_READY is visible assuring that
1467 * the consumer does not observe anything related to descriptors
1468 * out of order. The HV trap from the LDC start trigger is the
1469 * producer to consumer announcement that work is available to the
1472 if (!port
->start_cons
) { /* previous trigger suffices */
1473 trace_vnet_skip_tx_trigger(port
->vio
._local_sid
,
1474 port
->vio
._peer_sid
, dr
->cons
);
1475 goto ldc_start_done
;
1478 err
= __vnet_tx_trigger(port
, dr
->cons
);
1479 if (unlikely(err
< 0)) {
1480 netdev_info(dev
, "TX trigger error %d\n", err
);
1481 d
->hdr
.state
= VIO_DESC_FREE
;
1482 skb
= port
->tx_bufs
[txi
].skb
;
1483 port
->tx_bufs
[txi
].skb
= NULL
;
1484 dev
->stats
.tx_carrier_errors
++;
1489 port
->start_cons
= false;
1491 dev
->stats
.tx_packets
++;
1492 dev
->stats
.tx_bytes
+= port
->tx_bufs
[txi
].skb
->len
;
1493 port
->stats
.tx_packets
++;
1494 port
->stats
.tx_bytes
+= port
->tx_bufs
[txi
].skb
->len
;
1496 dr
->prod
= (dr
->prod
+ 1) & (VNET_TX_RING_SIZE
- 1);
1497 if (unlikely(vnet_tx_dring_avail(dr
) < 1)) {
1498 netif_tx_stop_queue(txq
);
1500 if (vnet_tx_dring_avail(dr
) > VNET_TX_WAKEUP_THRESH(dr
))
1501 netif_tx_wake_queue(txq
);
1504 (void)mod_timer(&port
->clean_timer
, jiffies
+ VNET_CLEAN_TIMEOUT
);
1507 vnet_free_skbs(freeskbs
);
1509 return NETDEV_TX_OK
;
1513 (void)mod_timer(&port
->clean_timer
,
1514 jiffies
+ VNET_CLEAN_TIMEOUT
);
1516 del_timer(&port
->clean_timer
);
1519 vnet_free_skbs(freeskbs
);
1520 dev
->stats
.tx_dropped
++;
1521 return NETDEV_TX_OK
;
1523 EXPORT_SYMBOL_GPL(sunvnet_start_xmit_common
);
/* TX timeout handler shared by the sunvnet drivers.
 *
 * @dev:     net device that timed out (unused)
 * @txqueue: index of the stalled TX queue (unused)
 *
 * Currently a stub: no recovery action is taken.
 */
void sunvnet_tx_timeout_common(struct net_device *dev, unsigned int txqueue)
{
	/* XXX Implement me XXX */
}
EXPORT_SYMBOL_GPL(sunvnet_tx_timeout_common);
/* Bring the interface up: report carrier and start every TX queue.
 *
 * @dev: net device being opened
 *
 * Always returns 0.
 */
int sunvnet_open_common(struct net_device *dev)
{
	netif_carrier_on(dev);
	netif_tx_start_all_queues(dev);

	return 0;
}
EXPORT_SYMBOL_GPL(sunvnet_open_common);
/* Take the interface down: stop every TX queue, then drop carrier.
 *
 * @dev: net device being closed
 *
 * Always returns 0.
 */
int sunvnet_close_common(struct net_device *dev)
{
	netif_tx_stop_all_queues(dev);
	netif_carrier_off(dev);

	return 0;
}
EXPORT_SYMBOL_GPL(sunvnet_close_common);
1549 static struct vnet_mcast_entry
*__vnet_mc_find(struct vnet
*vp
, u8
*addr
)
1551 struct vnet_mcast_entry
*m
;
1553 for (m
= vp
->mcast_list
; m
; m
= m
->next
) {
1554 if (ether_addr_equal(m
->addr
, addr
))
1560 static void __update_mc_list(struct vnet
*vp
, struct net_device
*dev
)
1562 struct netdev_hw_addr
*ha
;
1564 netdev_for_each_mc_addr(ha
, dev
) {
1565 struct vnet_mcast_entry
*m
;
1567 m
= __vnet_mc_find(vp
, ha
->addr
);
1574 m
= kzalloc(sizeof(*m
), GFP_ATOMIC
);
1577 memcpy(m
->addr
, ha
->addr
, ETH_ALEN
);
1580 m
->next
= vp
->mcast_list
;
1586 static void __send_mc_list(struct vnet
*vp
, struct vnet_port
*port
)
1588 struct vio_net_mcast_info info
;
1589 struct vnet_mcast_entry
*m
, **pp
;
1592 memset(&info
, 0, sizeof(info
));
1594 info
.tag
.type
= VIO_TYPE_CTRL
;
1595 info
.tag
.stype
= VIO_SUBTYPE_INFO
;
1596 info
.tag
.stype_env
= VNET_MCAST_INFO
;
1597 info
.tag
.sid
= vio_send_sid(&port
->vio
);
1601 for (m
= vp
->mcast_list
; m
; m
= m
->next
) {
1605 memcpy(&info
.mcast_addr
[n_addrs
* ETH_ALEN
],
1607 if (++n_addrs
== VNET_NUM_MCAST
) {
1608 info
.count
= n_addrs
;
1610 (void)vio_ldc_send(&port
->vio
, &info
,
1616 info
.count
= n_addrs
;
1617 (void)vio_ldc_send(&port
->vio
, &info
, sizeof(info
));
1623 pp
= &vp
->mcast_list
;
1624 while ((m
= *pp
) != NULL
) {
1631 memcpy(&info
.mcast_addr
[n_addrs
* ETH_ALEN
],
1633 if (++n_addrs
== VNET_NUM_MCAST
) {
1634 info
.count
= n_addrs
;
1635 (void)vio_ldc_send(&port
->vio
, &info
,
1644 info
.count
= n_addrs
;
1645 (void)vio_ldc_send(&port
->vio
, &info
, sizeof(info
));
1649 void sunvnet_set_rx_mode_common(struct net_device
*dev
, struct vnet
*vp
)
1651 struct vnet_port
*port
;
1654 list_for_each_entry_rcu(port
, &vp
->port_list
, list
) {
1655 if (port
->switch_port
) {
1656 __update_mc_list(vp
, dev
);
1657 __send_mc_list(vp
, port
);
1663 EXPORT_SYMBOL_GPL(sunvnet_set_rx_mode_common
);
1665 int sunvnet_set_mac_addr_common(struct net_device
*dev
, void *p
)
1669 EXPORT_SYMBOL_GPL(sunvnet_set_mac_addr_common
);
1671 void sunvnet_port_free_tx_bufs_common(struct vnet_port
*port
)
1673 struct vio_dring_state
*dr
;
1676 dr
= &port
->vio
.drings
[VIO_DRIVER_TX_RING
];
1681 for (i
= 0; i
< VNET_TX_RING_SIZE
; i
++) {
1682 struct vio_net_desc
*d
;
1683 void *skb
= port
->tx_bufs
[i
].skb
;
1688 d
= vio_dring_entry(dr
, i
);
1690 ldc_unmap(port
->vio
.lp
,
1691 port
->tx_bufs
[i
].cookies
,
1692 port
->tx_bufs
[i
].ncookies
);
1694 port
->tx_bufs
[i
].skb
= NULL
;
1695 d
->hdr
.state
= VIO_DESC_FREE
;
1697 ldc_free_exp_dring(port
->vio
.lp
, dr
->base
,
1698 (dr
->entry_size
* dr
->num_entries
),
1699 dr
->cookies
, dr
->ncookies
);
1702 dr
->num_entries
= 0;
1706 EXPORT_SYMBOL_GPL(sunvnet_port_free_tx_bufs_common
);
1708 void vnet_port_reset(struct vnet_port
*port
)
1710 del_timer(&port
->clean_timer
);
1711 sunvnet_port_free_tx_bufs_common(port
);
1713 port
->tso
= (port
->vsw
== 0); /* no tso in vsw, misbehaves in bridge */
1716 EXPORT_SYMBOL_GPL(vnet_port_reset
);
1718 static int vnet_port_alloc_tx_ring(struct vnet_port
*port
)
1720 struct vio_dring_state
*dr
;
1721 unsigned long len
, elen
;
1722 int i
, err
, ncookies
;
1725 dr
= &port
->vio
.drings
[VIO_DRIVER_TX_RING
];
1727 elen
= sizeof(struct vio_net_desc
) +
1728 sizeof(struct ldc_trans_cookie
) * 2;
1729 if (vio_version_after_eq(&port
->vio
, 1, 7))
1730 elen
+= sizeof(struct vio_net_dext
);
1731 len
= VNET_TX_RING_SIZE
* elen
;
1733 ncookies
= VIO_MAX_RING_COOKIES
;
1734 dring
= ldc_alloc_exp_dring(port
->vio
.lp
, len
,
1735 dr
->cookies
, &ncookies
,
1739 if (IS_ERR(dring
)) {
1740 err
= PTR_ERR(dring
);
1745 dr
->entry_size
= elen
;
1746 dr
->num_entries
= VNET_TX_RING_SIZE
;
1749 port
->start_cons
= true; /* need an initial trigger */
1750 dr
->pending
= VNET_TX_RING_SIZE
;
1751 dr
->ncookies
= ncookies
;
1753 for (i
= 0; i
< VNET_TX_RING_SIZE
; ++i
) {
1754 struct vio_net_desc
*d
;
1756 d
= vio_dring_entry(dr
, i
);
1757 d
->hdr
.state
= VIO_DESC_FREE
;
1762 sunvnet_port_free_tx_bufs_common(port
);
#ifdef CONFIG_NET_POLL_CONTROLLER
/* Poll-controller hook: kick NAPI processing without relying on interrupts.
 *
 * @dev: net device being polled (unused)
 * @vp:  vnet instance whose ports are inspected
 *
 * With vp->lock held and local interrupts saved, schedules NAPI on the first
 * port of vp->port_list.  Does nothing when the list is empty.
 */
void sunvnet_poll_controller_common(struct net_device *dev, struct vnet *vp)
{
	struct vnet_port *port;
	unsigned long flags;

	spin_lock_irqsave(&vp->lock, flags);
	if (!list_empty(&vp->port_list)) {
		port = list_entry(vp->port_list.next, struct vnet_port, list);
		napi_schedule(&port->napi);
	}
	spin_unlock_irqrestore(&vp->lock, flags);
}
EXPORT_SYMBOL_GPL(sunvnet_poll_controller_common);
#endif
1783 void sunvnet_port_add_txq_common(struct vnet_port
*port
)
1785 struct vnet
*vp
= port
->vp
;
1789 /* find the first least-used q
1790 * When there are more ldoms than q's, we start to
1791 * double up on ports per queue.
1793 for (i
= 0; i
< VNET_MAX_TXQS
; i
++) {
1794 if (vp
->q_used
[i
] == 0) {
1798 if (vp
->q_used
[i
] < vp
->q_used
[smallest
])
1803 vp
->q_used
[smallest
]++;
1804 port
->q_index
= smallest
;
1806 EXPORT_SYMBOL_GPL(sunvnet_port_add_txq_common
);
1808 void sunvnet_port_rm_txq_common(struct vnet_port
*port
)
1811 port
->vp
->q_used
[port
->q_index
]--;
1814 EXPORT_SYMBOL_GPL(sunvnet_port_rm_txq_common
);