// SPDX-License-Identifier: GPL-2.0
/* sunvnet.c: Sun LDOM Virtual Network Driver.
 *
 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
 * Copyright (C) 2016-2017 Oracle. All rights reserved.
 */
8 #include <linux/module.h>
9 #include <linux/kernel.h>
10 #include <linux/types.h>
11 #include <linux/slab.h>
12 #include <linux/delay.h>
13 #include <linux/init.h>
14 #include <linux/netdevice.h>
15 #include <linux/ethtool.h>
16 #include <linux/etherdevice.h>
17 #include <linux/mutex.h>
18 #include <linux/highmem.h>
19 #include <linux/if_vlan.h>
20 #define CREATE_TRACE_POINTS
21 #include <trace/events/sunvnet.h>
23 #if IS_ENABLED(CONFIG_IPV6)
24 #include <linux/icmpv6.h>
29 #include <net/route.h>
34 #include "sunvnet_common.h"
/* Heuristic for the number of times to exponentially backoff and
 * retry sending an LDC trigger when EAGAIN is encountered
 */
39 #define VNET_MAX_RETRIES 10
41 MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
42 MODULE_DESCRIPTION("Sun LDOM virtual network support library");
43 MODULE_LICENSE("GPL");
44 MODULE_VERSION("1.1");
46 static int __vnet_tx_trigger(struct vnet_port
*port
, u32 start
);
48 static inline u32
vnet_tx_dring_avail(struct vio_dring_state
*dr
)
50 return vio_dring_avail(dr
, VNET_TX_RING_SIZE
);
53 static int vnet_handle_unknown(struct vnet_port
*port
, void *arg
)
55 struct vio_msg_tag
*pkt
= arg
;
57 pr_err("Received unknown msg [%02x:%02x:%04x:%08x]\n",
58 pkt
->type
, pkt
->stype
, pkt
->stype_env
, pkt
->sid
);
59 pr_err("Resetting connection\n");
61 ldc_disconnect(port
->vio
.lp
);
/* Forward declaration: used by sunvnet_send_attr_common() below. */
static int vnet_port_alloc_tx_ring(struct vnet_port *port);
68 int sunvnet_send_attr_common(struct vio_driver_state
*vio
)
70 struct vnet_port
*port
= to_vnet_port(vio
);
71 struct net_device
*dev
= VNET_PORT_TO_NET_DEVICE(port
);
72 struct vio_net_attr_info pkt
;
73 int framelen
= ETH_FRAME_LEN
;
76 err
= vnet_port_alloc_tx_ring(to_vnet_port(vio
));
80 memset(&pkt
, 0, sizeof(pkt
));
81 pkt
.tag
.type
= VIO_TYPE_CTRL
;
82 pkt
.tag
.stype
= VIO_SUBTYPE_INFO
;
83 pkt
.tag
.stype_env
= VIO_ATTR_INFO
;
84 pkt
.tag
.sid
= vio_send_sid(vio
);
85 if (vio_version_before(vio
, 1, 2))
86 pkt
.xfer_mode
= VIO_DRING_MODE
;
88 pkt
.xfer_mode
= VIO_NEW_DRING_MODE
;
89 pkt
.addr_type
= VNET_ADDR_ETHERMAC
;
91 for (i
= 0; i
< 6; i
++)
92 pkt
.addr
|= (u64
)dev
->dev_addr
[i
] << ((5 - i
) * 8);
93 if (vio_version_after(vio
, 1, 3)) {
95 port
->rmtu
= min(VNET_MAXPACKET
, port
->rmtu
);
98 port
->rmtu
= VNET_MAXPACKET
;
101 if (vio_version_after_eq(vio
, 1, 6))
102 pkt
.options
= VIO_TX_DRING
;
103 } else if (vio_version_before(vio
, 1, 3)) {
106 pkt
.mtu
= framelen
+ VLAN_HLEN
;
110 if (vio_version_after_eq(vio
, 1, 7) && port
->tso
) {
111 pkt
.cflags
|= VNET_LSO_IPV4_CAPAB
;
113 port
->tsolen
= VNET_MAXTSO
;
114 pkt
.ipv4_lso_maxlen
= port
->tsolen
;
117 pkt
.plnk_updt
= PHYSLINK_UPDATE_NONE
;
119 viodbg(HS
, "SEND NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
120 "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
121 "cflags[0x%04x] lso_max[%u]\n",
122 pkt
.xfer_mode
, pkt
.addr_type
,
123 (unsigned long long)pkt
.addr
,
124 pkt
.ack_freq
, pkt
.plnk_updt
, pkt
.options
,
125 (unsigned long long)pkt
.mtu
, pkt
.cflags
, pkt
.ipv4_lso_maxlen
);
127 return vio_ldc_send(vio
, &pkt
, sizeof(pkt
));
129 EXPORT_SYMBOL_GPL(sunvnet_send_attr_common
);
131 static int handle_attr_info(struct vio_driver_state
*vio
,
132 struct vio_net_attr_info
*pkt
)
134 struct vnet_port
*port
= to_vnet_port(vio
);
138 viodbg(HS
, "GOT NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
139 "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] "
140 " (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
141 pkt
->xfer_mode
, pkt
->addr_type
,
142 (unsigned long long)pkt
->addr
,
143 pkt
->ack_freq
, pkt
->plnk_updt
, pkt
->options
,
144 (unsigned long long)pkt
->mtu
, port
->rmtu
, pkt
->cflags
,
145 pkt
->ipv4_lso_maxlen
);
147 pkt
->tag
.sid
= vio_send_sid(vio
);
149 xfer_mode
= pkt
->xfer_mode
;
150 /* for version < 1.2, VIO_DRING_MODE = 0x3 and no bitmask */
151 if (vio_version_before(vio
, 1, 2) && xfer_mode
== VIO_DRING_MODE
)
152 xfer_mode
= VIO_NEW_DRING_MODE
;
155 * < v1.3 - ETH_FRAME_LEN exactly
156 * > v1.3 - MIN(pkt.mtu, VNET_MAXPACKET, port->rmtu) and change
158 * = v1.3 - ETH_FRAME_LEN + VLAN_HLEN exactly
160 if (vio_version_before(vio
, 1, 3)) {
161 localmtu
= ETH_FRAME_LEN
;
162 } else if (vio_version_after(vio
, 1, 3)) {
163 localmtu
= port
->rmtu
? port
->rmtu
: VNET_MAXPACKET
;
164 localmtu
= min(pkt
->mtu
, localmtu
);
167 localmtu
= ETH_FRAME_LEN
+ VLAN_HLEN
;
169 port
->rmtu
= localmtu
;
171 /* LSO negotiation */
172 if (vio_version_after_eq(vio
, 1, 7))
173 port
->tso
&= !!(pkt
->cflags
& VNET_LSO_IPV4_CAPAB
);
178 port
->tsolen
= VNET_MAXTSO
;
179 port
->tsolen
= min(port
->tsolen
, pkt
->ipv4_lso_maxlen
);
180 if (port
->tsolen
< VNET_MINTSO
) {
183 pkt
->cflags
&= ~VNET_LSO_IPV4_CAPAB
;
185 pkt
->ipv4_lso_maxlen
= port
->tsolen
;
187 pkt
->cflags
&= ~VNET_LSO_IPV4_CAPAB
;
188 pkt
->ipv4_lso_maxlen
= 0;
192 /* for version >= 1.6, ACK packet mode we support */
193 if (vio_version_after_eq(vio
, 1, 6)) {
194 pkt
->xfer_mode
= VIO_NEW_DRING_MODE
;
195 pkt
->options
= VIO_TX_DRING
;
198 if (!(xfer_mode
| VIO_NEW_DRING_MODE
) ||
199 pkt
->addr_type
!= VNET_ADDR_ETHERMAC
||
200 pkt
->mtu
!= localmtu
) {
201 viodbg(HS
, "SEND NET ATTR NACK\n");
203 pkt
->tag
.stype
= VIO_SUBTYPE_NACK
;
205 (void)vio_ldc_send(vio
, pkt
, sizeof(*pkt
));
210 viodbg(HS
, "SEND NET ATTR ACK xmode[0x%x] atype[0x%x] "
211 "addr[%llx] ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] "
212 "mtu[%llu] (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n",
213 pkt
->xfer_mode
, pkt
->addr_type
,
214 (unsigned long long)pkt
->addr
,
215 pkt
->ack_freq
, pkt
->plnk_updt
, pkt
->options
,
216 (unsigned long long)pkt
->mtu
, port
->rmtu
, pkt
->cflags
,
217 pkt
->ipv4_lso_maxlen
);
219 pkt
->tag
.stype
= VIO_SUBTYPE_ACK
;
221 return vio_ldc_send(vio
, pkt
, sizeof(*pkt
));
224 static int handle_attr_ack(struct vio_driver_state
*vio
,
225 struct vio_net_attr_info
*pkt
)
227 viodbg(HS
, "GOT NET ATTR ACK\n");
232 static int handle_attr_nack(struct vio_driver_state
*vio
,
233 struct vio_net_attr_info
*pkt
)
235 viodbg(HS
, "GOT NET ATTR NACK\n");
240 int sunvnet_handle_attr_common(struct vio_driver_state
*vio
, void *arg
)
242 struct vio_net_attr_info
*pkt
= arg
;
244 switch (pkt
->tag
.stype
) {
245 case VIO_SUBTYPE_INFO
:
246 return handle_attr_info(vio
, pkt
);
248 case VIO_SUBTYPE_ACK
:
249 return handle_attr_ack(vio
, pkt
);
251 case VIO_SUBTYPE_NACK
:
252 return handle_attr_nack(vio
, pkt
);
258 EXPORT_SYMBOL_GPL(sunvnet_handle_attr_common
);
260 void sunvnet_handshake_complete_common(struct vio_driver_state
*vio
)
262 struct vio_dring_state
*dr
;
264 dr
= &vio
->drings
[VIO_DRIVER_RX_RING
];
268 dr
= &vio
->drings
[VIO_DRIVER_TX_RING
];
272 EXPORT_SYMBOL_GPL(sunvnet_handshake_complete_common
);
/* The hypervisor interface that implements copying to/from imported
 * memory from another domain requires that copies are done to 8-byte
 * aligned buffers, and that the lengths of such copies are also 8-byte
 * multiples.
 *
 * So we align skb->data to an 8-byte multiple and pad-out the data
 * area so we can round the copy length up to the next multiple of
 * 8 for the copy.
 *
 * The transmitter puts the actual start of the packet 6 bytes into
 * the buffer it sends over, so that the IP headers after the ethernet
 * header are aligned properly.  These 6 bytes are not in the descriptor
 * length, they are simply implied.  This offset is represented using
 * the VNET_PACKET_SKIP macro.
 */
289 static struct sk_buff
*alloc_and_align_skb(struct net_device
*dev
,
293 unsigned long addr
, off
;
295 skb
= netdev_alloc_skb(dev
, len
+ VNET_PACKET_SKIP
+ 8 + 8);
299 addr
= (unsigned long)skb
->data
;
300 off
= ((addr
+ 7UL) & ~7UL) - addr
;
302 skb_reserve(skb
, off
);
307 static inline void vnet_fullcsum_ipv4(struct sk_buff
*skb
)
309 struct iphdr
*iph
= ip_hdr(skb
);
310 int offset
= skb_transport_offset(skb
);
312 if (skb
->protocol
!= htons(ETH_P_IP
))
314 if (iph
->protocol
!= IPPROTO_TCP
&&
315 iph
->protocol
!= IPPROTO_UDP
)
317 skb
->ip_summed
= CHECKSUM_NONE
;
320 if (iph
->protocol
== IPPROTO_TCP
) {
321 struct tcphdr
*ptcp
= tcp_hdr(skb
);
324 skb
->csum
= skb_checksum(skb
, offset
, skb
->len
- offset
, 0);
325 ptcp
->check
= csum_tcpudp_magic(iph
->saddr
, iph
->daddr
,
326 skb
->len
- offset
, IPPROTO_TCP
,
328 } else if (iph
->protocol
== IPPROTO_UDP
) {
329 struct udphdr
*pudp
= udp_hdr(skb
);
332 skb
->csum
= skb_checksum(skb
, offset
, skb
->len
- offset
, 0);
333 pudp
->check
= csum_tcpudp_magic(iph
->saddr
, iph
->daddr
,
334 skb
->len
- offset
, IPPROTO_UDP
,
339 #if IS_ENABLED(CONFIG_IPV6)
340 static inline void vnet_fullcsum_ipv6(struct sk_buff
*skb
)
342 struct ipv6hdr
*ip6h
= ipv6_hdr(skb
);
343 int offset
= skb_transport_offset(skb
);
345 if (skb
->protocol
!= htons(ETH_P_IPV6
))
347 if (ip6h
->nexthdr
!= IPPROTO_TCP
&&
348 ip6h
->nexthdr
!= IPPROTO_UDP
)
350 skb
->ip_summed
= CHECKSUM_NONE
;
353 if (ip6h
->nexthdr
== IPPROTO_TCP
) {
354 struct tcphdr
*ptcp
= tcp_hdr(skb
);
357 skb
->csum
= skb_checksum(skb
, offset
, skb
->len
- offset
, 0);
358 ptcp
->check
= csum_ipv6_magic(&ip6h
->saddr
, &ip6h
->daddr
,
359 skb
->len
- offset
, IPPROTO_TCP
,
361 } else if (ip6h
->nexthdr
== IPPROTO_UDP
) {
362 struct udphdr
*pudp
= udp_hdr(skb
);
365 skb
->csum
= skb_checksum(skb
, offset
, skb
->len
- offset
, 0);
366 pudp
->check
= csum_ipv6_magic(&ip6h
->saddr
, &ip6h
->daddr
,
367 skb
->len
- offset
, IPPROTO_UDP
,
373 static int vnet_rx_one(struct vnet_port
*port
, struct vio_net_desc
*desc
)
375 struct net_device
*dev
= VNET_PORT_TO_NET_DEVICE(port
);
376 unsigned int len
= desc
->size
;
377 unsigned int copy_len
;
383 if (port
->tso
&& port
->tsolen
> port
->rmtu
)
384 maxlen
= port
->tsolen
;
387 if (unlikely(len
< ETH_ZLEN
|| len
> maxlen
)) {
388 dev
->stats
.rx_length_errors
++;
392 skb
= alloc_and_align_skb(dev
, len
);
394 if (unlikely(!skb
)) {
395 dev
->stats
.rx_missed_errors
++;
399 copy_len
= (len
+ VNET_PACKET_SKIP
+ 7U) & ~7U;
400 skb_put(skb
, copy_len
);
401 err
= ldc_copy(port
->vio
.lp
, LDC_COPY_IN
,
402 skb
->data
, copy_len
, 0,
403 desc
->cookies
, desc
->ncookies
);
404 if (unlikely(err
< 0)) {
405 dev
->stats
.rx_frame_errors
++;
409 skb_pull(skb
, VNET_PACKET_SKIP
);
411 skb
->protocol
= eth_type_trans(skb
, dev
);
413 if (vio_version_after_eq(&port
->vio
, 1, 8)) {
414 struct vio_net_dext
*dext
= vio_net_ext(desc
);
416 skb_reset_network_header(skb
);
418 if (dext
->flags
& VNET_PKT_HCK_IPV4_HDRCKSUM
) {
419 if (skb
->protocol
== ETH_P_IP
) {
420 struct iphdr
*iph
= ip_hdr(skb
);
426 if ((dext
->flags
& VNET_PKT_HCK_FULLCKSUM
) &&
427 skb
->ip_summed
== CHECKSUM_NONE
) {
428 if (skb
->protocol
== htons(ETH_P_IP
)) {
429 struct iphdr
*iph
= ip_hdr(skb
);
430 int ihl
= iph
->ihl
* 4;
432 skb_set_transport_header(skb
, ihl
);
433 vnet_fullcsum_ipv4(skb
);
434 #if IS_ENABLED(CONFIG_IPV6)
435 } else if (skb
->protocol
== htons(ETH_P_IPV6
)) {
436 skb_set_transport_header(skb
,
437 sizeof(struct ipv6hdr
));
438 vnet_fullcsum_ipv6(skb
);
442 if (dext
->flags
& VNET_PKT_HCK_IPV4_HDRCKSUM_OK
) {
443 skb
->ip_summed
= CHECKSUM_PARTIAL
;
445 if (dext
->flags
& VNET_PKT_HCK_FULLCKSUM_OK
)
450 skb
->ip_summed
= port
->switch_port
? CHECKSUM_NONE
: CHECKSUM_PARTIAL
;
452 if (unlikely(is_multicast_ether_addr(eth_hdr(skb
)->h_dest
)))
453 dev
->stats
.multicast
++;
454 dev
->stats
.rx_packets
++;
455 dev
->stats
.rx_bytes
+= len
;
456 port
->stats
.rx_packets
++;
457 port
->stats
.rx_bytes
+= len
;
458 napi_gro_receive(&port
->napi
, skb
);
465 dev
->stats
.rx_dropped
++;
469 static int vnet_send_ack(struct vnet_port
*port
, struct vio_dring_state
*dr
,
470 u32 start
, u32 end
, u8 vio_dring_state
)
472 struct vio_dring_data hdr
= {
474 .type
= VIO_TYPE_DATA
,
475 .stype
= VIO_SUBTYPE_ACK
,
476 .stype_env
= VIO_DRING_DATA
,
477 .sid
= vio_send_sid(&port
->vio
),
479 .dring_ident
= dr
->ident
,
482 .state
= vio_dring_state
,
487 hdr
.seq
= dr
->snd_nxt
;
490 err
= vio_ldc_send(&port
->vio
, &hdr
, sizeof(hdr
));
496 if ((delay
<<= 1) > 128)
498 if (retries
++ > VNET_MAX_RETRIES
) {
499 pr_info("ECONNRESET %x:%x:%x:%x:%x:%x\n",
500 port
->raddr
[0], port
->raddr
[1],
501 port
->raddr
[2], port
->raddr
[3],
502 port
->raddr
[4], port
->raddr
[5]);
505 } while (err
== -EAGAIN
);
507 if (err
<= 0 && vio_dring_state
== VIO_DRING_STOPPED
) {
508 port
->stop_rx_idx
= end
;
509 port
->stop_rx
= true;
511 port
->stop_rx_idx
= 0;
512 port
->stop_rx
= false;
518 static struct vio_net_desc
*get_rx_desc(struct vnet_port
*port
,
519 struct vio_dring_state
*dr
,
522 struct vio_net_desc
*desc
= port
->vio
.desc_buf
;
525 err
= ldc_get_dring_entry(port
->vio
.lp
, desc
, dr
->entry_size
,
526 (index
* dr
->entry_size
),
527 dr
->cookies
, dr
->ncookies
);
534 static int put_rx_desc(struct vnet_port
*port
,
535 struct vio_dring_state
*dr
,
536 struct vio_net_desc
*desc
,
541 err
= ldc_put_dring_entry(port
->vio
.lp
, desc
, dr
->entry_size
,
542 (index
* dr
->entry_size
),
543 dr
->cookies
, dr
->ncookies
);
550 static int vnet_walk_rx_one(struct vnet_port
*port
,
551 struct vio_dring_state
*dr
,
552 u32 index
, int *needs_ack
)
554 struct vio_net_desc
*desc
= get_rx_desc(port
, dr
, index
);
555 struct vio_driver_state
*vio
= &port
->vio
;
560 return PTR_ERR(desc
);
562 if (desc
->hdr
.state
!= VIO_DESC_READY
)
567 viodbg(DATA
, "vio_walk_rx_one desc[%02x:%02x:%08x:%08x:%llx:%llx]\n",
568 desc
->hdr
.state
, desc
->hdr
.ack
,
569 desc
->size
, desc
->ncookies
,
570 desc
->cookies
[0].cookie_addr
,
571 desc
->cookies
[0].cookie_size
);
573 err
= vnet_rx_one(port
, desc
);
574 if (err
== -ECONNRESET
)
576 trace_vnet_rx_one(port
->vio
._local_sid
, port
->vio
._peer_sid
,
577 index
, desc
->hdr
.ack
);
578 desc
->hdr
.state
= VIO_DESC_DONE
;
579 err
= put_rx_desc(port
, dr
, desc
, index
);
582 *needs_ack
= desc
->hdr
.ack
;
586 static int vnet_walk_rx(struct vnet_port
*port
, struct vio_dring_state
*dr
,
587 u32 start
, u32 end
, int *npkts
, int budget
)
589 struct vio_driver_state
*vio
= &port
->vio
;
590 int ack_start
= -1, ack_end
= -1;
591 bool send_ack
= true;
593 end
= (end
== (u32
)-1) ? vio_dring_prev(dr
, start
)
594 : vio_dring_next(dr
, end
);
596 viodbg(DATA
, "vnet_walk_rx start[%08x] end[%08x]\n", start
, end
);
598 while (start
!= end
) {
599 int ack
= 0, err
= vnet_walk_rx_one(port
, dr
, start
, &ack
);
601 if (err
== -ECONNRESET
)
609 start
= vio_dring_next(dr
, start
);
610 if (ack
&& start
!= end
) {
611 err
= vnet_send_ack(port
, dr
, ack_start
, ack_end
,
613 if (err
== -ECONNRESET
)
617 if ((*npkts
) >= budget
) {
622 if (unlikely(ack_start
== -1)) {
623 ack_end
= vio_dring_prev(dr
, start
);
627 port
->napi_resume
= false;
628 trace_vnet_tx_send_stopped_ack(port
->vio
._local_sid
,
631 return vnet_send_ack(port
, dr
, ack_start
, ack_end
,
634 trace_vnet_tx_defer_stopped_ack(port
->vio
._local_sid
,
637 port
->napi_resume
= true;
638 port
->napi_stop_idx
= ack_end
;
643 static int vnet_rx(struct vnet_port
*port
, void *msgbuf
, int *npkts
,
646 struct vio_dring_data
*pkt
= msgbuf
;
647 struct vio_dring_state
*dr
= &port
->vio
.drings
[VIO_DRIVER_RX_RING
];
648 struct vio_driver_state
*vio
= &port
->vio
;
650 viodbg(DATA
, "vnet_rx stype_env[%04x] seq[%016llx] rcv_nxt[%016llx]\n",
651 pkt
->tag
.stype_env
, pkt
->seq
, dr
->rcv_nxt
);
653 if (unlikely(pkt
->tag
.stype_env
!= VIO_DRING_DATA
))
655 if (unlikely(pkt
->seq
!= dr
->rcv_nxt
)) {
656 pr_err("RX out of sequence seq[0x%llx] rcv_nxt[0x%llx]\n",
657 pkt
->seq
, dr
->rcv_nxt
);
661 if (!port
->napi_resume
)
664 /* XXX Validate pkt->start_idx and pkt->end_idx XXX */
666 return vnet_walk_rx(port
, dr
, pkt
->start_idx
, pkt
->end_idx
,
670 static int idx_is_pending(struct vio_dring_state
*dr
, u32 end
)
675 while (idx
!= dr
->prod
) {
680 idx
= vio_dring_next(dr
, idx
);
685 static int vnet_ack(struct vnet_port
*port
, void *msgbuf
)
687 struct vio_dring_state
*dr
= &port
->vio
.drings
[VIO_DRIVER_TX_RING
];
688 struct vio_dring_data
*pkt
= msgbuf
;
689 struct net_device
*dev
;
691 struct vio_net_desc
*desc
;
692 struct netdev_queue
*txq
;
694 if (unlikely(pkt
->tag
.stype_env
!= VIO_DRING_DATA
))
698 dev
= VNET_PORT_TO_NET_DEVICE(port
);
700 if (unlikely(!idx_is_pending(dr
, end
))) {
701 netif_tx_unlock(dev
);
705 /* sync for race conditions with vnet_start_xmit() and tell xmit it
706 * is time to send a trigger.
708 trace_vnet_rx_stopped_ack(port
->vio
._local_sid
,
709 port
->vio
._peer_sid
, end
);
710 dr
->cons
= vio_dring_next(dr
, end
);
711 desc
= vio_dring_entry(dr
, dr
->cons
);
712 if (desc
->hdr
.state
== VIO_DESC_READY
&& !port
->start_cons
) {
713 /* vnet_start_xmit() just populated this dring but missed
714 * sending the "start" LDC message to the consumer.
715 * Send a "start" trigger on its behalf.
717 if (__vnet_tx_trigger(port
, dr
->cons
) > 0)
718 port
->start_cons
= false;
720 port
->start_cons
= true;
722 port
->start_cons
= true;
724 netif_tx_unlock(dev
);
726 txq
= netdev_get_tx_queue(dev
, port
->q_index
);
727 if (unlikely(netif_tx_queue_stopped(txq
) &&
728 vnet_tx_dring_avail(dr
) >= VNET_TX_WAKEUP_THRESH(dr
)))
/* Handle a DRING_DATA NACK from the peer; currently a no-op. */
static int vnet_nack(struct vnet_port *port, void *msgbuf)
{
	/* XXX just reset or similar XXX */
	return 0;
}
740 static int handle_mcast(struct vnet_port
*port
, void *msgbuf
)
742 struct vio_net_mcast_info
*pkt
= msgbuf
;
743 struct net_device
*dev
= VNET_PORT_TO_NET_DEVICE(port
);
745 if (pkt
->tag
.stype
!= VIO_SUBTYPE_ACK
)
746 pr_err("%s: Got unexpected MCAST reply [%02x:%02x:%04x:%08x]\n",
/* If the queue is stopped, wake it up so that we'll
 * send out another START message at the next TX.
 */
759 static void maybe_tx_wakeup(struct vnet_port
*port
)
761 struct netdev_queue
*txq
;
763 txq
= netdev_get_tx_queue(VNET_PORT_TO_NET_DEVICE(port
),
765 __netif_tx_lock(txq
, smp_processor_id());
766 if (likely(netif_tx_queue_stopped(txq
)))
767 netif_tx_wake_queue(txq
);
768 __netif_tx_unlock(txq
);
771 bool sunvnet_port_is_up_common(struct vnet_port
*vnet
)
773 struct vio_driver_state
*vio
= &vnet
->vio
;
775 return !!(vio
->hs_state
& VIO_HS_COMPLETE
);
777 EXPORT_SYMBOL_GPL(sunvnet_port_is_up_common
);
779 static int vnet_event_napi(struct vnet_port
*port
, int budget
)
781 struct net_device
*dev
= VNET_PORT_TO_NET_DEVICE(port
);
782 struct vio_driver_state
*vio
= &port
->vio
;
786 /* we don't expect any other bits */
787 BUG_ON(port
->rx_event
& ~(LDC_EVENT_DATA_READY
|
791 /* RESET takes precedent over any other event */
792 if (port
->rx_event
& LDC_EVENT_RESET
) {
793 /* a link went down */
795 if (port
->vsw
== 1) {
796 netif_tx_stop_all_queues(dev
);
797 netif_carrier_off(dev
);
800 vio_link_state_change(vio
, LDC_EVENT_RESET
);
801 vnet_port_reset(port
);
804 /* If the device is running but its tx queue was
805 * stopped (due to flow control), restart it.
806 * This is necessary since vnet_port_reset()
807 * clears the tx drings and thus we may never get
808 * back a VIO_TYPE_DATA ACK packet - which is
809 * the normal mechanism to restart the tx queue.
811 if (netif_running(dev
))
812 maybe_tx_wakeup(port
);
815 port
->stats
.event_reset
++;
819 if (port
->rx_event
& LDC_EVENT_UP
) {
822 if (port
->vsw
== 1) {
823 netif_carrier_on(port
->dev
);
824 netif_tx_start_all_queues(port
->dev
);
827 vio_link_state_change(vio
, LDC_EVENT_UP
);
829 port
->stats
.event_up
++;
837 struct vio_msg_tag tag
;
841 if (port
->napi_resume
) {
842 struct vio_dring_data
*pkt
=
843 (struct vio_dring_data
*)&msgbuf
;
844 struct vio_dring_state
*dr
=
845 &port
->vio
.drings
[VIO_DRIVER_RX_RING
];
847 pkt
->tag
.type
= VIO_TYPE_DATA
;
848 pkt
->tag
.stype
= VIO_SUBTYPE_INFO
;
849 pkt
->tag
.stype_env
= VIO_DRING_DATA
;
850 pkt
->seq
= dr
->rcv_nxt
;
851 pkt
->start_idx
= vio_dring_next(dr
,
852 port
->napi_stop_idx
);
855 err
= ldc_read(vio
->lp
, &msgbuf
, sizeof(msgbuf
));
856 if (unlikely(err
< 0)) {
857 if (err
== -ECONNRESET
)
863 viodbg(DATA
, "TAG [%02x:%02x:%04x:%08x]\n",
866 msgbuf
.tag
.stype_env
,
868 err
= vio_validate_sid(vio
, &msgbuf
.tag
);
873 if (likely(msgbuf
.tag
.type
== VIO_TYPE_DATA
)) {
874 if (msgbuf
.tag
.stype
== VIO_SUBTYPE_INFO
) {
875 if (!sunvnet_port_is_up_common(port
)) {
876 /* failures like handshake_failure()
877 * may have cleaned up dring, but
878 * NAPI polling may bring us here.
883 err
= vnet_rx(port
, &msgbuf
, &npkts
, budget
);
888 } else if (msgbuf
.tag
.stype
== VIO_SUBTYPE_ACK
) {
889 err
= vnet_ack(port
, &msgbuf
);
892 } else if (msgbuf
.tag
.stype
== VIO_SUBTYPE_NACK
) {
893 err
= vnet_nack(port
, &msgbuf
);
895 } else if (msgbuf
.tag
.type
== VIO_TYPE_CTRL
) {
896 if (msgbuf
.tag
.stype_env
== VNET_MCAST_INFO
)
897 err
= handle_mcast(port
, &msgbuf
);
899 err
= vio_control_pkt_engine(vio
, &msgbuf
);
903 err
= vnet_handle_unknown(port
, &msgbuf
);
905 if (err
== -ECONNRESET
)
908 if (unlikely(tx_wakeup
&& err
!= -ECONNRESET
))
909 maybe_tx_wakeup(port
);
913 int sunvnet_poll_common(struct napi_struct
*napi
, int budget
)
915 struct vnet_port
*port
= container_of(napi
, struct vnet_port
, napi
);
916 struct vio_driver_state
*vio
= &port
->vio
;
917 int processed
= vnet_event_napi(port
, budget
);
919 if (processed
< budget
) {
920 napi_complete_done(napi
, processed
);
921 port
->rx_event
&= ~LDC_EVENT_DATA_READY
;
922 vio_set_intr(vio
->vdev
->rx_ino
, HV_INTR_ENABLED
);
926 EXPORT_SYMBOL_GPL(sunvnet_poll_common
);
928 void sunvnet_event_common(void *arg
, int event
)
930 struct vnet_port
*port
= arg
;
931 struct vio_driver_state
*vio
= &port
->vio
;
933 port
->rx_event
|= event
;
934 vio_set_intr(vio
->vdev
->rx_ino
, HV_INTR_DISABLED
);
935 napi_schedule(&port
->napi
);
937 EXPORT_SYMBOL_GPL(sunvnet_event_common
);
939 static int __vnet_tx_trigger(struct vnet_port
*port
, u32 start
)
941 struct vio_dring_state
*dr
= &port
->vio
.drings
[VIO_DRIVER_TX_RING
];
942 struct vio_dring_data hdr
= {
944 .type
= VIO_TYPE_DATA
,
945 .stype
= VIO_SUBTYPE_INFO
,
946 .stype_env
= VIO_DRING_DATA
,
947 .sid
= vio_send_sid(&port
->vio
),
949 .dring_ident
= dr
->ident
,
957 trace_vnet_tx_pending_stopped_ack(port
->vio
._local_sid
,
959 port
->stop_rx_idx
, -1);
960 err
= vnet_send_ack(port
,
961 &port
->vio
.drings
[VIO_DRIVER_RX_RING
],
962 port
->stop_rx_idx
, -1,
968 hdr
.seq
= dr
->snd_nxt
;
971 err
= vio_ldc_send(&port
->vio
, &hdr
, sizeof(hdr
));
977 if ((delay
<<= 1) > 128)
979 if (retries
++ > VNET_MAX_RETRIES
)
981 } while (err
== -EAGAIN
);
982 trace_vnet_tx_trigger(port
->vio
._local_sid
,
983 port
->vio
._peer_sid
, start
, err
);
988 static struct sk_buff
*vnet_clean_tx_ring(struct vnet_port
*port
,
991 struct vio_dring_state
*dr
= &port
->vio
.drings
[VIO_DRIVER_TX_RING
];
992 struct sk_buff
*skb
= NULL
;
998 for (i
= 0; i
< VNET_TX_RING_SIZE
; ++i
) {
999 struct vio_net_desc
*d
;
1003 txi
= VNET_TX_RING_SIZE
- 1;
1005 d
= vio_dring_entry(dr
, txi
);
1007 if (d
->hdr
.state
== VIO_DESC_READY
) {
1011 if (port
->tx_bufs
[txi
].skb
) {
1012 if (d
->hdr
.state
!= VIO_DESC_DONE
)
1013 pr_notice("invalid ring buffer state %d\n",
1015 BUG_ON(port
->tx_bufs
[txi
].skb
->next
);
1017 port
->tx_bufs
[txi
].skb
->next
= skb
;
1018 skb
= port
->tx_bufs
[txi
].skb
;
1019 port
->tx_bufs
[txi
].skb
= NULL
;
1021 ldc_unmap(port
->vio
.lp
,
1022 port
->tx_bufs
[txi
].cookies
,
1023 port
->tx_bufs
[txi
].ncookies
);
1024 } else if (d
->hdr
.state
== VIO_DESC_FREE
) {
1027 d
->hdr
.state
= VIO_DESC_FREE
;
1032 static inline void vnet_free_skbs(struct sk_buff
*skb
)
1034 struct sk_buff
*next
;
1044 void sunvnet_clean_timer_expire_common(struct timer_list
*t
)
1046 struct vnet_port
*port
= from_timer(port
, t
, clean_timer
);
1047 struct sk_buff
*freeskbs
;
1050 netif_tx_lock(VNET_PORT_TO_NET_DEVICE(port
));
1051 freeskbs
= vnet_clean_tx_ring(port
, &pending
);
1052 netif_tx_unlock(VNET_PORT_TO_NET_DEVICE(port
));
1054 vnet_free_skbs(freeskbs
);
1057 (void)mod_timer(&port
->clean_timer
,
1058 jiffies
+ VNET_CLEAN_TIMEOUT
);
1060 del_timer(&port
->clean_timer
);
1062 EXPORT_SYMBOL_GPL(sunvnet_clean_timer_expire_common
);
1064 static inline int vnet_skb_map(struct ldc_channel
*lp
, struct sk_buff
*skb
,
1065 struct ldc_trans_cookie
*cookies
, int ncookies
,
1066 unsigned int map_perm
)
1068 int i
, nc
, err
, blen
;
1071 blen
= skb_headlen(skb
);
1072 if (blen
< ETH_ZLEN
)
1074 blen
+= VNET_PACKET_SKIP
;
1075 blen
+= 8 - (blen
& 7);
1077 err
= ldc_map_single(lp
, skb
->data
- VNET_PACKET_SKIP
, blen
, cookies
,
1078 ncookies
, map_perm
);
1083 for (i
= 0; i
< skb_shinfo(skb
)->nr_frags
; i
++) {
1084 skb_frag_t
*f
= &skb_shinfo(skb
)->frags
[i
];
1087 if (nc
< ncookies
) {
1088 vaddr
= kmap_atomic(skb_frag_page(f
));
1089 blen
= skb_frag_size(f
);
1090 blen
+= 8 - (blen
& 7);
1091 err
= ldc_map_single(lp
, vaddr
+ skb_frag_off(f
),
1092 blen
, cookies
+ nc
, ncookies
- nc
,
1094 kunmap_atomic(vaddr
);
1100 ldc_unmap(lp
, cookies
, nc
);
1108 static inline struct sk_buff
*vnet_skb_shape(struct sk_buff
*skb
, int ncookies
)
1110 struct sk_buff
*nskb
;
1111 int i
, len
, pad
, docopy
;
1115 if (len
< ETH_ZLEN
) {
1116 pad
+= ETH_ZLEN
- skb
->len
;
1119 len
+= VNET_PACKET_SKIP
;
1120 pad
+= 8 - (len
& 7);
1122 /* make sure we have enough cookies and alignment in every frag */
1123 docopy
= skb_shinfo(skb
)->nr_frags
>= ncookies
;
1124 for (i
= 0; i
< skb_shinfo(skb
)->nr_frags
; i
++) {
1125 skb_frag_t
*f
= &skb_shinfo(skb
)->frags
[i
];
1127 docopy
|= skb_frag_off(f
) & 7;
1129 if (((unsigned long)skb
->data
& 7) != VNET_PACKET_SKIP
||
1130 skb_tailroom(skb
) < pad
||
1131 skb_headroom(skb
) < VNET_PACKET_SKIP
|| docopy
) {
1132 int start
= 0, offset
;
1135 len
= skb
->len
> ETH_ZLEN
? skb
->len
: ETH_ZLEN
;
1136 nskb
= alloc_and_align_skb(skb
->dev
, len
);
1141 skb_reserve(nskb
, VNET_PACKET_SKIP
);
1143 nskb
->protocol
= skb
->protocol
;
1144 offset
= skb_mac_header(skb
) - skb
->data
;
1145 skb_set_mac_header(nskb
, offset
);
1146 offset
= skb_network_header(skb
) - skb
->data
;
1147 skb_set_network_header(nskb
, offset
);
1148 offset
= skb_transport_header(skb
) - skb
->data
;
1149 skb_set_transport_header(nskb
, offset
);
1152 nskb
->csum_offset
= skb
->csum_offset
;
1153 nskb
->ip_summed
= skb
->ip_summed
;
1155 if (skb
->ip_summed
== CHECKSUM_PARTIAL
)
1156 start
= skb_checksum_start_offset(skb
);
1158 int offset
= start
+ nskb
->csum_offset
;
1160 /* copy the headers, no csum here */
1161 if (skb_copy_bits(skb
, 0, nskb
->data
, start
)) {
1162 dev_kfree_skb(nskb
);
1167 /* copy the rest, with csum calculation */
1168 *(__sum16
*)(skb
->data
+ offset
) = 0;
1169 csum
= skb_copy_and_csum_bits(skb
, start
,
1173 /* add in the header checksums */
1174 if (skb
->protocol
== htons(ETH_P_IP
)) {
1175 struct iphdr
*iph
= ip_hdr(nskb
);
1177 if (iph
->protocol
== IPPROTO_TCP
||
1178 iph
->protocol
== IPPROTO_UDP
) {
1179 csum
= csum_tcpudp_magic(iph
->saddr
,
1185 } else if (skb
->protocol
== htons(ETH_P_IPV6
)) {
1186 struct ipv6hdr
*ip6h
= ipv6_hdr(nskb
);
1188 if (ip6h
->nexthdr
== IPPROTO_TCP
||
1189 ip6h
->nexthdr
== IPPROTO_UDP
) {
1190 csum
= csum_ipv6_magic(&ip6h
->saddr
,
1198 /* save the final result */
1199 *(__sum16
*)(nskb
->data
+ offset
) = csum
;
1201 nskb
->ip_summed
= CHECKSUM_NONE
;
1202 } else if (skb_copy_bits(skb
, 0, nskb
->data
, skb
->len
)) {
1203 dev_kfree_skb(nskb
);
1207 (void)skb_put(nskb
, skb
->len
);
1208 if (skb_is_gso(skb
)) {
1209 skb_shinfo(nskb
)->gso_size
= skb_shinfo(skb
)->gso_size
;
1210 skb_shinfo(nskb
)->gso_type
= skb_shinfo(skb
)->gso_type
;
1212 nskb
->queue_mapping
= skb
->queue_mapping
;
1220 vnet_handle_offloads(struct vnet_port
*port
, struct sk_buff
*skb
,
1221 struct vnet_port
*(*vnet_tx_port
)
1222 (struct sk_buff
*, struct net_device
*))
1224 struct net_device
*dev
= VNET_PORT_TO_NET_DEVICE(port
);
1225 struct vio_dring_state
*dr
= &port
->vio
.drings
[VIO_DRIVER_TX_RING
];
1226 struct sk_buff
*segs
, *curr
, *next
;
1227 int maclen
, datalen
;
1229 int gso_size
, gso_type
, gso_segs
;
1230 int hlen
= skb_transport_header(skb
) - skb_mac_header(skb
);
1231 int proto
= IPPROTO_IP
;
1233 if (skb
->protocol
== htons(ETH_P_IP
))
1234 proto
= ip_hdr(skb
)->protocol
;
1235 else if (skb
->protocol
== htons(ETH_P_IPV6
))
1236 proto
= ipv6_hdr(skb
)->nexthdr
;
1238 if (proto
== IPPROTO_TCP
) {
1239 hlen
+= tcp_hdr(skb
)->doff
* 4;
1240 } else if (proto
== IPPROTO_UDP
) {
1241 hlen
+= sizeof(struct udphdr
);
1243 pr_err("vnet_handle_offloads GSO with unknown transport "
1244 "protocol %d tproto %d\n", skb
->protocol
, proto
);
1245 hlen
= 128; /* XXX */
1247 datalen
= port
->tsolen
- hlen
;
1249 gso_size
= skb_shinfo(skb
)->gso_size
;
1250 gso_type
= skb_shinfo(skb
)->gso_type
;
1251 gso_segs
= skb_shinfo(skb
)->gso_segs
;
1253 if (port
->tso
&& gso_size
< datalen
)
1254 gso_segs
= DIV_ROUND_UP(skb
->len
- hlen
, datalen
);
1256 if (unlikely(vnet_tx_dring_avail(dr
) < gso_segs
)) {
1257 struct netdev_queue
*txq
;
1259 txq
= netdev_get_tx_queue(dev
, port
->q_index
);
1260 netif_tx_stop_queue(txq
);
1261 if (vnet_tx_dring_avail(dr
) < skb_shinfo(skb
)->gso_segs
)
1262 return NETDEV_TX_BUSY
;
1263 netif_tx_wake_queue(txq
);
1266 maclen
= skb_network_header(skb
) - skb_mac_header(skb
);
1267 skb_pull(skb
, maclen
);
1269 if (port
->tso
&& gso_size
< datalen
) {
1270 if (skb_unclone(skb
, GFP_ATOMIC
))
1273 /* segment to TSO size */
1274 skb_shinfo(skb
)->gso_size
= datalen
;
1275 skb_shinfo(skb
)->gso_segs
= gso_segs
;
1277 segs
= skb_gso_segment(skb
, dev
->features
& ~NETIF_F_TSO
);
1281 skb_push(skb
, maclen
);
1282 skb_reset_mac_header(skb
);
1285 skb_list_walk_safe(segs
, curr
, next
) {
1286 skb_mark_not_on_list(curr
);
1287 if (port
->tso
&& curr
->len
> dev
->mtu
) {
1288 skb_shinfo(curr
)->gso_size
= gso_size
;
1289 skb_shinfo(curr
)->gso_type
= gso_type
;
1290 skb_shinfo(curr
)->gso_segs
=
1291 DIV_ROUND_UP(curr
->len
- hlen
, gso_size
);
1293 skb_shinfo(curr
)->gso_size
= 0;
1296 skb_push(curr
, maclen
);
1297 skb_reset_mac_header(curr
);
1298 memcpy(skb_mac_header(curr
), skb_mac_header(skb
),
1300 curr
->csum_start
= skb_transport_header(curr
) - curr
->head
;
1301 if (ip_hdr(curr
)->protocol
== IPPROTO_TCP
)
1302 curr
->csum_offset
= offsetof(struct tcphdr
, check
);
1303 else if (ip_hdr(curr
)->protocol
== IPPROTO_UDP
)
1304 curr
->csum_offset
= offsetof(struct udphdr
, check
);
1306 if (!(status
& NETDEV_TX_MASK
))
1307 status
= sunvnet_start_xmit_common(curr
, dev
,
1309 if (status
& NETDEV_TX_MASK
)
1310 dev_kfree_skb_any(curr
);
1313 if (!(status
& NETDEV_TX_MASK
))
1314 dev_kfree_skb_any(skb
);
1317 dev
->stats
.tx_dropped
++;
1318 dev_kfree_skb_any(skb
);
1319 return NETDEV_TX_OK
;
1323 sunvnet_start_xmit_common(struct sk_buff
*skb
, struct net_device
*dev
,
1324 struct vnet_port
*(*vnet_tx_port
)
1325 (struct sk_buff
*, struct net_device
*))
1327 struct vnet_port
*port
= NULL
;
1328 struct vio_dring_state
*dr
;
1329 struct vio_net_desc
*d
;
1331 struct sk_buff
*freeskbs
= NULL
;
1333 unsigned pending
= 0;
1334 struct netdev_queue
*txq
;
1337 port
= vnet_tx_port(skb
, dev
);
1338 if (unlikely(!port
))
1341 if (skb_is_gso(skb
) && skb
->len
> port
->tsolen
) {
1342 err
= vnet_handle_offloads(port
, skb
, vnet_tx_port
);
1347 if (!skb_is_gso(skb
) && skb
->len
> port
->rmtu
) {
1348 unsigned long localmtu
= port
->rmtu
- ETH_HLEN
;
1350 if (vio_version_after_eq(&port
->vio
, 1, 3))
1351 localmtu
-= VLAN_HLEN
;
1353 if (skb
->protocol
== htons(ETH_P_IP
))
1354 icmp_ndo_send(skb
, ICMP_DEST_UNREACH
, ICMP_FRAG_NEEDED
,
1356 #if IS_ENABLED(CONFIG_IPV6)
1357 else if (skb
->protocol
== htons(ETH_P_IPV6
))
1358 icmpv6_ndo_send(skb
, ICMPV6_PKT_TOOBIG
, 0, localmtu
);
1363 skb
= vnet_skb_shape(skb
, 2);
1368 if (skb
->ip_summed
== CHECKSUM_PARTIAL
) {
1369 if (skb
->protocol
== htons(ETH_P_IP
))
1370 vnet_fullcsum_ipv4(skb
);
1371 #if IS_ENABLED(CONFIG_IPV6)
1372 else if (skb
->protocol
== htons(ETH_P_IPV6
))
1373 vnet_fullcsum_ipv6(skb
);
1377 dr
= &port
->vio
.drings
[VIO_DRIVER_TX_RING
];
1378 i
= skb_get_queue_mapping(skb
);
1379 txq
= netdev_get_tx_queue(dev
, i
);
1380 if (unlikely(vnet_tx_dring_avail(dr
) < 1)) {
1381 if (!netif_tx_queue_stopped(txq
)) {
1382 netif_tx_stop_queue(txq
);
1384 /* This is a hard error, log it. */
1385 netdev_err(dev
, "BUG! Tx Ring full when queue awake!\n");
1386 dev
->stats
.tx_errors
++;
1389 return NETDEV_TX_BUSY
;
1392 d
= vio_dring_cur(dr
);
1396 freeskbs
= vnet_clean_tx_ring(port
, &pending
);
1398 BUG_ON(port
->tx_bufs
[txi
].skb
);
1404 err
= vnet_skb_map(port
->vio
.lp
, skb
, port
->tx_bufs
[txi
].cookies
, 2,
1405 (LDC_MAP_SHADOW
| LDC_MAP_DIRECT
| LDC_MAP_RW
));
1407 netdev_info(dev
, "tx buffer map error %d\n", err
);
1411 port
->tx_bufs
[txi
].skb
= skb
;
1413 port
->tx_bufs
[txi
].ncookies
= err
;
1415 /* We don't rely on the ACKs to free the skb in vnet_start_xmit(),
1416 * thus it is safe to not set VIO_ACK_ENABLE for each transmission:
1417 * the protocol itself does not require it as long as the peer
1418 * sends a VIO_SUBTYPE_ACK for VIO_DRING_STOPPED.
1420 * An ACK for every packet in the ring is expensive as the
1421 * sending of LDC messages is slow and affects performance.
1423 d
->hdr
.ack
= VIO_ACK_DISABLE
;
1425 d
->ncookies
= port
->tx_bufs
[txi
].ncookies
;
1426 for (i
= 0; i
< d
->ncookies
; i
++)
1427 d
->cookies
[i
] = port
->tx_bufs
[txi
].cookies
[i
];
1428 if (vio_version_after_eq(&port
->vio
, 1, 7)) {
1429 struct vio_net_dext
*dext
= vio_net_ext(d
);
1431 memset(dext
, 0, sizeof(*dext
));
1432 if (skb_is_gso(port
->tx_bufs
[txi
].skb
)) {
1433 dext
->ipv4_lso_mss
= skb_shinfo(port
->tx_bufs
[txi
].skb
)
1435 dext
->flags
|= VNET_PKT_IPV4_LSO
;
1437 if (vio_version_after_eq(&port
->vio
, 1, 8) &&
1438 !port
->switch_port
) {
1439 dext
->flags
|= VNET_PKT_HCK_IPV4_HDRCKSUM_OK
;
1440 dext
->flags
|= VNET_PKT_HCK_FULLCKSUM_OK
;
1444 /* This has to be a non-SMP write barrier because we are writing
1445 * to memory which is shared with the peer LDOM.
1449 d
->hdr
.state
= VIO_DESC_READY
;
1451 /* Exactly one ldc "start" trigger (for dr->cons) needs to be sent
1452 * to notify the consumer that some descriptors are READY.
1453 * After that "start" trigger, no additional triggers are needed until
1454 * a DRING_STOPPED is received from the consumer. The dr->cons field
1455 * (set up by vnet_ack()) has the value of the next dring index
1456 * that has not yet been ack-ed. We send a "start" trigger here
1457 * if, and only if, start_cons is true (reset it afterward). Conversely,
1458 * vnet_ack() should check if the dring corresponding to cons
1459 * is marked READY, but start_cons was false.
1460 * If so, vnet_ack() should send out the missed "start" trigger.
1462 * Note that the dma_wmb() above makes sure the cookies et al. are
1463 * not globally visible before the VIO_DESC_READY, and that the
1464 * stores are ordered correctly by the compiler. The consumer will
1465 * not proceed until the VIO_DESC_READY is visible assuring that
1466 * the consumer does not observe anything related to descriptors
1467 * out of order. The HV trap from the LDC start trigger is the
1468 * producer to consumer announcement that work is available to the
1471 if (!port
->start_cons
) { /* previous trigger suffices */
1472 trace_vnet_skip_tx_trigger(port
->vio
._local_sid
,
1473 port
->vio
._peer_sid
, dr
->cons
);
1474 goto ldc_start_done
;
1477 err
= __vnet_tx_trigger(port
, dr
->cons
);
1478 if (unlikely(err
< 0)) {
1479 netdev_info(dev
, "TX trigger error %d\n", err
);
1480 d
->hdr
.state
= VIO_DESC_FREE
;
1481 skb
= port
->tx_bufs
[txi
].skb
;
1482 port
->tx_bufs
[txi
].skb
= NULL
;
1483 dev
->stats
.tx_carrier_errors
++;
1488 port
->start_cons
= false;
1490 dev
->stats
.tx_packets
++;
1491 dev
->stats
.tx_bytes
+= port
->tx_bufs
[txi
].skb
->len
;
1492 port
->stats
.tx_packets
++;
1493 port
->stats
.tx_bytes
+= port
->tx_bufs
[txi
].skb
->len
;
1495 dr
->prod
= (dr
->prod
+ 1) & (VNET_TX_RING_SIZE
- 1);
1496 if (unlikely(vnet_tx_dring_avail(dr
) < 1)) {
1497 netif_tx_stop_queue(txq
);
1499 if (vnet_tx_dring_avail(dr
) > VNET_TX_WAKEUP_THRESH(dr
))
1500 netif_tx_wake_queue(txq
);
1503 (void)mod_timer(&port
->clean_timer
, jiffies
+ VNET_CLEAN_TIMEOUT
);
1506 vnet_free_skbs(freeskbs
);
1508 return NETDEV_TX_OK
;
1512 (void)mod_timer(&port
->clean_timer
,
1513 jiffies
+ VNET_CLEAN_TIMEOUT
);
1515 del_timer(&port
->clean_timer
);
1518 vnet_free_skbs(freeskbs
);
1519 dev
->stats
.tx_dropped
++;
1520 return NETDEV_TX_OK
;
1522 EXPORT_SYMBOL_GPL(sunvnet_start_xmit_common
);
1524 void sunvnet_tx_timeout_common(struct net_device
*dev
, unsigned int txqueue
)
1526 /* XXX Implement me XXX */
1528 EXPORT_SYMBOL_GPL(sunvnet_tx_timeout_common
);
1530 int sunvnet_open_common(struct net_device
*dev
)
1532 netif_carrier_on(dev
);
1533 netif_tx_start_all_queues(dev
);
1537 EXPORT_SYMBOL_GPL(sunvnet_open_common
);
1539 int sunvnet_close_common(struct net_device
*dev
)
1541 netif_tx_stop_all_queues(dev
);
1542 netif_carrier_off(dev
);
1546 EXPORT_SYMBOL_GPL(sunvnet_close_common
);
1548 static struct vnet_mcast_entry
*__vnet_mc_find(struct vnet
*vp
, u8
*addr
)
1550 struct vnet_mcast_entry
*m
;
1552 for (m
= vp
->mcast_list
; m
; m
= m
->next
) {
1553 if (ether_addr_equal(m
->addr
, addr
))
1559 static void __update_mc_list(struct vnet
*vp
, struct net_device
*dev
)
1561 struct netdev_hw_addr
*ha
;
1563 netdev_for_each_mc_addr(ha
, dev
) {
1564 struct vnet_mcast_entry
*m
;
1566 m
= __vnet_mc_find(vp
, ha
->addr
);
1573 m
= kzalloc(sizeof(*m
), GFP_ATOMIC
);
1576 memcpy(m
->addr
, ha
->addr
, ETH_ALEN
);
1579 m
->next
= vp
->mcast_list
;
1585 static void __send_mc_list(struct vnet
*vp
, struct vnet_port
*port
)
1587 struct vio_net_mcast_info info
;
1588 struct vnet_mcast_entry
*m
, **pp
;
1591 memset(&info
, 0, sizeof(info
));
1593 info
.tag
.type
= VIO_TYPE_CTRL
;
1594 info
.tag
.stype
= VIO_SUBTYPE_INFO
;
1595 info
.tag
.stype_env
= VNET_MCAST_INFO
;
1596 info
.tag
.sid
= vio_send_sid(&port
->vio
);
1600 for (m
= vp
->mcast_list
; m
; m
= m
->next
) {
1604 memcpy(&info
.mcast_addr
[n_addrs
* ETH_ALEN
],
1606 if (++n_addrs
== VNET_NUM_MCAST
) {
1607 info
.count
= n_addrs
;
1609 (void)vio_ldc_send(&port
->vio
, &info
,
1615 info
.count
= n_addrs
;
1616 (void)vio_ldc_send(&port
->vio
, &info
, sizeof(info
));
1622 pp
= &vp
->mcast_list
;
1623 while ((m
= *pp
) != NULL
) {
1630 memcpy(&info
.mcast_addr
[n_addrs
* ETH_ALEN
],
1632 if (++n_addrs
== VNET_NUM_MCAST
) {
1633 info
.count
= n_addrs
;
1634 (void)vio_ldc_send(&port
->vio
, &info
,
1643 info
.count
= n_addrs
;
1644 (void)vio_ldc_send(&port
->vio
, &info
, sizeof(info
));
1648 void sunvnet_set_rx_mode_common(struct net_device
*dev
, struct vnet
*vp
)
1650 struct vnet_port
*port
;
1653 list_for_each_entry_rcu(port
, &vp
->port_list
, list
) {
1654 if (port
->switch_port
) {
1655 __update_mc_list(vp
, dev
);
1656 __send_mc_list(vp
, port
);
1662 EXPORT_SYMBOL_GPL(sunvnet_set_rx_mode_common
);
1664 int sunvnet_set_mac_addr_common(struct net_device
*dev
, void *p
)
1668 EXPORT_SYMBOL_GPL(sunvnet_set_mac_addr_common
);
1670 void sunvnet_port_free_tx_bufs_common(struct vnet_port
*port
)
1672 struct vio_dring_state
*dr
;
1675 dr
= &port
->vio
.drings
[VIO_DRIVER_TX_RING
];
1680 for (i
= 0; i
< VNET_TX_RING_SIZE
; i
++) {
1681 struct vio_net_desc
*d
;
1682 void *skb
= port
->tx_bufs
[i
].skb
;
1687 d
= vio_dring_entry(dr
, i
);
1689 ldc_unmap(port
->vio
.lp
,
1690 port
->tx_bufs
[i
].cookies
,
1691 port
->tx_bufs
[i
].ncookies
);
1693 port
->tx_bufs
[i
].skb
= NULL
;
1694 d
->hdr
.state
= VIO_DESC_FREE
;
1696 ldc_free_exp_dring(port
->vio
.lp
, dr
->base
,
1697 (dr
->entry_size
* dr
->num_entries
),
1698 dr
->cookies
, dr
->ncookies
);
1701 dr
->num_entries
= 0;
1705 EXPORT_SYMBOL_GPL(sunvnet_port_free_tx_bufs_common
);
1707 void vnet_port_reset(struct vnet_port
*port
)
1709 del_timer(&port
->clean_timer
);
1710 sunvnet_port_free_tx_bufs_common(port
);
1712 port
->tso
= (port
->vsw
== 0); /* no tso in vsw, misbehaves in bridge */
1715 EXPORT_SYMBOL_GPL(vnet_port_reset
);
1717 static int vnet_port_alloc_tx_ring(struct vnet_port
*port
)
1719 struct vio_dring_state
*dr
;
1720 unsigned long len
, elen
;
1721 int i
, err
, ncookies
;
1724 dr
= &port
->vio
.drings
[VIO_DRIVER_TX_RING
];
1726 elen
= sizeof(struct vio_net_desc
) +
1727 sizeof(struct ldc_trans_cookie
) * 2;
1728 if (vio_version_after_eq(&port
->vio
, 1, 7))
1729 elen
+= sizeof(struct vio_net_dext
);
1730 len
= VNET_TX_RING_SIZE
* elen
;
1732 ncookies
= VIO_MAX_RING_COOKIES
;
1733 dring
= ldc_alloc_exp_dring(port
->vio
.lp
, len
,
1734 dr
->cookies
, &ncookies
,
1738 if (IS_ERR(dring
)) {
1739 err
= PTR_ERR(dring
);
1744 dr
->entry_size
= elen
;
1745 dr
->num_entries
= VNET_TX_RING_SIZE
;
1748 port
->start_cons
= true; /* need an initial trigger */
1749 dr
->pending
= VNET_TX_RING_SIZE
;
1750 dr
->ncookies
= ncookies
;
1752 for (i
= 0; i
< VNET_TX_RING_SIZE
; ++i
) {
1753 struct vio_net_desc
*d
;
1755 d
= vio_dring_entry(dr
, i
);
1756 d
->hdr
.state
= VIO_DESC_FREE
;
1761 sunvnet_port_free_tx_bufs_common(port
);
1766 #ifdef CONFIG_NET_POLL_CONTROLLER
1767 void sunvnet_poll_controller_common(struct net_device
*dev
, struct vnet
*vp
)
1769 struct vnet_port
*port
;
1770 unsigned long flags
;
1772 spin_lock_irqsave(&vp
->lock
, flags
);
1773 if (!list_empty(&vp
->port_list
)) {
1774 port
= list_entry(vp
->port_list
.next
, struct vnet_port
, list
);
1775 napi_schedule(&port
->napi
);
1777 spin_unlock_irqrestore(&vp
->lock
, flags
);
1779 EXPORT_SYMBOL_GPL(sunvnet_poll_controller_common
);
1782 void sunvnet_port_add_txq_common(struct vnet_port
*port
)
1784 struct vnet
*vp
= port
->vp
;
1788 /* find the first least-used q
1789 * When there are more ldoms than q's, we start to
1790 * double up on ports per queue.
1792 for (i
= 0; i
< VNET_MAX_TXQS
; i
++) {
1793 if (vp
->q_used
[i
] == 0) {
1797 if (vp
->q_used
[i
] < vp
->q_used
[smallest
])
1802 vp
->q_used
[smallest
]++;
1803 port
->q_index
= smallest
;
1805 EXPORT_SYMBOL_GPL(sunvnet_port_add_txq_common
);
1807 void sunvnet_port_rm_txq_common(struct vnet_port
*port
)
1810 port
->vp
->q_used
[port
->q_index
]--;
1813 EXPORT_SYMBOL_GPL(sunvnet_port_rm_txq_common
);