2 * This file is provided under a dual BSD/GPLv2 license. When using or
3 * redistributing this file, you may do so under either license.
7 * Copyright(c) 2012 Intel Corporation. All rights reserved.
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of version 2 of the GNU General Public License as
11 * published by the Free Software Foundation.
15 * Copyright(c) 2012 Intel Corporation. All rights reserved.
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions
21 * * Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * * Redistributions in binary form must reproduce the above copy
24 * notice, this list of conditions and the following disclaimer in
25 * the documentation and/or other materials provided with the
27 * * Neither the name of Intel Corporation nor the names of its
28 * contributors may be used to endorse or promote products derived
29 * from this software without specific prior written permission.
31 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
32 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
33 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
34 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
35 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
37 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
38 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
39 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
40 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
41 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 * Intel PCIe NTB Linux driver
45 * Contact Information:
46 * Jon Mason <jon.mason@intel.com>
48 #include <linux/debugfs.h>
49 #include <linux/delay.h>
50 #include <linux/init.h>
51 #include <linux/interrupt.h>
52 #include <linux/module.h>
53 #include <linux/pci.h>
54 #include <linux/random.h>
55 #include <linux/slab.h>
59 #define NTB_NAME "Intel(R) PCI-E Non-Transparent Bridge Driver"
62 MODULE_DESCRIPTION(NTB_NAME
);
63 MODULE_VERSION(NTB_VER
);
64 MODULE_LICENSE("Dual BSD/GPL");
65 MODULE_AUTHOR("Intel Corporation");
68 NTB_CONN_TRANSPARENT
= 0,
83 static struct dentry
*debugfs_dir
;
85 #define BWD_LINK_RECOVERY_TIME 500
/*
 * Translate memory window 0,1,2 to BAR 2,4,5.
 *
 * The argument is parenthesized so that an expression argument such as
 * MW_TO_BAR(a & b) is compared as a whole rather than being re-associated
 * by operator precedence. Any value other than 0 or 1 maps to BAR 5.
 * mw may be evaluated twice, so it must not have side effects.
 */
#define MW_TO_BAR(mw)	((mw) == 0 ? 2 : ((mw) == 1 ? 4 : 5))
90 static const struct pci_device_id ntb_pci_tbl
[] = {
91 {PCI_VDEVICE(INTEL
, PCI_DEVICE_ID_INTEL_NTB_B2B_BWD
)},
92 {PCI_VDEVICE(INTEL
, PCI_DEVICE_ID_INTEL_NTB_B2B_JSF
)},
93 {PCI_VDEVICE(INTEL
, PCI_DEVICE_ID_INTEL_NTB_B2B_SNB
)},
94 {PCI_VDEVICE(INTEL
, PCI_DEVICE_ID_INTEL_NTB_B2B_IVT
)},
95 {PCI_VDEVICE(INTEL
, PCI_DEVICE_ID_INTEL_NTB_B2B_HSX
)},
96 {PCI_VDEVICE(INTEL
, PCI_DEVICE_ID_INTEL_NTB_PS_JSF
)},
97 {PCI_VDEVICE(INTEL
, PCI_DEVICE_ID_INTEL_NTB_PS_SNB
)},
98 {PCI_VDEVICE(INTEL
, PCI_DEVICE_ID_INTEL_NTB_PS_IVT
)},
99 {PCI_VDEVICE(INTEL
, PCI_DEVICE_ID_INTEL_NTB_PS_HSX
)},
100 {PCI_VDEVICE(INTEL
, PCI_DEVICE_ID_INTEL_NTB_SS_JSF
)},
101 {PCI_VDEVICE(INTEL
, PCI_DEVICE_ID_INTEL_NTB_SS_SNB
)},
102 {PCI_VDEVICE(INTEL
, PCI_DEVICE_ID_INTEL_NTB_SS_IVT
)},
103 {PCI_VDEVICE(INTEL
, PCI_DEVICE_ID_INTEL_NTB_SS_HSX
)},
106 MODULE_DEVICE_TABLE(pci
, ntb_pci_tbl
);
/*
 * is_ntb_xeon() - match the PCI device ID of @ndev against the Xeon NTB IDs
 * @ndev: ntb_device whose ndev->pdev->device is examined
 *
 * The case labels cover the SS, PS and B2B variants of the JSF, SNB, IVT and
 * HSX device IDs. ntb_device_setup() uses the result as a truth value to
 * select ntb_xeon_setup().
 *
 * NOTE(review): the statements after the case labels (and any default branch)
 * are not visible in this excerpt; presumably a match yields non-zero and
 * anything else yields zero -- confirm against the full file.
 */
108 static int is_ntb_xeon(struct ntb_device
*ndev
)
110 switch (ndev
->pdev
->device
) {
111 case PCI_DEVICE_ID_INTEL_NTB_SS_JSF
:
112 case PCI_DEVICE_ID_INTEL_NTB_SS_SNB
:
113 case PCI_DEVICE_ID_INTEL_NTB_SS_IVT
:
114 case PCI_DEVICE_ID_INTEL_NTB_SS_HSX
:
115 case PCI_DEVICE_ID_INTEL_NTB_PS_JSF
:
116 case PCI_DEVICE_ID_INTEL_NTB_PS_SNB
:
117 case PCI_DEVICE_ID_INTEL_NTB_PS_IVT
:
118 case PCI_DEVICE_ID_INTEL_NTB_PS_HSX
:
119 case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF
:
120 case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB
:
121 case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT
:
122 case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX
:
/*
 * is_ntb_atom() - match the PCI device ID of @ndev against the Atom NTB ID
 * @ndev: ntb_device whose ndev->pdev->device is examined
 *
 * The only case label visible is PCI_DEVICE_ID_INTEL_NTB_B2B_BWD. Callers in
 * this file use the result as a truth value to choose the BWD-specific paths.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably non-zero on a match and zero otherwise -- confirm.
 */
131 static int is_ntb_atom(struct ntb_device
*ndev
)
133 switch (ndev
->pdev
->device
) {
134 case PCI_DEVICE_ID_INTEL_NTB_B2B_BWD
:
/*
 * ntb_set_errata_flags() - record hardware-errata workarounds for @ndev
 * @ndev: ntb_device whose wa_flags field is updated
 *
 * For every device ID listed in the case labels, WA_SNB_ERR is OR-ed into
 * ndev->wa_flags; other flag bits already set are preserved.
 *
 * NOTE(review): the note inside the switch says the workaround applies up to
 * IvyBridge and that Haswell uses a different one, yet the HSX device IDs are
 * part of the same case list and therefore also get WA_SNB_ERR. Confirm
 * which of the two (comment or case list) reflects the intended behaviour.
 */
143 static void ntb_set_errata_flags(struct ntb_device
*ndev
)
145 switch (ndev
->pdev
->device
) {
147 * this workaround applies to all platform up to IvyBridge
148 * Haswell has splitbar support and use a different workaround
150 case PCI_DEVICE_ID_INTEL_NTB_SS_JSF
:
151 case PCI_DEVICE_ID_INTEL_NTB_SS_SNB
:
152 case PCI_DEVICE_ID_INTEL_NTB_SS_IVT
:
153 case PCI_DEVICE_ID_INTEL_NTB_SS_HSX
:
154 case PCI_DEVICE_ID_INTEL_NTB_PS_JSF
:
155 case PCI_DEVICE_ID_INTEL_NTB_PS_SNB
:
156 case PCI_DEVICE_ID_INTEL_NTB_PS_IVT
:
157 case PCI_DEVICE_ID_INTEL_NTB_PS_HSX
:
158 case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF
:
159 case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB
:
160 case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT
:
161 case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX
:
162 ndev
->wa_flags
|= WA_SNB_ERR
;
168 * ntb_register_event_callback() - register event callback
169 * @ndev: pointer to ntb_device instance
170 * @func: callback function to register
172 * This function registers a callback for any HW driver events such as link
173 * up/down, power management notices, etc.
175 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
177 int ntb_register_event_callback(struct ntb_device
*ndev
,
178 void (*func
)(void *handle
,
179 enum ntb_hw_event event
))
184 ndev
->event_cb
= func
;
190 * ntb_unregister_event_callback() - unregisters the event callback
191 * @ndev: pointer to ntb_device instance
193 * This function unregisters the existing callback from transport
195 void ntb_unregister_event_callback(struct ntb_device
*ndev
)
197 ndev
->event_cb
= NULL
;
/*
 * ntb_irq_work() - tasklet function that runs a doorbell callback
 * @data: the struct ntb_db_cb pointer for the doorbell, passed as an
 *        unsigned long (this is how ntb_register_db_callback() hands it to
 *        tasklet_init())
 *
 * Invokes db_cb->callback(db_cb->data, db_cb->db_num) and stores the result
 * in rc. Two follow-up paths are visible: rescheduling db_cb->irq_work, or a
 * readw/clear_bit/writew read-modify-write of the ldb_mask register that
 * clears bit (db_num * bits_per_vector) -- the same sequence that
 * ntb_register_db_callback() labels "unmask interrupt".
 *
 * NOTE(review): the condition that selects between the two paths is not
 * visible in this excerpt; presumably a non-zero rc reschedules the tasklet
 * and a zero rc unmasks the doorbell -- confirm against the full file.
 */
200 static void ntb_irq_work(unsigned long data
)
202 struct ntb_db_cb
*db_cb
= (struct ntb_db_cb
*)data
;
205 rc
= db_cb
->callback(db_cb
->data
, db_cb
->db_num
);
207 tasklet_schedule(&db_cb
->irq_work
);
209 struct ntb_device
*ndev
= db_cb
->ndev
;
212 mask
= readw(ndev
->reg_ofs
.ldb_mask
);
213 clear_bit(db_cb
->db_num
* ndev
->bits_per_vector
, &mask
);
214 writew(mask
, ndev
->reg_ofs
.ldb_mask
);
219 * ntb_register_db_callback() - register a callback for doorbell interrupt
220 * @ndev: pointer to ntb_device instance
221 * @idx: doorbell index to register callback, zero based
222 * @data: pointer to be returned to caller with every callback
223 * @func: callback function to register
225 * This function registers a callback function for the doorbell interrupt
226 * on the primary side. The function will unmask the doorbell as well to
229 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
231 int ntb_register_db_callback(struct ntb_device
*ndev
, unsigned int idx
,
232 void *data
, int (*func
)(void *data
, int db_num
))
236 if (idx
>= ndev
->max_cbs
|| ndev
->db_cb
[idx
].callback
) {
237 dev_warn(&ndev
->pdev
->dev
, "Invalid Index.\n");
241 ndev
->db_cb
[idx
].callback
= func
;
242 ndev
->db_cb
[idx
].data
= data
;
243 ndev
->db_cb
[idx
].ndev
= ndev
;
245 tasklet_init(&ndev
->db_cb
[idx
].irq_work
, ntb_irq_work
,
246 (unsigned long) &ndev
->db_cb
[idx
]);
248 /* unmask interrupt */
249 mask
= readw(ndev
->reg_ofs
.ldb_mask
);
250 clear_bit(idx
* ndev
->bits_per_vector
, &mask
);
251 writew(mask
, ndev
->reg_ofs
.ldb_mask
);
257 * ntb_unregister_db_callback() - unregister a callback for doorbell interrupt
258 * @ndev: pointer to ntb_device instance
259 * @idx: doorbell index whose callback is unregistered, zero based
261 * This function unregisters a callback function for the doorbell interrupt
262 * on the primary side. The function will also mask the said doorbell.
264 void ntb_unregister_db_callback(struct ntb_device
*ndev
, unsigned int idx
)
268 if (idx
>= ndev
->max_cbs
|| !ndev
->db_cb
[idx
].callback
)
271 mask
= readw(ndev
->reg_ofs
.ldb_mask
);
272 set_bit(idx
* ndev
->bits_per_vector
, &mask
);
273 writew(mask
, ndev
->reg_ofs
.ldb_mask
);
275 tasklet_disable(&ndev
->db_cb
[idx
].irq_work
);
277 ndev
->db_cb
[idx
].callback
= NULL
;
281 * ntb_find_transport() - find the transport pointer
282 * @pdev: pointer to pci device
284 * Given the pci device pointer, return the transport pointer that was passed
285 * in when the transport was registered.
287 * RETURNS: pointer to transport.
289 void *ntb_find_transport(struct pci_dev
*pdev
)
291 struct ntb_device
*ndev
= pci_get_drvdata(pdev
);
292 return ndev
->ntb_transport
;
296 * ntb_register_transport() - Register NTB transport with NTB HW driver
297 * @transport: transport identifier
299 * This function allows a transport to reserve the hardware driver for
302 * RETURNS: pointer to ntb_device, NULL on error.
304 struct ntb_device
*ntb_register_transport(struct pci_dev
*pdev
, void *transport
)
306 struct ntb_device
*ndev
= pci_get_drvdata(pdev
);
308 if (ndev
->ntb_transport
)
311 ndev
->ntb_transport
= transport
;
316 * ntb_unregister_transport() - Unregister the transport with the NTB HW driver
317 * @ndev: ntb_device of the transport to be freed
319 * This function unregisters the transport from the HW driver and performs any
320 * necessary cleanups.
322 void ntb_unregister_transport(struct ntb_device
*ndev
)
326 if (!ndev
->ntb_transport
)
329 for (i
= 0; i
< ndev
->max_cbs
; i
++)
330 ntb_unregister_db_callback(ndev
, i
);
332 ntb_unregister_event_callback(ndev
);
333 ndev
->ntb_transport
= NULL
;
337 * ntb_write_local_spad() - write to the secondary scratchpad register
338 * @ndev: pointer to ntb_device instance
339 * @idx: index to the scratchpad register, 0 based
340 * @val: the data value to put into the register
342 * This function allows writing of a 32bit value to the indexed scratchpad
343 * register. This writes over the data mirrored to the local scratchpad register
344 * by the remote system.
346 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
348 int ntb_write_local_spad(struct ntb_device
*ndev
, unsigned int idx
, u32 val
)
350 if (idx
>= ndev
->limits
.max_spads
)
353 dev_dbg(&ndev
->pdev
->dev
, "Writing %x to local scratch pad index %d\n",
355 writel(val
, ndev
->reg_ofs
.spad_read
+ idx
* 4);
361 * ntb_read_local_spad() - read from the primary scratchpad register
362 * @ndev: pointer to ntb_device instance
363 * @idx: index to scratchpad register, 0 based
364 * @val: pointer to 32bit integer for storing the register value
366 * This function allows reading of the 32bit scratchpad register on
367 * the primary (internal) side. This allows the local system to read data
368 * written and mirrored to the scratchpad register by the remote system.
370 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
372 int ntb_read_local_spad(struct ntb_device
*ndev
, unsigned int idx
, u32
*val
)
374 if (idx
>= ndev
->limits
.max_spads
)
377 *val
= readl(ndev
->reg_ofs
.spad_write
+ idx
* 4);
378 dev_dbg(&ndev
->pdev
->dev
,
379 "Reading %x from local scratch pad index %d\n", *val
, idx
);
385 * ntb_write_remote_spad() - write to the secondary scratchpad register
386 * @ndev: pointer to ntb_device instance
387 * @idx: index to the scratchpad register, 0 based
388 * @val: the data value to put into the register
390 * This function allows writing of a 32bit value to the indexed scratchpad
391 * register. The register resides on the secondary (external) side. This allows
392 * the local system to write data to be mirrored to the remote systems
393 * scratchpad register.
395 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
397 int ntb_write_remote_spad(struct ntb_device
*ndev
, unsigned int idx
, u32 val
)
399 if (idx
>= ndev
->limits
.max_spads
)
402 dev_dbg(&ndev
->pdev
->dev
, "Writing %x to remote scratch pad index %d\n",
404 writel(val
, ndev
->reg_ofs
.spad_write
+ idx
* 4);
410 * ntb_read_remote_spad() - read from the primary scratchpad register
411 * @ndev: pointer to ntb_device instance
412 * @idx: index to scratchpad register, 0 based
413 * @val: pointer to 32bit integer for storing the register value
415 * This function allows reading of the 32bit scratchpad register on
416 * the primary (internal) side. This allows the local system to read the data
417 * it wrote to be mirrored on the remote system.
419 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
421 int ntb_read_remote_spad(struct ntb_device
*ndev
, unsigned int idx
, u32
*val
)
423 if (idx
>= ndev
->limits
.max_spads
)
426 *val
= readl(ndev
->reg_ofs
.spad_read
+ idx
* 4);
427 dev_dbg(&ndev
->pdev
->dev
,
428 "Reading %x from remote scratch pad index %d\n", *val
, idx
);
434 * ntb_get_mw_base() - get addr for the NTB memory window
435 * @ndev: pointer to ntb_device instance
436 * @mw: memory window number
438 * This function provides the base address of the memory window specified.
440 * RETURNS: address, or NULL on error.
442 resource_size_t
ntb_get_mw_base(struct ntb_device
*ndev
, unsigned int mw
)
444 if (mw
>= ntb_max_mw(ndev
))
447 return pci_resource_start(ndev
->pdev
, MW_TO_BAR(mw
));
451 * ntb_get_mw_vbase() - get virtual addr for the NTB memory window
452 * @ndev: pointer to ntb_device instance
453 * @mw: memory window number
455 * This function provides the base virtual address of the memory window
458 * RETURNS: pointer to virtual address, or NULL on error.
460 void __iomem
*ntb_get_mw_vbase(struct ntb_device
*ndev
, unsigned int mw
)
462 if (mw
>= ntb_max_mw(ndev
))
465 return ndev
->mw
[mw
].vbase
;
469 * ntb_get_mw_size() - return size of NTB memory window
470 * @ndev: pointer to ntb_device instance
471 * @mw: memory window number
473 * This function provides the physical size of the memory window specified
475 * RETURNS: the size of the memory window or zero on error
477 u64
ntb_get_mw_size(struct ntb_device
*ndev
, unsigned int mw
)
479 if (mw
>= ntb_max_mw(ndev
))
482 return ndev
->mw
[mw
].bar_sz
;
486 * ntb_set_mw_addr - set the memory window address
487 * @ndev: pointer to ntb_device instance
488 * @mw: memory window number
489 * @addr: base address for data
491 * This function sets the base physical address of the memory window. This
492 * memory address is where data from the remote system will be transferred into
493 * or out of depending on how the transport is configured.
495 void ntb_set_mw_addr(struct ntb_device
*ndev
, unsigned int mw
, u64 addr
)
497 if (mw
>= ntb_max_mw(ndev
))
500 dev_dbg(&ndev
->pdev
->dev
, "Writing addr %Lx to BAR %d\n", addr
,
503 ndev
->mw
[mw
].phys_addr
= addr
;
505 switch (MW_TO_BAR(mw
)) {
507 writeq(addr
, ndev
->reg_ofs
.bar2_xlat
);
511 writel(addr
, ndev
->reg_ofs
.bar4_xlat
);
513 writeq(addr
, ndev
->reg_ofs
.bar4_xlat
);
516 writel(addr
, ndev
->reg_ofs
.bar5_xlat
);
522 * ntb_ring_doorbell() - Set the doorbell on the secondary/external side
523 * @ndev: pointer to ntb_device instance
524 * @db: doorbell to ring
526 * This function allows triggering of a doorbell on the secondary/external
527 * side that will initiate an interrupt on the remote host
529 * This function does not return a value.
531 void ntb_ring_doorbell(struct ntb_device
*ndev
, unsigned int db
)
533 dev_dbg(&ndev
->pdev
->dev
, "%s: ringing doorbell %d\n", __func__
, db
);
535 if (ndev
->hw_type
== BWD_HW
)
536 writeq((u64
) 1 << db
, ndev
->reg_ofs
.rdb
);
538 writew(((1 << ndev
->bits_per_vector
) - 1) <<
539 (db
* ndev
->bits_per_vector
), ndev
->reg_ofs
.rdb
);
542 static void bwd_recover_link(struct ntb_device
*ndev
)
546 /* Driver resets the NTB ModPhy lanes - magic! */
547 writeb(0xe0, ndev
->reg_base
+ BWD_MODPHY_PCSREG6
);
548 writeb(0x40, ndev
->reg_base
+ BWD_MODPHY_PCSREG4
);
549 writeb(0x60, ndev
->reg_base
+ BWD_MODPHY_PCSREG4
);
550 writeb(0x60, ndev
->reg_base
+ BWD_MODPHY_PCSREG6
);
552 /* Driver waits 100ms to allow the NTB ModPhy to settle */
555 /* Clear AER Errors, write to clear */
556 status
= readl(ndev
->reg_base
+ BWD_ERRCORSTS_OFFSET
);
557 dev_dbg(&ndev
->pdev
->dev
, "ERRCORSTS = %x\n", status
);
558 status
&= PCI_ERR_COR_REP_ROLL
;
559 writel(status
, ndev
->reg_base
+ BWD_ERRCORSTS_OFFSET
);
561 /* Clear unexpected electrical idle event in LTSSM, write to clear */
562 status
= readl(ndev
->reg_base
+ BWD_LTSSMERRSTS0_OFFSET
);
563 dev_dbg(&ndev
->pdev
->dev
, "LTSSMERRSTS0 = %x\n", status
);
564 status
|= BWD_LTSSMERRSTS0_UNEXPECTEDEI
;
565 writel(status
, ndev
->reg_base
+ BWD_LTSSMERRSTS0_OFFSET
);
567 /* Clear DeSkew Buffer error, write to clear */
568 status
= readl(ndev
->reg_base
+ BWD_DESKEWSTS_OFFSET
);
569 dev_dbg(&ndev
->pdev
->dev
, "DESKEWSTS = %x\n", status
);
570 status
|= BWD_DESKEWSTS_DBERR
;
571 writel(status
, ndev
->reg_base
+ BWD_DESKEWSTS_OFFSET
);
573 status
= readl(ndev
->reg_base
+ BWD_IBSTERRRCRVSTS0_OFFSET
);
574 dev_dbg(&ndev
->pdev
->dev
, "IBSTERRRCRVSTS0 = %x\n", status
);
575 status
&= BWD_IBIST_ERR_OFLOW
;
576 writel(status
, ndev
->reg_base
+ BWD_IBSTERRRCRVSTS0_OFFSET
);
578 /* Releases the NTB state machine to allow the link to retrain */
579 status
= readl(ndev
->reg_base
+ BWD_LTSSMSTATEJMP_OFFSET
);
580 dev_dbg(&ndev
->pdev
->dev
, "LTSSMSTATEJMP = %x\n", status
);
581 status
&= ~BWD_LTSSMSTATEJMP_FORCEDETECT
;
582 writel(status
, ndev
->reg_base
+ BWD_LTSSMSTATEJMP_OFFSET
);
585 static void ntb_link_event(struct ntb_device
*ndev
, int link_state
)
589 if (ndev
->link_status
== link_state
)
592 if (link_state
== NTB_LINK_UP
) {
595 dev_info(&ndev
->pdev
->dev
, "Link Up\n");
596 ndev
->link_status
= NTB_LINK_UP
;
597 event
= NTB_EVENT_HW_LINK_UP
;
599 if (is_ntb_atom(ndev
) ||
600 ndev
->conn_type
== NTB_CONN_TRANSPARENT
)
601 status
= readw(ndev
->reg_ofs
.lnk_stat
);
603 int rc
= pci_read_config_word(ndev
->pdev
,
604 SNB_LINK_STATUS_OFFSET
,
610 ndev
->link_width
= (status
& NTB_LINK_WIDTH_MASK
) >> 4;
611 ndev
->link_speed
= (status
& NTB_LINK_SPEED_MASK
);
612 dev_info(&ndev
->pdev
->dev
, "Link Width %d, Link Speed %d\n",
613 ndev
->link_width
, ndev
->link_speed
);
615 dev_info(&ndev
->pdev
->dev
, "Link Down\n");
616 ndev
->link_status
= NTB_LINK_DOWN
;
617 event
= NTB_EVENT_HW_LINK_DOWN
;
618 /* Don't modify link width/speed, we need it in link recovery */
621 /* notify the upper layer if we have an event change */
623 ndev
->event_cb(ndev
->ntb_transport
, event
);
626 static int ntb_link_status(struct ntb_device
*ndev
)
630 if (is_ntb_atom(ndev
)) {
633 ntb_cntl
= readl(ndev
->reg_ofs
.lnk_cntl
);
634 if (ntb_cntl
& BWD_CNTL_LINK_DOWN
)
635 link_state
= NTB_LINK_DOWN
;
637 link_state
= NTB_LINK_UP
;
642 rc
= pci_read_config_word(ndev
->pdev
, SNB_LINK_STATUS_OFFSET
,
647 if (status
& NTB_LINK_STATUS_ACTIVE
)
648 link_state
= NTB_LINK_UP
;
650 link_state
= NTB_LINK_DOWN
;
653 ntb_link_event(ndev
, link_state
);
658 static void bwd_link_recovery(struct work_struct
*work
)
660 struct ntb_device
*ndev
= container_of(work
, struct ntb_device
,
664 bwd_recover_link(ndev
);
665 /* There is a potential race between the 2 NTB devices recovering at the
666 * same time. If the times are the same, the link will not recover and
667 * the driver will be stuck in this loop forever. Add a random interval
668 * to the recovery time to prevent this race.
670 msleep(BWD_LINK_RECOVERY_TIME
+ prandom_u32() % BWD_LINK_RECOVERY_TIME
);
672 status32
= readl(ndev
->reg_base
+ BWD_LTSSMSTATEJMP_OFFSET
);
673 if (status32
& BWD_LTSSMSTATEJMP_FORCEDETECT
)
676 status32
= readl(ndev
->reg_base
+ BWD_IBSTERRRCRVSTS0_OFFSET
);
677 if (status32
& BWD_IBIST_ERR_OFLOW
)
680 status32
= readl(ndev
->reg_ofs
.lnk_cntl
);
681 if (!(status32
& BWD_CNTL_LINK_DOWN
)) {
682 unsigned char speed
, width
;
685 status16
= readw(ndev
->reg_ofs
.lnk_stat
);
686 width
= (status16
& NTB_LINK_WIDTH_MASK
) >> 4;
687 speed
= (status16
& NTB_LINK_SPEED_MASK
);
688 if (ndev
->link_width
!= width
|| ndev
->link_speed
!= speed
)
692 schedule_delayed_work(&ndev
->hb_timer
, NTB_HB_TIMEOUT
);
696 schedule_delayed_work(&ndev
->lr_timer
, NTB_HB_TIMEOUT
);
699 /* BWD doesn't have link status interrupt, poll on that platform */
700 static void bwd_link_poll(struct work_struct
*work
)
702 struct ntb_device
*ndev
= container_of(work
, struct ntb_device
,
704 unsigned long ts
= jiffies
;
706 /* If we haven't gotten an interrupt in a while, check the BWD link
709 if (ts
> ndev
->last_ts
+ NTB_HB_TIMEOUT
) {
710 int rc
= ntb_link_status(ndev
);
712 dev_err(&ndev
->pdev
->dev
,
713 "Error determining link status\n");
715 /* Check to see if a link error is the cause of the link down */
716 if (ndev
->link_status
== NTB_LINK_DOWN
) {
717 u32 status32
= readl(ndev
->reg_base
+
718 BWD_LTSSMSTATEJMP_OFFSET
);
719 if (status32
& BWD_LTSSMSTATEJMP_FORCEDETECT
) {
720 schedule_delayed_work(&ndev
->lr_timer
, 0);
726 schedule_delayed_work(&ndev
->hb_timer
, NTB_HB_TIMEOUT
);
729 static int ntb_xeon_setup(struct ntb_device
*ndev
)
731 switch (ndev
->conn_type
) {
733 ndev
->reg_ofs
.ldb
= ndev
->reg_base
+ SNB_PDOORBELL_OFFSET
;
734 ndev
->reg_ofs
.ldb_mask
= ndev
->reg_base
+ SNB_PDBMSK_OFFSET
;
735 ndev
->reg_ofs
.spad_read
= ndev
->reg_base
+ SNB_SPAD_OFFSET
;
736 ndev
->reg_ofs
.bar2_xlat
= ndev
->reg_base
+ SNB_SBAR2XLAT_OFFSET
;
737 ndev
->reg_ofs
.bar4_xlat
= ndev
->reg_base
+ SNB_SBAR4XLAT_OFFSET
;
739 ndev
->reg_ofs
.bar5_xlat
=
740 ndev
->reg_base
+ SNB_SBAR5XLAT_OFFSET
;
741 ndev
->limits
.max_spads
= SNB_MAX_B2B_SPADS
;
743 /* There is a Xeon hardware errata related to writes to
744 * SDOORBELL or B2BDOORBELL in conjunction with inbound access
745 * to NTB MMIO Space, which may hang the system. To workaround
746 * this use the second memory window to access the interrupt and
747 * scratch pad registers on the remote system.
749 if (ndev
->wa_flags
& WA_SNB_ERR
) {
750 if (!ndev
->mw
[ndev
->limits
.max_mw
- 1].bar_sz
)
753 ndev
->limits
.max_db_bits
= SNB_MAX_DB_BITS
;
754 ndev
->reg_ofs
.spad_write
=
755 ndev
->mw
[ndev
->limits
.max_mw
- 1].vbase
+
758 ndev
->mw
[ndev
->limits
.max_mw
- 1].vbase
+
759 SNB_PDOORBELL_OFFSET
;
761 /* Set the Limit register to 4k, the minimum size, to
762 * prevent an illegal access
764 writeq(ndev
->mw
[1].bar_sz
+ 0x1000, ndev
->reg_base
+
765 SNB_PBAR4LMT_OFFSET
);
766 /* HW errata on the Limit registers. They can only be
767 * written when the base register is 4GB aligned and
768 * < 32bit. This should already be the case based on
769 * the driver defaults, but write the Limit registers
770 * first just in case.
773 ndev
->limits
.max_mw
= SNB_ERRATA_MAX_MW
;
775 /* HW Errata on bit 14 of b2bdoorbell register. Writes
776 * will not be mirrored to the remote system. Shrink
777 * the number of bits by one, since bit 14 is the last
780 ndev
->limits
.max_db_bits
= SNB_MAX_DB_BITS
- 1;
781 ndev
->reg_ofs
.spad_write
= ndev
->reg_base
+
783 ndev
->reg_ofs
.rdb
= ndev
->reg_base
+
784 SNB_B2B_DOORBELL_OFFSET
;
786 /* Disable the Limit register, just in case it is set to
787 * something silly. A 64bit write should handle it
788 * regardless of whether it has a split BAR or not.
790 writeq(0, ndev
->reg_base
+ SNB_PBAR4LMT_OFFSET
);
791 /* HW errata on the Limit registers. They can only be
792 * written when the base register is 4GB aligned and
793 * < 32bit. This should already be the case based on
794 * the driver defaults, but write the Limit registers
795 * first just in case.
798 ndev
->limits
.max_mw
= HSX_SPLITBAR_MAX_MW
;
800 ndev
->limits
.max_mw
= SNB_MAX_MW
;
803 /* The Xeon errata workaround requires setting SBAR Base
804 * addresses to known values, so that the PBAR XLAT can be
805 * pointed at SBAR0 of the remote system.
807 if (ndev
->dev_type
== NTB_DEV_USD
) {
808 writeq(SNB_MBAR23_DSD_ADDR
, ndev
->reg_base
+
809 SNB_PBAR2XLAT_OFFSET
);
810 if (ndev
->wa_flags
& WA_SNB_ERR
)
811 writeq(SNB_MBAR01_DSD_ADDR
, ndev
->reg_base
+
812 SNB_PBAR4XLAT_OFFSET
);
814 if (ndev
->split_bar
) {
815 writel(SNB_MBAR4_DSD_ADDR
,
817 SNB_PBAR4XLAT_OFFSET
);
818 writel(SNB_MBAR5_DSD_ADDR
,
820 SNB_PBAR5XLAT_OFFSET
);
822 writeq(SNB_MBAR4_DSD_ADDR
,
824 SNB_PBAR4XLAT_OFFSET
);
826 /* B2B_XLAT_OFFSET is a 64bit register, but can
827 * only take 32bit writes
829 writel(SNB_MBAR01_DSD_ADDR
& 0xffffffff,
830 ndev
->reg_base
+ SNB_B2B_XLAT_OFFSETL
);
831 writel(SNB_MBAR01_DSD_ADDR
>> 32,
832 ndev
->reg_base
+ SNB_B2B_XLAT_OFFSETU
);
835 writeq(SNB_MBAR01_USD_ADDR
, ndev
->reg_base
+
836 SNB_SBAR0BASE_OFFSET
);
837 writeq(SNB_MBAR23_USD_ADDR
, ndev
->reg_base
+
838 SNB_SBAR2BASE_OFFSET
);
839 if (ndev
->split_bar
) {
840 writel(SNB_MBAR4_USD_ADDR
, ndev
->reg_base
+
841 SNB_SBAR4BASE_OFFSET
);
842 writel(SNB_MBAR5_USD_ADDR
, ndev
->reg_base
+
843 SNB_SBAR5BASE_OFFSET
);
845 writeq(SNB_MBAR4_USD_ADDR
, ndev
->reg_base
+
846 SNB_SBAR4BASE_OFFSET
);
848 writeq(SNB_MBAR23_USD_ADDR
, ndev
->reg_base
+
849 SNB_PBAR2XLAT_OFFSET
);
850 if (ndev
->wa_flags
& WA_SNB_ERR
)
851 writeq(SNB_MBAR01_USD_ADDR
, ndev
->reg_base
+
852 SNB_PBAR4XLAT_OFFSET
);
854 if (ndev
->split_bar
) {
855 writel(SNB_MBAR4_USD_ADDR
,
857 SNB_PBAR4XLAT_OFFSET
);
858 writel(SNB_MBAR5_USD_ADDR
,
860 SNB_PBAR5XLAT_OFFSET
);
862 writeq(SNB_MBAR4_USD_ADDR
,
864 SNB_PBAR4XLAT_OFFSET
);
867 * B2B_XLAT_OFFSET is a 64bit register, but can
868 * only take 32bit writes
870 writel(SNB_MBAR01_USD_ADDR
& 0xffffffff,
871 ndev
->reg_base
+ SNB_B2B_XLAT_OFFSETL
);
872 writel(SNB_MBAR01_USD_ADDR
>> 32,
873 ndev
->reg_base
+ SNB_B2B_XLAT_OFFSETU
);
875 writeq(SNB_MBAR01_DSD_ADDR
, ndev
->reg_base
+
876 SNB_SBAR0BASE_OFFSET
);
877 writeq(SNB_MBAR23_DSD_ADDR
, ndev
->reg_base
+
878 SNB_SBAR2BASE_OFFSET
);
879 if (ndev
->split_bar
) {
880 writel(SNB_MBAR4_DSD_ADDR
, ndev
->reg_base
+
881 SNB_SBAR4BASE_OFFSET
);
882 writel(SNB_MBAR5_DSD_ADDR
, ndev
->reg_base
+
883 SNB_SBAR5BASE_OFFSET
);
885 writeq(SNB_MBAR4_DSD_ADDR
, ndev
->reg_base
+
886 SNB_SBAR4BASE_OFFSET
);
891 if (ndev
->wa_flags
& WA_SNB_ERR
) {
892 dev_err(&ndev
->pdev
->dev
,
893 "NTB-RP disabled due to hardware errata.\n");
897 /* Scratch pads need to have exclusive access from the primary
898 * or secondary side. Halve the num spads so that each side can
899 * have an equal amount.
901 ndev
->limits
.max_spads
= SNB_MAX_COMPAT_SPADS
/ 2;
902 ndev
->limits
.max_db_bits
= SNB_MAX_DB_BITS
;
903 /* Note: The SDOORBELL is the cause of the errata. You REALLY
904 * don't want to touch it.
906 ndev
->reg_ofs
.rdb
= ndev
->reg_base
+ SNB_SDOORBELL_OFFSET
;
907 ndev
->reg_ofs
.ldb
= ndev
->reg_base
+ SNB_PDOORBELL_OFFSET
;
908 ndev
->reg_ofs
.ldb_mask
= ndev
->reg_base
+ SNB_PDBMSK_OFFSET
;
909 /* Offset the start of the spads to correspond to whether it is
910 * primary or secondary
912 ndev
->reg_ofs
.spad_write
= ndev
->reg_base
+ SNB_SPAD_OFFSET
+
913 ndev
->limits
.max_spads
* 4;
914 ndev
->reg_ofs
.spad_read
= ndev
->reg_base
+ SNB_SPAD_OFFSET
;
915 ndev
->reg_ofs
.bar2_xlat
= ndev
->reg_base
+ SNB_SBAR2XLAT_OFFSET
;
916 ndev
->reg_ofs
.bar4_xlat
= ndev
->reg_base
+ SNB_SBAR4XLAT_OFFSET
;
917 if (ndev
->split_bar
) {
918 ndev
->reg_ofs
.bar5_xlat
=
919 ndev
->reg_base
+ SNB_SBAR5XLAT_OFFSET
;
920 ndev
->limits
.max_mw
= HSX_SPLITBAR_MAX_MW
;
922 ndev
->limits
.max_mw
= SNB_MAX_MW
;
924 case NTB_CONN_TRANSPARENT
:
925 if (ndev
->wa_flags
& WA_SNB_ERR
) {
926 dev_err(&ndev
->pdev
->dev
,
927 "NTB-TRANSPARENT disabled due to hardware errata.\n");
931 /* Scratch pads need to have exclusive access from the primary
932 * or secondary side. Halve the num spads so that each side can
933 * have an equal amount.
935 ndev
->limits
.max_spads
= SNB_MAX_COMPAT_SPADS
/ 2;
936 ndev
->limits
.max_db_bits
= SNB_MAX_DB_BITS
;
937 ndev
->reg_ofs
.rdb
= ndev
->reg_base
+ SNB_PDOORBELL_OFFSET
;
938 ndev
->reg_ofs
.ldb
= ndev
->reg_base
+ SNB_SDOORBELL_OFFSET
;
939 ndev
->reg_ofs
.ldb_mask
= ndev
->reg_base
+ SNB_SDBMSK_OFFSET
;
940 ndev
->reg_ofs
.spad_write
= ndev
->reg_base
+ SNB_SPAD_OFFSET
;
941 /* Offset the start of the spads to correspond to whether it is
942 * primary or secondary
944 ndev
->reg_ofs
.spad_read
= ndev
->reg_base
+ SNB_SPAD_OFFSET
+
945 ndev
->limits
.max_spads
* 4;
946 ndev
->reg_ofs
.bar2_xlat
= ndev
->reg_base
+ SNB_PBAR2XLAT_OFFSET
;
947 ndev
->reg_ofs
.bar4_xlat
= ndev
->reg_base
+ SNB_PBAR4XLAT_OFFSET
;
949 if (ndev
->split_bar
) {
950 ndev
->reg_ofs
.bar5_xlat
=
951 ndev
->reg_base
+ SNB_PBAR5XLAT_OFFSET
;
952 ndev
->limits
.max_mw
= HSX_SPLITBAR_MAX_MW
;
954 ndev
->limits
.max_mw
= SNB_MAX_MW
;
958 * We should never hit this. The detect function should have
959 * taken care of everything.
964 ndev
->reg_ofs
.lnk_cntl
= ndev
->reg_base
+ SNB_NTBCNTL_OFFSET
;
965 ndev
->reg_ofs
.lnk_stat
= ndev
->reg_base
+ SNB_SLINK_STATUS_OFFSET
;
966 ndev
->reg_ofs
.spci_cmd
= ndev
->reg_base
+ SNB_PCICMD_OFFSET
;
968 ndev
->limits
.msix_cnt
= SNB_MSIX_CNT
;
969 ndev
->bits_per_vector
= SNB_DB_BITS_PER_VEC
;
974 static int ntb_bwd_setup(struct ntb_device
*ndev
)
979 ndev
->hw_type
= BWD_HW
;
981 rc
= pci_read_config_dword(ndev
->pdev
, NTB_PPD_OFFSET
, &val
);
985 switch ((val
& BWD_PPD_CONN_TYPE
) >> 8) {
987 ndev
->conn_type
= NTB_CONN_B2B
;
991 dev_err(&ndev
->pdev
->dev
, "Unsupported NTB configuration\n");
995 if (val
& BWD_PPD_DEV_TYPE
)
996 ndev
->dev_type
= NTB_DEV_DSD
;
998 ndev
->dev_type
= NTB_DEV_USD
;
1000 /* Initiate PCI-E link training */
1001 rc
= pci_write_config_dword(ndev
->pdev
, NTB_PPD_OFFSET
,
1002 val
| BWD_PPD_INIT_LINK
);
1006 ndev
->reg_ofs
.ldb
= ndev
->reg_base
+ BWD_PDOORBELL_OFFSET
;
1007 ndev
->reg_ofs
.ldb_mask
= ndev
->reg_base
+ BWD_PDBMSK_OFFSET
;
1008 ndev
->reg_ofs
.rdb
= ndev
->reg_base
+ BWD_B2B_DOORBELL_OFFSET
;
1009 ndev
->reg_ofs
.bar2_xlat
= ndev
->reg_base
+ BWD_SBAR2XLAT_OFFSET
;
1010 ndev
->reg_ofs
.bar4_xlat
= ndev
->reg_base
+ BWD_SBAR4XLAT_OFFSET
;
1011 ndev
->reg_ofs
.lnk_cntl
= ndev
->reg_base
+ BWD_NTBCNTL_OFFSET
;
1012 ndev
->reg_ofs
.lnk_stat
= ndev
->reg_base
+ BWD_LINK_STATUS_OFFSET
;
1013 ndev
->reg_ofs
.spad_read
= ndev
->reg_base
+ BWD_SPAD_OFFSET
;
1014 ndev
->reg_ofs
.spad_write
= ndev
->reg_base
+ BWD_B2B_SPAD_OFFSET
;
1015 ndev
->reg_ofs
.spci_cmd
= ndev
->reg_base
+ BWD_PCICMD_OFFSET
;
1016 ndev
->limits
.max_mw
= BWD_MAX_MW
;
1017 ndev
->limits
.max_spads
= BWD_MAX_SPADS
;
1018 ndev
->limits
.max_db_bits
= BWD_MAX_DB_BITS
;
1019 ndev
->limits
.msix_cnt
= BWD_MSIX_CNT
;
1020 ndev
->bits_per_vector
= BWD_DB_BITS_PER_VEC
;
1022 /* Since bwd doesn't have a link interrupt, setup a poll timer */
1023 INIT_DELAYED_WORK(&ndev
->hb_timer
, bwd_link_poll
);
1024 INIT_DELAYED_WORK(&ndev
->lr_timer
, bwd_link_recovery
);
1025 schedule_delayed_work(&ndev
->hb_timer
, NTB_HB_TIMEOUT
);
1030 static int ntb_device_setup(struct ntb_device
*ndev
)
1034 if (is_ntb_xeon(ndev
))
1035 rc
= ntb_xeon_setup(ndev
);
1036 else if (is_ntb_atom(ndev
))
1037 rc
= ntb_bwd_setup(ndev
);
1044 if (ndev
->conn_type
== NTB_CONN_B2B
)
1045 /* Enable Bus Master and Memory Space on the secondary side */
1046 writew(PCI_COMMAND_MEMORY
| PCI_COMMAND_MASTER
,
1047 ndev
->reg_ofs
.spci_cmd
);
1052 static void ntb_device_free(struct ntb_device
*ndev
)
1054 if (is_ntb_atom(ndev
)) {
1055 cancel_delayed_work_sync(&ndev
->hb_timer
);
1056 cancel_delayed_work_sync(&ndev
->lr_timer
);
1060 static irqreturn_t
bwd_callback_msix_irq(int irq
, void *data
)
1062 struct ntb_db_cb
*db_cb
= data
;
1063 struct ntb_device
*ndev
= db_cb
->ndev
;
1066 dev_dbg(&ndev
->pdev
->dev
, "MSI-X irq %d received for DB %d\n", irq
,
1069 mask
= readw(ndev
->reg_ofs
.ldb_mask
);
1070 set_bit(db_cb
->db_num
* ndev
->bits_per_vector
, &mask
);
1071 writew(mask
, ndev
->reg_ofs
.ldb_mask
);
1073 tasklet_schedule(&db_cb
->irq_work
);
1075 /* No need to check for the specific HB irq, any interrupt means
1078 ndev
->last_ts
= jiffies
;
1080 writeq((u64
) 1 << db_cb
->db_num
, ndev
->reg_ofs
.ldb
);
/*
 * xeon_callback_msix_irq() - doorbell interrupt handler for Xeon.
 * @irq:  interrupt number, only used for the debug message
 * @data: the struct ntb_db_cb registered for this vector
 *
 * Masks the doorbell by setting bit db_num * bits_per_vector in the 16-bit
 * register reg_ofs.ldb_mask (readw/set_bit/writew), schedules the
 * per-doorbell tasklet irq_work, and then writes a run of bits_per_vector
 * ones, shifted to this vector's position, to reg_ofs.ldb.  The existing
 * comment explains the grouping: 16 doorbell bits are shared by 4 vectors.
 *
 * NOTE(review): the declaration of mask and the return statement are not
 * part of this extract.
 */
1085 static irqreturn_t
xeon_callback_msix_irq(int irq
, void *data
)
1087 struct ntb_db_cb
*db_cb
= data
;
1088 struct ntb_device
*ndev
= db_cb
->ndev
;
1091 dev_dbg(&ndev
->pdev
->dev
, "MSI-X irq %d received for DB %d\n", irq
,
1094 mask
= readw(ndev
->reg_ofs
.ldb_mask
);
1095 set_bit(db_cb
->db_num
* ndev
->bits_per_vector
, &mask
);
1096 writew(mask
, ndev
->reg_ofs
.ldb_mask
);
1098 tasklet_schedule(&db_cb
->irq_work
);
1100 /* On Sandybridge, there are 16 bits in the interrupt register
1101 * but only 4 vectors. So, 5 bits are assigned to the first 3
1102 * vectors, with the 4th having a single bit for link
1105 writew(((1 << ndev
->bits_per_vector
) - 1) <<
1106 (db_cb
->db_num
* ndev
->bits_per_vector
), ndev
->reg_ofs
.ldb
);
/*
 * xeon_event_msix_irq() - link event interrupt handler for Xeon.
 * @irq: interrupt number, only used for the debug message
 * @dev: the struct ntb_device this vector was registered with
 *
 * Refreshes the link state through ntb_link_status(), logs an error message
 * about a failure to determine the link status (the condition guarding that
 * message is not part of this extract, presumably a non-zero rc), and then
 * writes 1 << SNB_LINK_DB to reg_ofs.ldb.
 */
1111 /* Since we do not have a HW doorbell in BWD, this is only used in JF/JT */
1112 static irqreturn_t
xeon_event_msix_irq(int irq
, void *dev
)
1114 struct ntb_device
*ndev
= dev
;
1117 dev_dbg(&ndev
->pdev
->dev
, "MSI-X irq %d received for Events\n", irq
);
1119 rc
= ntb_link_status(ndev
);
1121 dev_err(&ndev
->pdev
->dev
, "Error determining link status\n");
1123 /* bit 15 is always the link bit */
1124 writew(1 << SNB_LINK_DB
, ndev
->reg_ofs
.ldb
);
/*
 * ntb_interrupt() - single-vector handler used for MSI and INTx.
 * @irq: interrupt number
 * @dev: the struct ntb_device passed to request_irq()
 *
 * Reads the local doorbell register and forwards to the per-doorbell
 * handlers that are otherwise attached to individual MSI-X vectors:
 *   - Atom: 64-bit read of reg_ofs.ldb, then bwd_callback_msix_irq() with
 *     &ndev->db_cb[i];
 *   - otherwise: 16-bit read of reg_ofs.ldb; when SNB_DB_HW_LINK is set the
 *     link handler xeon_event_msix_irq() runs and that bit is cleared from
 *     the local copy, then xeon_callback_msix_irq() with &ndev->db_cb[i].
 *
 * NOTE(review): how i is derived from the doorbell value (the loop lines)
 * and the return statement are not part of this extract.
 */
1129 static irqreturn_t
ntb_interrupt(int irq
, void *dev
)
1131 struct ntb_device
*ndev
= dev
;
1134 if (is_ntb_atom(ndev
)) {
1135 u64 ldb
= readq(ndev
->reg_ofs
.ldb
);
1137 dev_dbg(&ndev
->pdev
->dev
, "irq %d - ldb = %Lx\n", irq
, ldb
);
1142 bwd_callback_msix_irq(irq
, &ndev
->db_cb
[i
]);
1145 u16 ldb
= readw(ndev
->reg_ofs
.ldb
);
1147 dev_dbg(&ndev
->pdev
->dev
, "irq %d - ldb = %x\n", irq
, ldb
);
1149 if (ldb
& SNB_DB_HW_LINK
) {
1150 xeon_event_msix_irq(irq
, dev
);
1151 ldb
&= ~SNB_DB_HW_LINK
;
1157 xeon_callback_msix_irq(irq
, &ndev
->db_cb
[i
]);
/*
 * ntb_setup_snb_msix() - enable MSI-X and request the vectors on Xeon.
 * @ndev:         NTB device
 * @msix_entries: number of MSI-X entries to enable
 *
 * Compares msix_entries with limits.msix_cnt (the action taken is not part
 * of this extract), enables exactly msix_entries vectors with
 * pci_enable_msix_exact() and requests one IRQ per entry:
 *   - the last entry (i == msix_entries - 1) gets xeon_event_msix_irq with
 *     ndev as the cookie;
 *   - the other entries get xeon_callback_msix_irq (the cookie argument is
 *     on a line absent from this extract; the unwind code frees them with
 *     &ndev->db_cb[i]).
 * On success num_msix is set to msix_entries and max_cbs to
 * msix_entries - 1, i.e. one vector is reserved for link events.
 *
 * The unwind code frees already requested doorbell IRQs and disables MSI-X.
 */
1164 static int ntb_setup_snb_msix(struct ntb_device
*ndev
, int msix_entries
)
1166 struct pci_dev
*pdev
= ndev
->pdev
;
1167 struct msix_entry
*msix
;
1170 if (msix_entries
< ndev
->limits
.msix_cnt
)
1173 rc
= pci_enable_msix_exact(pdev
, ndev
->msix_entries
, msix_entries
);
1177 for (i
= 0; i
< msix_entries
; i
++) {
1178 msix
= &ndev
->msix_entries
[i
];
1179 WARN_ON(!msix
->vector
);
1181 if (i
== msix_entries
- 1) {
1182 rc
= request_irq(msix
->vector
,
1183 xeon_event_msix_irq
, 0,
1184 "ntb-event-msix", ndev
);
1188 rc
= request_irq(msix
->vector
,
1189 xeon_callback_msix_irq
, 0,
1190 "ntb-callback-msix",
1197 ndev
->num_msix
= msix_entries
;
1198 ndev
->max_cbs
= msix_entries
- 1;
1204 /* Code never reaches here for entry nr 'ndev->num_msix - 1' */
1205 msix
= &ndev
->msix_entries
[i
];
1206 free_irq(msix
->vector
, &ndev
->db_cb
[i
]);
1209 pci_disable_msix(pdev
);
/*
 * ntb_setup_bwd_msix() - enable MSI-X and request the vectors on Atom (BWD).
 * @ndev:         NTB device
 * @msix_entries: upper bound on the number of MSI-X entries to enable
 *
 * Uses pci_enable_msix_range(), so msix_entries is replaced by the value
 * that call returns; a negative value is returned to the caller as the
 * error.  Every enabled entry is requested with bwd_callback_msix_irq and
 * &ndev->db_cb[i] as the cookie.  On success both num_msix and max_cbs are
 * set to msix_entries (no vector is set aside for link events here).
 *
 * The unwind code frees already requested IRQs and disables MSI-X.
 *
 * NOTE(review): the remaining arguments of pci_enable_msix_range() are on a
 * line absent from this extract.
 */
1215 static int ntb_setup_bwd_msix(struct ntb_device
*ndev
, int msix_entries
)
1217 struct pci_dev
*pdev
= ndev
->pdev
;
1218 struct msix_entry
*msix
;
1221 msix_entries
= pci_enable_msix_range(pdev
, ndev
->msix_entries
,
1223 if (msix_entries
< 0)
1224 return msix_entries
;
1226 for (i
= 0; i
< msix_entries
; i
++) {
1227 msix
= &ndev
->msix_entries
[i
];
1228 WARN_ON(!msix
->vector
);
1230 rc
= request_irq(msix
->vector
, bwd_callback_msix_irq
, 0,
1231 "ntb-callback-msix", &ndev
->db_cb
[i
]);
1236 ndev
->num_msix
= msix_entries
;
1237 ndev
->max_cbs
= msix_entries
;
1243 free_irq(msix
->vector
, &ndev
->db_cb
[i
]);
1245 pci_disable_msix(pdev
);
/*
 * ntb_setup_msix() - allocate the MSI-X table and hand over to the
 * hardware specific MSI-X setup.
 * @ndev: NTB device
 *
 * Queries the vector count with pci_msix_vec_count(), treats a negative
 * count and a count above limits.msix_cnt as special cases (their bodies
 * are not part of this extract), allocates ndev->msix_entries with one
 * struct msix_entry per vector, numbers the entries 0..msix_entries-1 and
 * then calls ntb_setup_bwd_msix() on Atom or ntb_setup_snb_msix()
 * (presumably in the else branch, which is not visible).
 *
 * The failure path frees ndev->msix_entries and logs an error.
 */
1251 static int ntb_setup_msix(struct ntb_device
*ndev
)
1253 struct pci_dev
*pdev
= ndev
->pdev
;
1257 msix_entries
= pci_msix_vec_count(pdev
);
1258 if (msix_entries
< 0) {
1261 } else if (msix_entries
> ndev
->limits
.msix_cnt
) {
1266 ndev
->msix_entries
= kmalloc(sizeof(struct msix_entry
) * msix_entries
,
1268 if (!ndev
->msix_entries
) {
1273 for (i
= 0; i
< msix_entries
; i
++)
1274 ndev
->msix_entries
[i
].entry
= i
;
1276 if (is_ntb_atom(ndev
))
1277 rc
= ntb_setup_bwd_msix(ndev
, msix_entries
);
1279 rc
= ntb_setup_snb_msix(ndev
, msix_entries
);
1286 kfree(ndev
->msix_entries
);
1288 dev_err(&pdev
->dev
, "Error allocating MSI-X interrupt\n");
/*
 * ntb_setup_msi() - fall back to a single MSI vector.
 * @ndev: NTB device
 *
 * Enables MSI with pci_enable_msi() and requests pdev->irq with
 * ntb_interrupt as the handler and ndev as the cookie.  The visible failure
 * handling disables MSI again and logs an error; the conditions guarding
 * those calls and the return statements are not part of this extract.
 */
1292 static int ntb_setup_msi(struct ntb_device
*ndev
)
1294 struct pci_dev
*pdev
= ndev
->pdev
;
1297 rc
= pci_enable_msi(pdev
);
1301 rc
= request_irq(pdev
->irq
, ntb_interrupt
, 0, "ntb-msi", ndev
);
1303 pci_disable_msi(pdev
);
1304 dev_err(&pdev
->dev
, "Error allocating MSI interrupt\n");
/*
 * ntb_setup_intx() - fall back to the legacy INTx interrupt.
 * @ndev: NTB device
 *
 * Requests pdev->irq as a shared interrupt (IRQF_SHARED) with ntb_interrupt
 * as the handler.
 *
 * NOTE(review): the code behind the intx verification comment, the last
 * request_irq() argument and the return path are not part of this extract.
 */
1311 static int ntb_setup_intx(struct ntb_device
*ndev
)
1313 struct pci_dev
*pdev
= ndev
->pdev
;
1318 /* Verify intx is enabled */
1321 rc
= request_irq(pdev
->irq
, ntb_interrupt
, IRQF_SHARED
, "ntb-intx",
/*
 * ntb_setup_interrupts() - mask the doorbells and pick an interrupt mode.
 * @ndev: NTB device
 *
 * First programs the doorbell mask: all ones (64-bit write) on Atom, and
 * everything except bit SNB_LINK_DB (16-bit write) otherwise, so that only
 * the link interrupt stays enabled on that hardware.
 *
 * It then calls ntb_setup_msix(), ntb_setup_msi() and ntb_setup_intx() in
 * that order; before the MSI attempt bits_per_vector is reset to 1 and
 * max_cbs to limits.max_db_bits.  An error is logged when no usable
 * interrupt was found.
 *
 * NOTE(review): the conditions that connect the three attempts (presumably
 * each one is only tried when the previous one failed) are not part of this
 * extract.
 */
1329 static int ntb_setup_interrupts(struct ntb_device
*ndev
)
1333 /* On BWD, disable all interrupts. On SNB, disable all but Link
1334 * Interrupt. The rest will be unmasked as callbacks are registered.
1336 if (is_ntb_atom(ndev
))
1337 writeq(~0, ndev
->reg_ofs
.ldb_mask
);
1339 u16 var
= 1 << SNB_LINK_DB
;
1340 writew(~var
, ndev
->reg_ofs
.ldb_mask
);
1343 rc
= ntb_setup_msix(ndev
);
1347 ndev
->bits_per_vector
= 1;
1348 ndev
->max_cbs
= ndev
->limits
.max_db_bits
;
1350 rc
= ntb_setup_msi(ndev
);
1354 rc
= ntb_setup_intx(ndev
);
1356 dev_err(&ndev
->pdev
->dev
, "no usable interrupts\n");
/*
 * ntb_free_interrupts() - mask the doorbells and release every IRQ.
 * @ndev: NTB device
 *
 * Writes all ones to reg_ofs.ldb_mask (64-bit on Atom, 16-bit otherwise).
 *
 * When MSI-X vectors are in use (num_msix != 0) each vector is freed with
 * the cookie it was requested with: ndev for the last vector on Xeon (the
 * link event vector) and &ndev->db_cb[i] for all others.  MSI-X is then
 * disabled and the msix_entries array freed.
 *
 * The remaining visible code frees pdev->irq with ndev as the cookie and
 * disables MSI if pci_dev_msi_enabled() reports it as enabled; this is
 * presumably the non-MSI-X branch, whose else line is not part of this
 * extract.
 */
1364 static void ntb_free_interrupts(struct ntb_device
*ndev
)
1366 struct pci_dev
*pdev
= ndev
->pdev
;
1368 /* mask interrupts */
1369 if (is_ntb_atom(ndev
))
1370 writeq(~0, ndev
->reg_ofs
.ldb_mask
);
1372 writew(~0, ndev
->reg_ofs
.ldb_mask
);
1374 if (ndev
->num_msix
) {
1375 struct msix_entry
*msix
;
1378 for (i
= 0; i
< ndev
->num_msix
; i
++) {
1379 msix
= &ndev
->msix_entries
[i
];
1380 if (is_ntb_xeon(ndev
) && i
== ndev
->num_msix
- 1)
1381 free_irq(msix
->vector
, ndev
);
1383 free_irq(msix
->vector
, &ndev
->db_cb
[i
]);
1385 pci_disable_msix(pdev
);
1386 kfree(ndev
->msix_entries
);
1388 free_irq(pdev
->irq
, ndev
);
1390 if (pci_dev_msi_enabled(pdev
))
1391 pci_disable_msi(pdev
);
/*
 * ntb_create_callbacks() - allocate the doorbell callback table.
 * @ndev: NTB device
 *
 * Allocates limits.max_db_bits zeroed struct ntb_db_cb entries in
 * ndev->db_cb and initialises each entry with its own index (db_num) and a
 * back pointer to ndev.  As the existing comment explains, the maximum is
 * allocated up front because the table entries are needed as IRQ cookies
 * before the number of granted MSI-X vectors is known.
 */
1395 static int ntb_create_callbacks(struct ntb_device
*ndev
)
1399 /* Chicken-egg issue. We won't know how many callbacks are necessary
1400 * until we see how many MSI-X vectors we get, but these pointers need
1401 * to be passed into the MSI-X register function. So, we allocate the
1402 * max, knowing that they might not all be used, to work around this.
1404 ndev
->db_cb
= kcalloc(ndev
->limits
.max_db_bits
,
1405 sizeof(struct ntb_db_cb
),
1410 for (i
= 0; i
< ndev
->limits
.max_db_bits
; i
++) {
1411 ndev
->db_cb
[i
].db_num
= i
;
1412 ndev
->db_cb
[i
].ndev
= ndev
;
/*
 * ntb_free_callbacks() - unregister every doorbell callback.
 * @ndev: NTB device
 *
 * Calls ntb_unregister_db_callback() for each index from 0 up to
 * limits.max_db_bits - 1, the same range that ntb_create_callbacks()
 * initialises.
 *
 * NOTE(review): releasing the ndev->db_cb array itself is not visible in
 * this extract - confirm against the complete file.
 */
1418 static void ntb_free_callbacks(struct ntb_device
*ndev
)
1422 for (i
= 0; i
< ndev
->limits
.max_db_bits
; i
++)
1423 ntb_unregister_db_callback(ndev
, i
);
/*
 * ntb_debugfs_read() - read handler of the per-device debugfs info file.
 * @filp:  open file; private_data holds the struct ntb_device
 * @ubuf:  user buffer to copy the report into
 * @count: size of @ubuf
 * @offp:  file position, advanced by simple_read_from_buffer()
 *
 * Formats a text report into a kmalloc()ed buffer of out_count bytes with a
 * chain of snprintf() calls: connection type, device type, maximum number
 * of callbacks, link status and, while the link is up, link speed and
 * width.  On Xeon it appends the upstream memory miss counter and the
 * DEVSTS, LNKSTS, UNCERRSTS and CORERRSTS values read from PCI config
 * space.  offset is clamped against out_count before the buffer is handed
 * to simple_read_from_buffer(), because snprintf() returns the length it
 * would have written.
 *
 * The device type line prints USD/DSP when dev_type is NTB_DEV_USD and
 * DSD/USP otherwise, the same mapping ntb_device_detect() logs at probe
 * time; the two labels used to be swapped here.
 */
1428 static ssize_t
ntb_debugfs_read(struct file
*filp
, char __user
*ubuf
,
1429 size_t count
, loff_t
*offp
)
1431 struct ntb_device
*ndev
;
1433 ssize_t ret
, offset
, out_count
;
1437 buf
= kmalloc(out_count
, GFP_KERNEL
);
1441 ndev
= filp
->private_data
;
1443 offset
+= snprintf(buf
+ offset
, out_count
- offset
,
1444 "NTB Device Information:\n");
1445 offset
+= snprintf(buf
+ offset
, out_count
- offset
,
1446 "Connection Type - \t\t%s\n",
1447 ndev
->conn_type
== NTB_CONN_TRANSPARENT
?
1448 "Transparent" : (ndev
->conn_type
== NTB_CONN_B2B
) ?
1449 "Back to back" : "Root Port");
1450 offset
+= snprintf(buf
+ offset
, out_count
- offset
,
1451 "Device Type - \t\t\t%s\n",
1452 ndev
->dev_type
== NTB_DEV_USD
?
1453 "USD/DSP" : "DSD/USP");
1454 offset
+= snprintf(buf
+ offset
, out_count
- offset
,
1455 "Max Number of Callbacks - \t%u\n",
1457 offset
+= snprintf(buf
+ offset
, out_count
- offset
,
1458 "Link Status - \t\t\t%s\n",
1459 ntb_hw_link_status(ndev
) ? "Up" : "Down");
1460 if (ntb_hw_link_status(ndev
)) {
1461 offset
+= snprintf(buf
+ offset
, out_count
- offset
,
1462 "Link Speed - \t\t\tPCI-E Gen %u\n",
1464 offset
+= snprintf(buf
+ offset
, out_count
- offset
,
1465 "Link Width - \t\t\tx%u\n",
1469 if (is_ntb_xeon(ndev
)) {
1474 offset
+= snprintf(buf
+ offset
, out_count
- offset
,
1475 "\nNTB Device Statistics:\n");
1476 offset
+= snprintf(buf
+ offset
, out_count
- offset
,
1477 "Upstream Memory Miss - \t%u\n",
1478 readw(ndev
->reg_base
+
1479 SNB_USMEMMISS_OFFSET
));
1481 offset
+= snprintf(buf
+ offset
, out_count
- offset
,
1482 "\nNTB Hardware Errors:\n");
1484 rc
= pci_read_config_word(ndev
->pdev
, SNB_DEVSTS_OFFSET
,
1487 offset
+= snprintf(buf
+ offset
, out_count
- offset
,
1488 "DEVSTS - \t%#06x\n", status16
);
1490 rc
= pci_read_config_word(ndev
->pdev
, SNB_LINK_STATUS_OFFSET
,
1493 offset
+= snprintf(buf
+ offset
, out_count
- offset
,
1494 "LNKSTS - \t%#06x\n", status16
);
1496 rc
= pci_read_config_dword(ndev
->pdev
, SNB_UNCERRSTS_OFFSET
,
1499 offset
+= snprintf(buf
+ offset
, out_count
- offset
,
1500 "UNCERRSTS - \t%#010x\n", status32
);
1502 rc
= pci_read_config_dword(ndev
->pdev
, SNB_CORERRSTS_OFFSET
,
1505 offset
+= snprintf(buf
+ offset
, out_count
- offset
,
1506 "CORERRSTS - \t%#010x\n", status32
);
1509 if (offset
> out_count
)
1512 ret
= simple_read_from_buffer(ubuf
, count
, offp
, buf
, offset
);
/*
 * File operations of the debugfs info file: simple_open() as the open
 * handler and ntb_debugfs_read() as the read handler.  No write handler is
 * visible in this extract.
 */
1517 static const struct file_operations ntb_debugfs_info
= {
1518 .owner
= THIS_MODULE
,
1519 .open
= simple_open
,
1520 .read
= ntb_debugfs_read
,
/*
 * ntb_setup_debugfs() - create the debugfs entries for one device.
 * @ndev: NTB device
 *
 * Checks debugfs_initialized() first (the action on failure is not part of
 * this extract).  Creates the module wide directory debugfs_dir, named
 * KBUILD_MODNAME, at the debugfs root, then a per-device directory named
 * after pci_name(ndev->pdev), and, if that directory exists, an info file
 * in it with mode S_IRUSR.
 *
 * NOTE(review): whether the module directory is only created when it does
 * not exist yet depends on a condition line absent from this extract.
 */
1523 static void ntb_setup_debugfs(struct ntb_device
*ndev
)
1525 if (!debugfs_initialized())
1529 debugfs_dir
= debugfs_create_dir(KBUILD_MODNAME
, NULL
);
1531 ndev
->debugfs_dir
= debugfs_create_dir(pci_name(ndev
->pdev
),
1533 if (ndev
->debugfs_dir
)
1534 ndev
->debugfs_info
= debugfs_create_file("info", S_IRUSR
,
/*
 * ntb_free_debugfs() - remove the debugfs entries of one device.
 * @ndev: NTB device
 *
 * Removes the per-device directory recursively and, once the module wide
 * directory debugfs_dir exists and simple_empty() reports it as empty,
 * removes that directory as well.
 */
1540 static void ntb_free_debugfs(struct ntb_device
*ndev
)
1542 debugfs_remove_recursive(ndev
->debugfs_dir
);
1544 if (debugfs_dir
&& simple_empty(debugfs_dir
)) {
1545 debugfs_remove_recursive(debugfs_dir
);
/*
 * ntb_hw_link_up() - bring the NTB link up.
 * @ndev: NTB device
 *
 * In transparent mode only a software NTB_LINK_UP event is raised through
 * ntb_link_event().  The register sequence (presumably the else branch,
 * whose line is absent from this extract) reads reg_ofs.lnk_cntl, clears
 * NTB_CNTL_LINK_DISABLE and NTB_CNTL_CFG_LOCK, sets the snoop bits for
 * BAR 2/3 and BAR 4 in both directions, additionally those for BAR 5 when
 * split_bar is set, and writes the value back.
 */
1550 static void ntb_hw_link_up(struct ntb_device
*ndev
)
1552 if (ndev
->conn_type
== NTB_CONN_TRANSPARENT
)
1553 ntb_link_event(ndev
, NTB_LINK_UP
);
1557 /* Let's bring the NTB link up */
1558 ntb_cntl
= readl(ndev
->reg_ofs
.lnk_cntl
);
1559 ntb_cntl
&= ~(NTB_CNTL_LINK_DISABLE
| NTB_CNTL_CFG_LOCK
);
1560 ntb_cntl
|= NTB_CNTL_P2S_BAR23_SNOOP
| NTB_CNTL_S2P_BAR23_SNOOP
;
1561 ntb_cntl
|= NTB_CNTL_P2S_BAR4_SNOOP
| NTB_CNTL_S2P_BAR4_SNOOP
;
1562 if (ndev
->split_bar
)
1563 ntb_cntl
|= NTB_CNTL_P2S_BAR5_SNOOP
|
1564 NTB_CNTL_S2P_BAR5_SNOOP
;
1566 writel(ntb_cntl
, ndev
->reg_ofs
.lnk_cntl
);
/*
 * ntb_hw_link_down() - bring the NTB link down.
 * @ndev: NTB device
 *
 * Mirror image of ntb_hw_link_up(): in transparent mode only a software
 * NTB_LINK_DOWN event is raised.  Otherwise reg_ofs.lnk_cntl is read, the
 * snoop bits for BAR 2/3 and BAR 4 (and BAR 5 when split_bar is set) are
 * cleared, NTB_CNTL_LINK_DISABLE and NTB_CNTL_CFG_LOCK are set, and the
 * value is written back.
 *
 * NOTE(review): how the transparent branch ends (presumably an early
 * return) is not part of this extract.
 */
1570 static void ntb_hw_link_down(struct ntb_device
*ndev
)
1574 if (ndev
->conn_type
== NTB_CONN_TRANSPARENT
) {
1575 ntb_link_event(ndev
, NTB_LINK_DOWN
);
1579 /* Bring NTB link down */
1580 ntb_cntl
= readl(ndev
->reg_ofs
.lnk_cntl
);
1581 ntb_cntl
&= ~(NTB_CNTL_P2S_BAR23_SNOOP
| NTB_CNTL_S2P_BAR23_SNOOP
);
1582 ntb_cntl
&= ~(NTB_CNTL_P2S_BAR4_SNOOP
| NTB_CNTL_S2P_BAR4_SNOOP
);
1583 if (ndev
->split_bar
)
1584 ntb_cntl
&= ~(NTB_CNTL_P2S_BAR5_SNOOP
|
1585 NTB_CNTL_S2P_BAR5_SNOOP
);
1586 ntb_cntl
|= NTB_CNTL_LINK_DISABLE
| NTB_CNTL_CFG_LOCK
;
1587 writel(ntb_cntl
, ndev
->reg_ofs
.lnk_cntl
);
/*
 * ntb_max_mw_detect() - set the memory window count for Xeon hardware.
 * @ndev: NTB device
 *
 * Sets limits.max_mw to HSX_SPLITBAR_MAX_MW when split_bar is set and to
 * SNB_MAX_MW otherwise (the else line itself is absent from this extract).
 */
1590 static void ntb_max_mw_detect(struct ntb_device
*ndev
)
1592 if (ndev
->split_bar
)
1593 ndev
->limits
.max_mw
= HSX_SPLITBAR_MAX_MW
;
1595 ndev
->limits
.max_mw
= SNB_MAX_MW
;
/*
 * ntb_xeon_detect() - read the Xeon PPD register and derive the topology.
 * @ndev: NTB device
 *
 * Marks the device as SNB_HW and reads one byte at NTB_PPD_OFFSET from PCI
 * config space.  From that value it derives:
 *   - dev_type: NTB_DEV_USD when SNB_PPD_DEV_TYPE is set, else NTB_DEV_DSD;
 *   - split_bar: 1 when SNB_PPD_SPLIT_BAR is set, else 0;
 *   - conn_type: B2B, RP or TRANSPARENT from the SNB_PPD_CONN_TYPE field;
 *     any other value is logged as an unknown PPD.
 * In transparent mode dev_type is forced to NTB_DEV_USD and split_bar is
 * set when the number of memory BARs equals HSX_SPLITBAR_MAX_MW + 1,
 * because, as the existing comments say, the hardware cannot report either
 * in that mode.  Finally ntb_max_mw_detect() derives limits.max_mw.
 *
 * NOTE(review): the case labels for B2B and RP, the error returns and the
 * final return are not part of this extract.
 */
1598 static int ntb_xeon_detect(struct ntb_device
*ndev
)
1604 ndev
->hw_type
= SNB_HW
;
1606 rc
= pci_read_config_byte(ndev
->pdev
, NTB_PPD_OFFSET
, &ppd
);
1610 if (ppd
& SNB_PPD_DEV_TYPE
)
1611 ndev
->dev_type
= NTB_DEV_USD
;
1613 ndev
->dev_type
= NTB_DEV_DSD
;
1615 ndev
->split_bar
= (ppd
& SNB_PPD_SPLIT_BAR
) ? 1 : 0;
1617 switch (ppd
& SNB_PPD_CONN_TYPE
) {
1619 dev_info(&ndev
->pdev
->dev
, "Conn Type = B2B\n");
1620 ndev
->conn_type
= NTB_CONN_B2B
;
1623 dev_info(&ndev
->pdev
->dev
, "Conn Type = RP\n");
1624 ndev
->conn_type
= NTB_CONN_RP
;
1626 case NTB_CONN_TRANSPARENT
:
1627 dev_info(&ndev
->pdev
->dev
, "Conn Type = TRANSPARENT\n");
1628 ndev
->conn_type
= NTB_CONN_TRANSPARENT
;
1630 * This mode is default to USD/DSP. HW does not report
1631 * properly in transparent mode as it has no knowledge of
1632 * NTB. We will just force correct here.
1634 ndev
->dev_type
= NTB_DEV_USD
;
1637 * This is a way for transparent BAR to figure out if we
1638 * are doing split BAR or not. There is no way for the hw
1639 * on the transparent side to know and set the PPD.
1641 bars_mask
= pci_select_bars(ndev
->pdev
, IORESOURCE_MEM
);
1642 bars
= hweight32(bars_mask
);
1643 if (bars
== (HSX_SPLITBAR_MAX_MW
+ 1))
1644 ndev
->split_bar
= 1;
1648 dev_err(&ndev
->pdev
->dev
, "Unknown PPD %x\n", ppd
);
1652 ntb_max_mw_detect(ndev
);
/*
 * ntb_atom_detect() - read the Atom (BWD) PPD register and derive the
 * topology.
 * @ndev: NTB device
 *
 * Marks the device as BWD_HW with BWD_MAX_MW memory windows and reads the
 * 32-bit value at NTB_PPD_OFFSET from PCI config space.  The connection
 * type is taken from (ppd & BWD_PPD_CONN_TYPE) >> 8; only a B2B branch is
 * visible, the other visible branch logs an unsupported configuration.
 * dev_type is NTB_DEV_DSD when BWD_PPD_DEV_TYPE is set and NTB_DEV_USD
 * otherwise.
 *
 * NOTE(review): the polarity of the device type bit is the opposite of the
 * one used in ntb_xeon_detect() (bit set means USD there) - presumably a
 * hardware difference, confirm against the datasheet.
 */
1657 static int ntb_atom_detect(struct ntb_device
*ndev
)
1662 ndev
->hw_type
= BWD_HW
;
1663 ndev
->limits
.max_mw
= BWD_MAX_MW
;
1665 rc
= pci_read_config_dword(ndev
->pdev
, NTB_PPD_OFFSET
, &ppd
);
1669 switch ((ppd
& BWD_PPD_CONN_TYPE
) >> 8) {
1671 dev_info(&ndev
->pdev
->dev
, "Conn Type = B2B\n");
1672 ndev
->conn_type
= NTB_CONN_B2B
;
1676 dev_err(&ndev
->pdev
->dev
, "Unsupported NTB configuration\n");
1680 if (ppd
& BWD_PPD_DEV_TYPE
)
1681 ndev
->dev_type
= NTB_DEV_DSD
;
1683 ndev
->dev_type
= NTB_DEV_USD
;
/*
 * ntb_device_detect() - detect the hardware topology of an NTB device.
 * @ndev: NTB device
 *
 * Dispatches to ntb_xeon_detect() or ntb_atom_detect() depending on
 * is_ntb_xeon() / is_ntb_atom() and then logs the device type, USD/DSP for
 * NTB_DEV_USD and DSD/USP for anything else.
 *
 * NOTE(review): the handling of an unknown device and of a non-zero rc is
 * not part of this extract.
 */
1688 static int ntb_device_detect(struct ntb_device
*ndev
)
1692 if (is_ntb_xeon(ndev
))
1693 rc
= ntb_xeon_detect(ndev
);
1694 else if (is_ntb_atom(ndev
))
1695 rc
= ntb_atom_detect(ndev
);
1699 dev_info(&ndev
->pdev
->dev
, "Device Type = %s\n",
1700 ndev
->dev_type
== NTB_DEV_USD
? "USD/DSP" : "DSD/USP");
/*
 * ntb_pci_probe() - PCI probe callback of the NTB driver.
 * @pdev: PCI device being bound
 * @id:   matching entry of the id table (not used in the visible code)
 *
 * Order of operations visible in this extract:
 *   1. allocate a zeroed struct ntb_device, apply the errata flags, mark
 *      the link as down, store ndev as driver data and create the debugfs
 *      entries;
 *   2. enable the PCI device, enable bus mastering and run
 *      ntb_device_detect();
 *   3. allocate ndev->mw with limits.max_mw entries and request the BARs
 *      (NTB_SPLITBAR_MASK when split_bar is set, NTB_BAR_MASK otherwise);
 *   4. map the register BAR (NTB_BAR_MMIO) and every memory window; the
 *      last window is mapped uncached when the WA_SNB_ERR workaround is
 *      active, all others write-combined;
 *   5. set the streaming and the consistent DMA mask, trying 64 bit first
 *      and 32 bit as the fallback;
 *   6. ntb_device_setup(), ntb_create_callbacks(), ntb_setup_interrupts();
 *   7. zero all local and remote scratchpads, since their contents survive
 *      a module reload;
 *   8. ntb_transport_init() and finally ntb_hw_link_up().
 * The trailing part is the unwind sequence, releasing everything in the
 * reverse order.
 *
 * NOTE(review): the goto labels and error checks are not part of this
 * extract.  The scratchpad loop reuses i, and the unwind loop starts from
 * i - 1 to unmap ndev->mw[]; if the unwind can be entered after the
 * scratchpad loop (for example when ntb_transport_init() fails), i would be
 * limits.max_spads instead of the number of mapped windows - confirm
 * against the complete function.
 */
1705 static int ntb_pci_probe(struct pci_dev
*pdev
, const struct pci_device_id
*id
)
1707 struct ntb_device
*ndev
;
1710 ndev
= kzalloc(sizeof(struct ntb_device
), GFP_KERNEL
);
1716 ntb_set_errata_flags(ndev
);
1718 ndev
->link_status
= NTB_LINK_DOWN
;
1719 pci_set_drvdata(pdev
, ndev
);
1720 ntb_setup_debugfs(ndev
);
1722 rc
= pci_enable_device(pdev
);
1726 pci_set_master(ndev
->pdev
);
1728 rc
= ntb_device_detect(ndev
);
1732 ndev
->mw
= kcalloc(ndev
->limits
.max_mw
, sizeof(struct ntb_mw
),
1739 if (ndev
->split_bar
)
1740 rc
= pci_request_selected_regions(pdev
, NTB_SPLITBAR_MASK
,
1743 rc
= pci_request_selected_regions(pdev
, NTB_BAR_MASK
,
1749 ndev
->reg_base
= pci_ioremap_bar(pdev
, NTB_BAR_MMIO
);
1750 if (!ndev
->reg_base
) {
1751 dev_warn(&pdev
->dev
, "Cannot remap BAR 0\n");
1756 for (i
= 0; i
< ndev
->limits
.max_mw
; i
++) {
1757 ndev
->mw
[i
].bar_sz
= pci_resource_len(pdev
, MW_TO_BAR(i
));
1760 * with the errata we need to steal last of the memory
1761 * windows for workarounds and they point to MMIO registers.
1763 if ((ndev
->wa_flags
& WA_SNB_ERR
) &&
1764 (i
== (ndev
->limits
.max_mw
- 1))) {
1766 ioremap_nocache(pci_resource_start(pdev
,
1768 ndev
->mw
[i
].bar_sz
);
1771 ioremap_wc(pci_resource_start(pdev
,
1773 ndev
->mw
[i
].bar_sz
);
1776 dev_info(&pdev
->dev
, "MW %d size %llu\n", i
,
1777 (unsigned long long) ndev
->mw
[i
].bar_sz
);
1778 if (!ndev
->mw
[i
].vbase
) {
1779 dev_warn(&pdev
->dev
, "Cannot remap BAR %d\n",
1786 rc
= pci_set_dma_mask(pdev
, DMA_BIT_MASK(64));
1788 rc
= pci_set_dma_mask(pdev
, DMA_BIT_MASK(32));
1792 dev_warn(&pdev
->dev
, "Cannot DMA highmem\n");
1795 rc
= pci_set_consistent_dma_mask(pdev
, DMA_BIT_MASK(64));
1797 rc
= pci_set_consistent_dma_mask(pdev
, DMA_BIT_MASK(32));
1801 dev_warn(&pdev
->dev
, "Cannot DMA consistent highmem\n");
1804 rc
= ntb_device_setup(ndev
);
1808 rc
= ntb_create_callbacks(ndev
);
1812 rc
= ntb_setup_interrupts(ndev
);
1816 /* The scratchpad registers keep the values between rmmod/insmod,
1819 for (i
= 0; i
< ndev
->limits
.max_spads
; i
++) {
1820 ntb_write_local_spad(ndev
, i
, 0);
1821 ntb_write_remote_spad(ndev
, i
, 0);
1824 rc
= ntb_transport_init(pdev
);
1828 ntb_hw_link_up(ndev
);
1833 ntb_free_interrupts(ndev
);
1835 ntb_free_callbacks(ndev
);
1837 ntb_device_free(ndev
);
1839 for (i
--; i
>= 0; i
--)
1840 iounmap(ndev
->mw
[i
].vbase
);
1841 iounmap(ndev
->reg_base
);
1843 if (ndev
->split_bar
)
1844 pci_release_selected_regions(pdev
, NTB_SPLITBAR_MASK
);
1846 pci_release_selected_regions(pdev
, NTB_BAR_MASK
);
1850 pci_disable_device(pdev
);
1852 ntb_free_debugfs(ndev
);
1855 dev_err(&pdev
->dev
, "Error loading %s module\n", KBUILD_MODNAME
);
/*
 * ntb_pci_remove() - PCI remove callback of the NTB driver.
 * @pdev: PCI device being unbound
 *
 * Tears the device down in the reverse order of the probe path: take the
 * hardware link down, free the transport, release interrupts, callbacks
 * and the platform specific state, unmap every memory window and the
 * register BAR, release the BAR regions (split or regular mask), disable
 * the PCI device and remove the debugfs entries.
 *
 * For SNB_HW devices ntb_max_mw_detect() is re-run before the unmap loop;
 * per the existing comment this restores limits.max_mw so that all mapped
 * windows are unmapped.
 *
 * NOTE(review): freeing of ndev->mw and of ndev itself is not visible in
 * this extract - confirm against the complete file.
 */
1859 static void ntb_pci_remove(struct pci_dev
*pdev
)
1861 struct ntb_device
*ndev
= pci_get_drvdata(pdev
);
1864 ntb_hw_link_down(ndev
);
1866 ntb_transport_free(ndev
->ntb_transport
);
1868 ntb_free_interrupts(ndev
);
1869 ntb_free_callbacks(ndev
);
1870 ntb_device_free(ndev
);
1872 /* need to reset max_mw limits so we can unmap properly */
1873 if (ndev
->hw_type
== SNB_HW
)
1874 ntb_max_mw_detect(ndev
);
1876 for (i
= 0; i
< ndev
->limits
.max_mw
; i
++)
1877 iounmap(ndev
->mw
[i
].vbase
);
1880 iounmap(ndev
->reg_base
);
1881 if (ndev
->split_bar
)
1882 pci_release_selected_regions(pdev
, NTB_SPLITBAR_MASK
);
1884 pci_release_selected_regions(pdev
, NTB_BAR_MASK
);
1885 pci_disable_device(pdev
);
1886 ntb_free_debugfs(ndev
);
/*
 * PCI driver description: named after the module (KBUILD_MODNAME), matches
 * the devices listed in ntb_pci_tbl and uses ntb_pci_probe() and
 * ntb_pci_remove() as the bind and unbind callbacks.
 */
1890 static struct pci_driver ntb_pci_driver
= {
1891 .name
= KBUILD_MODNAME
,
1892 .id_table
= ntb_pci_tbl
,
1893 .probe
= ntb_pci_probe
,
1894 .remove
= ntb_pci_remove
,
/* Hands ntb_pci_driver to the module_pci_driver() helper macro. */
1897 module_pci_driver(ntb_pci_driver
);