/* packet-nvme-rdma.c
 * Routines for NVM Express over Fabrics(RDMA) dissection
 * Copyright 2016
 * Code by Parav Pandit
 *
 * Wireshark - Network traffic analyzer
 * By Gerald Combs <gerald@wireshark.org>
 * Copyright 1998 Gerald Combs
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

/*
NVM Express is a high-speed interface for accessing solid state drives.
NVM Express specifications are maintained by the NVM Express industry
association at http://www.nvmexpress.org.

This file adds support to dissect NVM Express over Fabrics packets
for RDMA. It adds very basic support for dissecting commands and
completions.

Current dissection supports dissection of
(a) NVMe cmd and cqe
(b) NVMe Fabric command and cqe
As part of it, it also calculates cmd completion latencies.

This protocol is similar to iSCSI and SCSI dissection, where iSCSI is the
transport protocol carrying SCSI commands and responses. Similarly, the
NVMe Fabrics RDMA transport protocol carries NVMe commands.

     +----------+
     |   NVMe   |
     +-----+----+
           |
  +--------+------------+
  |     NVMe Fabrics    |
  +----+-----------+----+
       |           |
  +----+---+   +---+----+
  |  RDMA  |   |   FC   |
  +--------+   +--------+

References:
The NVM Express over Fabrics specification is located at
http://www.nvmexpress.org/wp-content/uploads/NVMe_over_Fabrics_1_0_Gold_20160605.pdf

The NVM Express specification is located at
http://www.nvmexpress.org/wp-content/uploads/NVM-Express-1_2a.pdf

The NVM Express RDMA TCP port assigned by IANA that maps to the RDMA IP
service TCP port can be found at
http://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.xhtml?search=NVM+Express
*/

#include "config.h"

#include <stdlib.h>

#include <epan/packet.h>
#include <epan/prefs.h>
#include <epan/conversation.h>
#include <epan/addr_resolv.h>

#include "packet-infiniband.h"
#include "packet-nvme.h"
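
/* The RDMA CM IP port-mapping convention builds a connection's service ID as
 * (port_space << 16) | port, with RDMA_PS_TCP = 0x0106. The masks below pick
 * out the 0x01 ULP byte, the 0x06 (TCP) protocol byte and the 16-bit TCP port,
 * which is how NVMe-oF RDMA connections (default port 4420) are recognized.
 */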
#define SID_ULP_MASK   0x00000000FF000000
#define SID_PROTO_MASK 0x0000000000FF0000
#define SID_PORT_MASK  0x000000000000FFFF

#define SID_ULP       0x01
#define SID_PROTO_TCP 0x06
#define NVME_RDMA_TCP_PORT_RANGE "4420" /* IANA registered */

#define SID_MASK (SID_ULP_MASK | SID_PROTO_MASK)
#define SID_ULP_TCP ((SID_ULP << 3 * 8) | (SID_PROTO_TCP << 2 * 8))

#define NVME_FABRICS_RDMA "NVMe Fabrics RDMA"

#define NVME_FABRIC_CMD_SIZE NVME_CMD_SIZE
#define NVME_FABRIC_CQE_SIZE NVME_CQE_SIZE

struct nvme_rdma_cmd_ctx;

/* The idea of RDMA context matching is as follows:
 * addresses, sizes, and keys are registered with nvme_add_data_request()
 * at RDMA request, the packet is matched to a queue (this is already done)
 * at RDMA request, we see address, size, key, and find the command with nvme_lookup_data_request()
 * we store the command context and packet sequence in the queue
 * the next RDMA transfer with the same sequence number will find a match from the queue to the command
 * knowing the command context, we can decode the buffer
 * We expect all RDMA transfers to be done in order, so storing in the queue context is OK
 */
struct nvme_rdma_q_ctx {
    struct nvme_q_ctx n_q_ctx;
    struct {
        struct nvme_rdma_cmd_ctx *cmd_ctx;
        uint32_t first_psn;
        uint32_t psn;
    } rdma_ctx;
};

struct nvme_rdma_cmd_ctx {
    struct nvme_cmd_ctx n_cmd_ctx;
};

void proto_reg_handoff_nvme_rdma(void);
void proto_register_nvme_rdma(void);

static int proto_nvme_rdma;
static dissector_handle_t ib_handler;
static int proto_ib;

/* NVMe Fabrics RDMA CM Private data */
static int hf_nvmeof_rdma_cm_req_recfmt;
static int hf_nvmeof_rdma_cm_req_qid;
static int hf_nvmeof_rdma_cm_req_hrqsize;
static int hf_nvmeof_rdma_cm_req_hsqsize;
static int hf_nvmeof_rdma_cm_req_cntlid;
static int hf_nvmeof_rdma_cm_req_reserved;

static int hf_nvmeof_rdma_cm_rsp_recfmt;
static int hf_nvmeof_rdma_cm_rsp_crqsize;
static int hf_nvmeof_rdma_cm_rsp_reserved;

static int hf_nvmeof_rdma_cm_rej_recfmt;
static int hf_nvmeof_rdma_cm_rej_status;

/* Data Transfers */
static int hf_nvmeof_from_host_unknown_data;
static int hf_nvmeof_read_to_host_req;
static int hf_nvmeof_read_to_host_unmatched;
static int hf_nvmeof_read_from_host_resp;
static int hf_nvmeof_read_from_host_prev;
static int hf_nvmeof_read_from_host_next;
static int hf_nvmeof_read_from_host_unmatched;
static int hf_nvmeof_write_to_host_req;
static int hf_nvmeof_write_to_host_prev;
static int hf_nvmeof_write_to_host_next;
static int hf_nvmeof_write_to_host_unmatched;
static int hf_nvmeof_to_host_unknown_data;

/* Tracking commands, transfers and CQEs */
static int hf_nvmeof_data_resp;
static int hf_nvmeof_cmd_qid;

/* Initialize the subtree pointers */
static int ett_cm;
static int ett_data;

static range_t *gPORT_RANGE;
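
/* Recover the wrapping nvme_rdma_cmd_ctx from a pointer to its embedded
 * nvme_cmd_ctx member (container_of-style pointer arithmetic).
 */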
static struct nvme_rdma_cmd_ctx* nvme_cmd_to_nvme_rdma_cmd(struct nvme_cmd_ctx *nvme_cmd)
{
    return (struct nvme_rdma_cmd_ctx*)(((char *)nvme_cmd) - offsetof(struct nvme_rdma_cmd_ctx, n_cmd_ctx));
}
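
/* Fetch the Infiniband conversation data, but only if the CM service ID
 * carries the TCP ULP marker and a port within the configured NVMe
 * subsystem port range; otherwise the conversation is not NVMe-oF.
 */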
static conversation_infiniband_data *get_conversion_data(conversation_t *conv)
{
    conversation_infiniband_data *conv_data;

    conv_data = (conversation_infiniband_data *)conversation_get_proto_data(conv, proto_ib);
    if (!conv_data)
        return NULL;

    if ((conv_data->service_id & SID_MASK) != SID_ULP_TCP)
        return NULL; /* the service id doesn't match that of TCP ULP - nothing for us to do here */

    if (!(value_is_in_range(gPORT_RANGE, (uint32_t)(conv_data->service_id & SID_PORT_MASK))))
        return NULL; /* the port doesn't match that of NVM Express Fabrics - nothing for us to do here */
    return conv_data;
}
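
/* Locate the unidirectional IB QP conversation for the destination, verify it
 * belongs to an NVMe-oF connection, then return the bidirectional conversation
 * that the NVMe queue context is attached to.
 */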
static conversation_t*
find_ib_conversation(packet_info *pinfo, conversation_infiniband_data **uni_conv_data)
{
    conversation_t *conv;
    conversation_infiniband_data *conv_data;

    conv = find_conversation(pinfo->num, &pinfo->dst, &pinfo->dst,
                             CONVERSATION_IBQP, pinfo->destport, pinfo->destport,
                             NO_ADDR_B|NO_PORT_B);
    if (!conv)
        return NULL; /* nothing to do with no conversation context */

    conv_data = get_conversion_data(conv);
    *uni_conv_data = conv_data;
    if (!conv_data)
        return NULL;

    /* now that we have found the unidirectional conversation, find the
     * bidirectional conversation, so that we can relate to the nvme q.
     */
    return find_conversation(pinfo->num, &pinfo->src, &pinfo->dst,
                             CONVERSATION_IBQP, pinfo->srcport, pinfo->destport, 0);
}
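
/* Extract the NVMe queue ID from the connect-request private data saved on the
 * CM conversation; offset 178 is taken to be where the 16-bit qid field of the
 * NVMe-oF RDMA CM REQ private data sits inside the stored MAD buffer. When the
 * data is not on this conversation, the lookup is redirected to the
 * conversation of the originating QP.
 */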
static uint16_t find_nvme_qid(packet_info *pinfo)
{
    conversation_t *conv;
    conversation_infiniband_data *conv_data;
    uint16_t qid;

    conv = find_conversation(pinfo->num, &pinfo->dst, &pinfo->dst,
                             CONVERSATION_IBQP, pinfo->destport, pinfo->destport,
                             NO_ADDR_B|NO_PORT_B);
    if (!conv)
        return 0; /* nothing to do with no conversation context */

    conv_data = get_conversion_data(conv);
    if (!conv_data)
        return 0;

    if (conv_data->client_to_server == false) {
        memcpy(&qid, &conv_data->mad_private_data[178], 2);
        return qid;
    }

    conv = find_conversation(pinfo->num, &pinfo->src, &pinfo->src,
                             CONVERSATION_IBQP, conv_data->src_qp, conv_data->src_qp,
                             NO_ADDR_B|NO_PORT_B);
    if (!conv)
        return 0;

    conv_data = get_conversion_data(conv);
    if (!conv_data)
        return 0;

    memcpy(&qid, &conv_data->mad_private_data[178], 2);
    return qid;
}
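
/* Return the per-queue NVMe context attached to the conversation, allocating
 * it and its lookup trees (pending/done commands, data requests, responses and
 * offsets) on first use.
 */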
static struct nvme_rdma_q_ctx*
find_add_q_ctx(packet_info *pinfo, conversation_t *conv)
{
    struct nvme_rdma_q_ctx *q_ctx;
    uint16_t qid;

    q_ctx = (struct nvme_rdma_q_ctx*)conversation_get_proto_data(conv, proto_nvme_rdma);
    if (!q_ctx) {
        qid = find_nvme_qid(pinfo);
        q_ctx = wmem_new0(wmem_file_scope(), struct nvme_rdma_q_ctx);
        q_ctx->n_q_ctx.pending_cmds = wmem_tree_new(wmem_file_scope());
        q_ctx->n_q_ctx.done_cmds = wmem_tree_new(wmem_file_scope());
        q_ctx->n_q_ctx.data_requests = wmem_tree_new(wmem_file_scope());
        q_ctx->n_q_ctx.data_responses = wmem_tree_new(wmem_file_scope());
        q_ctx->n_q_ctx.data_offsets = wmem_tree_new(wmem_file_scope());
        q_ctx->n_q_ctx.qid = qid;
        conversation_add_proto_data(conv, proto_nvme_rdma, q_ctx);
    }
    return q_ctx;
}

static conversation_infiniband_data*
find_ib_cm_conversation(packet_info *pinfo)
{
    conversation_t *conv;

    conv = find_conversation(pinfo->num, &pinfo->src, &pinfo->dst,
                             CONVERSATION_IBQP, pinfo->srcport, pinfo->destport, 0);
    if (!conv)
        return NULL;

    return get_conversion_data(conv);
}
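
/* Custom field formatters: add_rdma_cm_qid labels queue 0 as the Admin Queue
 * (AQ) and any other value as an I/O Queue (IOQ); add_zero_base renders a
 * 0's-based field as its 1-based value.
 */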
static void add_rdma_cm_qid(char *result, uint32_t val)
{
    snprintf(result, ITEM_LABEL_LENGTH, "%x (%s)", val, val ? "IOQ" : "AQ");
}

static void add_zero_base(char *result, uint32_t val)
{
    snprintf(result, ITEM_LABEL_LENGTH, "%u", val+1);
}
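
/* NVMe-oF RDMA CM REQ private data layout as dissected below (32 bytes):
 * recfmt(2) qid(2) hrqsize(2) hsqsize(2) cntlid(2) reserved(22).
 */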
static void dissect_rdma_cm_req_packet(tvbuff_t *tvb, proto_tree *tree)
{
    proto_tree *cm_tree;
    proto_item *ti;

    /* NVME-RDMA connect private data starts at offset 0 of RDMA-CM
     * private data
     */

    /* create display subtree for private data */
    ti = proto_tree_add_item(tree, proto_nvme_rdma, tvb, 0, 32, ENC_NA);
    cm_tree = proto_item_add_subtree(ti, ett_cm);

    proto_tree_add_item(cm_tree, hf_nvmeof_rdma_cm_req_recfmt, tvb,
                        0, 2, ENC_LITTLE_ENDIAN);

    proto_tree_add_item(cm_tree, hf_nvmeof_rdma_cm_req_qid, tvb,
                        2, 2, ENC_LITTLE_ENDIAN);
    proto_tree_add_item(cm_tree, hf_nvmeof_rdma_cm_req_hrqsize, tvb,
                        4, 2, ENC_LITTLE_ENDIAN);
    proto_tree_add_item(cm_tree, hf_nvmeof_rdma_cm_req_hsqsize, tvb,
                        6, 2, ENC_LITTLE_ENDIAN);
    proto_tree_add_item(cm_tree, hf_nvmeof_rdma_cm_req_cntlid, tvb,
                        8, 2, ENC_LITTLE_ENDIAN);
    proto_tree_add_item(cm_tree, hf_nvmeof_rdma_cm_req_reserved, tvb,
                        10, 22, ENC_NA);
}

static void dissect_rdma_cm_rsp_packet(tvbuff_t *tvb, proto_tree *tree)
{
    proto_tree *cm_tree;
    proto_item *ti;

    /* create display subtree for the private data that starts at offset 0 */
    ti = proto_tree_add_item(tree, proto_nvme_rdma, tvb, 0, 32, ENC_NA);
    cm_tree = proto_item_add_subtree(ti, ett_cm);

    proto_tree_add_item(cm_tree, hf_nvmeof_rdma_cm_rsp_recfmt, tvb,
                        0, 2, ENC_LITTLE_ENDIAN);
    proto_tree_add_item(cm_tree, hf_nvmeof_rdma_cm_rsp_crqsize, tvb,
                        2, 2, ENC_LITTLE_ENDIAN);
    proto_tree_add_item(cm_tree, hf_nvmeof_rdma_cm_rsp_reserved, tvb,
                        4, 28, ENC_NA);
}

static void dissect_rdma_cm_rej_packet(tvbuff_t *tvb, proto_tree *tree)
{
    proto_tree *cm_tree;
    proto_item *ti;

    /* create display subtree for the private data that starts at offset 0 */
    ti = proto_tree_add_item(tree, proto_nvme_rdma, tvb, 0, 4, ENC_NA);
    cm_tree = proto_item_add_subtree(ti, ett_cm);

    proto_tree_add_item(cm_tree, hf_nvmeof_rdma_cm_rej_recfmt, tvb,
                        0, 2, ENC_LITTLE_ENDIAN);
    proto_tree_add_item(cm_tree, hf_nvmeof_rdma_cm_rej_status, tvb,
                        2, 2, ENC_LITTLE_ENDIAN);
}

static bool dissect_rdma_cm_packet(tvbuff_t *tvb, proto_tree *tree,
                                   uint16_t cm_attribute_id)
{
    switch (cm_attribute_id) {
    case ATTR_CM_REQ:
        dissect_rdma_cm_req_packet(tvb, tree);
        break;
    case ATTR_CM_REP:
        dissect_rdma_cm_rsp_packet(tvb, tree);
        break;
    case ATTR_CM_REJ:
        dissect_rdma_cm_rej_packet(tvb, tree);
        break;
    default:
        break;
    }
    return true;
}

static bool
dissect_nvme_ib_cm(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree,
                   void *data)
{
    /* the infiniband dissector dissects the RDMA-CM header and passes the
     * RDMA-CM private data for further decoding, so we start at the RDMA-CM
     * private data here
     */
    conversation_infiniband_data *conv_data = NULL;
    struct infinibandinfo *info = (struct infinibandinfo *)data;

    conv_data = find_ib_cm_conversation(pinfo);
    if (!conv_data)
        return false;

    col_set_str(pinfo->cinfo, COL_PROTOCOL, NVME_FABRICS_RDMA);
    return dissect_rdma_cm_packet(tvb, tree, info->cm_attribute_id);
}
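
/* First pass: allocate a command context and queue it on the pending list
 * keyed by command ID. Later passes: look the command up in the done list;
 * if its completion was never captured, fall back to a throw-away context so
 * the rest of the dissection can still run.
 */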
static struct nvme_rdma_cmd_ctx*
bind_cmd_to_qctx(packet_info *pinfo, struct nvme_q_ctx *q_ctx,
                 uint16_t cmd_id)
{
    struct nvme_rdma_cmd_ctx *ctx;

    if (!PINFO_FD_VISITED(pinfo)) {
        ctx = wmem_new0(wmem_file_scope(), struct nvme_rdma_cmd_ctx);

        nvme_add_cmd_to_pending_list(pinfo, q_ctx,
                                     &ctx->n_cmd_ctx, (void*)ctx, cmd_id);
    } else {
        /* Already visited this frame */
        ctx = (struct nvme_rdma_cmd_ctx*)
                  nvme_lookup_cmd_in_done_list(pinfo, q_ctx, cmd_id);
        /* if we have already visited the frame but haven't found the completion
         * yet, we won't find the cmd in the done q, so allocate a dummy ctx for
         * doing the rest of the processing.
         */
        if (!ctx)
            ctx = wmem_new0(wmem_file_scope(), struct nvme_rdma_cmd_ctx);
    }
    return ctx;
}
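
/* An RC SEND from the host carries a 64-byte submission queue entry. The
 * opcode in byte 0 selects between a Fabrics command (NVME_FABRIC_OPC) and a
 * regular NVMe command; the command ID in bytes 2-3 binds the SQE to the queue
 * context so its completion can be matched later.
 */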
static void
dissect_nvme_rdma_cmd(tvbuff_t *nvme_tvb, packet_info *pinfo, proto_tree *root_tree,
                      proto_tree *nvme_tree, struct nvme_rdma_q_ctx *q_ctx)
{
    struct nvme_rdma_cmd_ctx *cmd_ctx;
    uint16_t cmd_id;
    uint8_t opcode;

    opcode = tvb_get_uint8(nvme_tvb, 0);
    cmd_id = tvb_get_uint16(nvme_tvb, 2, ENC_LITTLE_ENDIAN);
    cmd_ctx = bind_cmd_to_qctx(pinfo, &q_ctx->n_q_ctx, cmd_id);
    if (opcode == NVME_FABRIC_OPC) {
        cmd_ctx->n_cmd_ctx.fabric = true;
        dissect_nvmeof_fabric_cmd(nvme_tvb, pinfo, nvme_tree, &q_ctx->n_q_ctx, &cmd_ctx->n_cmd_ctx, 0, true);
    } else {
        cmd_ctx->n_cmd_ctx.fabric = false;
        dissect_nvme_cmd(nvme_tvb, pinfo, root_tree, &q_ctx->n_q_ctx, &cmd_ctx->n_cmd_ctx);
    }
}

static void dissect_rdma_read_transfer(tvbuff_t *data_tvb, packet_info *pinfo, proto_tree *data_tree,
                                       struct nvme_rdma_q_ctx *q_ctx, struct nvme_rdma_cmd_ctx *rdma_cmd, unsigned len)
{
    if (rdma_cmd->n_cmd_ctx.fabric == true)
        dissect_nvmeof_cmd_data(data_tvb, pinfo, data_tree, 0, &q_ctx->n_q_ctx, &rdma_cmd->n_cmd_ctx, len);
    else
        dissect_nvme_data_response(data_tvb, pinfo, data_tree, &q_ctx->n_q_ctx, &rdma_cmd->n_cmd_ctx, len, false);
}
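
/* Host-to-controller traffic: RDMA READ responses carry data the controller is
 * pulling from the host (multi-packet transfers are tracked by PSN so every
 * segment stays linked to its command), while RC SENDs carry 64-byte
 * submission queue entries.
 */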
static void
dissect_nvme_from_host(tvbuff_t *nvme_tvb, packet_info *pinfo,
                       proto_tree *root_tree, proto_tree *nvme_tree,
                       struct infinibandinfo *info,
                       struct nvme_rdma_q_ctx *q_ctx,
                       unsigned len)
{

    switch (info->opCode) {
    case RC_RDMA_READ_RESPONSE_FIRST:
    case RC_RDMA_READ_RESPONSE_MIDDLE:
    case RC_RDMA_READ_RESPONSE_LAST:
    case RC_RDMA_READ_RESPONSE_ONLY:
    {
        struct nvme_cmd_ctx *cmd = NULL;
        unsigned idx = 0;
        if (info->opCode == RC_RDMA_READ_RESPONSE_FIRST || info->opCode == RC_RDMA_READ_RESPONSE_ONLY) {
            cmd = nvme_lookup_data_tr_pkt(&q_ctx->n_q_ctx, 0, info->packet_seq_num);
            if (cmd && !PINFO_FD_VISITED(pinfo)) {
                q_ctx->rdma_ctx.cmd_ctx = nvme_cmd_to_nvme_rdma_cmd(cmd);
                q_ctx->rdma_ctx.psn = q_ctx->rdma_ctx.first_psn = info->packet_seq_num;
                cmd->tr_bytes = 0;
                cmd->first_tr_psn = info->packet_seq_num;
                cmd->data_tr_pkt_num[0] = pinfo->num;
            }
        } else {
            if (!PINFO_FD_VISITED(pinfo)) {
                if (q_ctx->rdma_ctx.cmd_ctx && (q_ctx->rdma_ctx.psn + 1) == info->packet_seq_num) {
                    idx = info->packet_seq_num - q_ctx->rdma_ctx.first_psn;
                    q_ctx->rdma_ctx.psn++;
                    cmd = &q_ctx->rdma_ctx.cmd_ctx->n_cmd_ctx;
                    if (idx < NVME_CMD_MAX_TRS)
                        cmd->data_tr_pkt_num[idx] = pinfo->num;
                    nvme_add_data_tr_pkt(&q_ctx->n_q_ctx, cmd, 0, info->packet_seq_num);
                    nvme_add_data_tr_off(&q_ctx->n_q_ctx, cmd->tr_bytes, pinfo->num);
                }
            } else {
                cmd = nvme_lookup_data_tr_pkt(&q_ctx->n_q_ctx, 0, info->packet_seq_num);
                if (cmd)
                    idx = info->packet_seq_num - cmd->first_tr_psn;
            }
        }
        if (cmd) {
            proto_item *ti = proto_tree_add_item(nvme_tree, hf_nvmeof_read_from_host_resp, nvme_tvb, 0, len, ENC_NA);
            proto_tree *rdma_tree = proto_item_add_subtree(ti, ett_data);
            nvme_publish_to_cmd_link(rdma_tree, nvme_tvb, hf_nvmeof_cmd_pkt, cmd);
            nvme_publish_to_data_req_link(rdma_tree, nvme_tvb, hf_nvmeof_data_req, cmd);
            if (idx && (idx-1) < NVME_CMD_MAX_TRS)
                nvme_publish_link(rdma_tree, nvme_tvb, hf_nvmeof_read_from_host_prev, cmd->data_tr_pkt_num[idx-1], false);
            if ((idx + 1) < NVME_CMD_MAX_TRS)
                nvme_publish_link(rdma_tree, nvme_tvb, hf_nvmeof_read_from_host_next, cmd->data_tr_pkt_num[idx+1], false);

            dissect_rdma_read_transfer(nvme_tvb, pinfo, rdma_tree, q_ctx, nvme_cmd_to_nvme_rdma_cmd(cmd), len);
            if (!PINFO_FD_VISITED(pinfo))
                cmd->tr_bytes += len;
        } else {
            proto_tree_add_item(nvme_tree, hf_nvmeof_read_from_host_unmatched,
                                nvme_tvb, 0, len, ENC_NA);
        }
        break;
    }
    case RC_SEND_ONLY:
        if (len >= NVME_FABRIC_CMD_SIZE)
            dissect_nvme_rdma_cmd(nvme_tvb, pinfo, root_tree, nvme_tree, q_ctx);
        else
            proto_tree_add_item(nvme_tree, hf_nvmeof_from_host_unknown_data,
                                nvme_tvb, 0, len, ENC_NA);
        break;
    default:
        proto_tree_add_item(nvme_tree, hf_nvmeof_from_host_unknown_data, nvme_tvb,
                            0, len, ENC_NA);
        break;
    }
}
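
/* A 16-byte RC SEND from the controller is a completion queue entry. The
 * command ID at byte offset 12 ties the CQE back to its pending command so the
 * pair can be cross-linked and the completion latency computed.
 */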
static void
dissect_nvme_rdma_cqe(tvbuff_t *nvme_tvb, packet_info *pinfo,
                      proto_tree *root_tree, proto_tree *nvme_tree,
                      struct nvme_rdma_q_ctx *q_ctx)
{
    struct nvme_rdma_cmd_ctx *cmd_ctx;
    uint16_t cmd_id;

    cmd_id = tvb_get_uint16(nvme_tvb, 12, ENC_LITTLE_ENDIAN);

    if (!PINFO_FD_VISITED(pinfo)) {

        cmd_ctx = (struct nvme_rdma_cmd_ctx*)
                      nvme_lookup_cmd_in_pending_list(&q_ctx->n_q_ctx, cmd_id);
        if (!cmd_ctx)
            goto not_found;

        /* we have already seen this cqe, or an identical one */
        if (cmd_ctx->n_cmd_ctx.cqe_pkt_num)
            goto not_found;

        cmd_ctx->n_cmd_ctx.cqe_pkt_num = pinfo->num;
        nvme_add_cmd_cqe_to_done_list(&q_ctx->n_q_ctx, &cmd_ctx->n_cmd_ctx, cmd_id);
    } else {
        /* Already visited this frame */
        cmd_ctx = (struct nvme_rdma_cmd_ctx*)
                      nvme_lookup_cmd_in_done_list(pinfo, &q_ctx->n_q_ctx, cmd_id);
        if (!cmd_ctx)
            goto not_found;
    }

    nvme_update_cmd_end_info(pinfo, &cmd_ctx->n_cmd_ctx);

    if (cmd_ctx->n_cmd_ctx.fabric)
        dissect_nvmeof_fabric_cqe(nvme_tvb, pinfo, nvme_tree, &cmd_ctx->n_cmd_ctx, 0);
    else
        dissect_nvme_cqe(nvme_tvb, pinfo, root_tree, &q_ctx->n_q_ctx, &cmd_ctx->n_cmd_ctx);
    return;

not_found:
    proto_tree_add_item(nvme_tree, hf_nvmeof_to_host_unknown_data, nvme_tvb,
                        0, NVME_FABRIC_CQE_SIZE, ENC_NA);
}
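
/* Controller-to-host traffic: RDMA READ requests ask the host for command
 * data, RDMA WRITEs deliver data to the host (tracked by PSN like the read
 * path), and RC SENDs carry 16-byte completion queue entries.
 */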
static void
dissect_nvme_to_host(tvbuff_t *nvme_tvb, packet_info *pinfo,
                     proto_tree *root_tree, proto_tree *nvme_tree,
                     struct infinibandinfo *info,
                     struct nvme_rdma_q_ctx *q_ctx, unsigned len)
{
    switch (info->opCode) {
    case RC_RDMA_READ_REQUEST:
    {
        struct keyed_data_req req = {
            .addr = info->reth_remote_address,
            .key = info->reth_remote_key,
            .size = info->reth_dma_length
        };
        struct nvme_cmd_ctx *cmd = NULL;
        if (!PINFO_FD_VISITED(pinfo)) {
            cmd = nvme_lookup_data_request(&q_ctx->n_q_ctx, &req);
            if (cmd)
                nvme_add_data_tr_pkt(&q_ctx->n_q_ctx, cmd, 0, info->packet_seq_num);
        } else {
            cmd = nvme_lookup_data_tr_pkt(&q_ctx->n_q_ctx, 0, info->packet_seq_num);
        }
        if (cmd) {
            proto_item *ti = proto_tree_add_item(nvme_tree,
                    hf_nvmeof_read_to_host_req, nvme_tvb, 0, 0, ENC_NA);
            proto_tree *rdma_tree = proto_item_add_subtree(ti, ett_data);
            cmd->data_req_pkt_num = pinfo->num;
            nvme_publish_to_data_resp_link(rdma_tree, nvme_tvb,
                                           hf_nvmeof_data_resp, cmd);
            nvme_publish_to_cmd_link(rdma_tree, nvme_tvb,
                                     hf_nvmeof_cmd_pkt, cmd);
            nvme_update_transfer_request(pinfo, cmd, &q_ctx->n_q_ctx);
        } else {
            proto_tree_add_item(nvme_tree, hf_nvmeof_read_to_host_unmatched,
                                nvme_tvb, 0, len, ENC_NA);
        }
        break;
    }
    case RC_SEND_ONLY:
    case RC_SEND_ONLY_INVAL:
        if (len == NVME_FABRIC_CQE_SIZE)
            dissect_nvme_rdma_cqe(nvme_tvb, pinfo, root_tree, nvme_tree, q_ctx);
        else
            proto_tree_add_item(nvme_tree, hf_nvmeof_to_host_unknown_data, nvme_tvb,
                                0, len, ENC_NA);
        break;
    case RC_RDMA_WRITE_ONLY:
    case RC_RDMA_WRITE_FIRST:
    case RC_RDMA_WRITE_LAST:
    case RC_RDMA_WRITE_MIDDLE:
    {
        struct nvme_cmd_ctx *cmd = NULL;
        unsigned idx = 0;
        if (info->opCode == RC_RDMA_WRITE_ONLY || info->opCode == RC_RDMA_WRITE_FIRST) {
            struct keyed_data_req req = {
                .addr = info->reth_remote_address,
                .key = info->reth_remote_key,
                .size = info->reth_dma_length
            };
            if (!PINFO_FD_VISITED(pinfo)) {
                cmd = nvme_lookup_data_request(&q_ctx->n_q_ctx, &req);
                if (cmd) {
                    nvme_add_data_tr_pkt(&q_ctx->n_q_ctx, cmd, 0, info->packet_seq_num);
                    cmd->first_tr_psn = info->packet_seq_num;
                    cmd->data_tr_pkt_num[0] = pinfo->num;
                    q_ctx->rdma_ctx.cmd_ctx = nvme_cmd_to_nvme_rdma_cmd(cmd);
                    q_ctx->rdma_ctx.first_psn = q_ctx->rdma_ctx.psn = info->packet_seq_num;
                }
            } else {
                cmd = nvme_lookup_data_tr_pkt(&q_ctx->n_q_ctx, 0, info->packet_seq_num);
            }
        } else {
            if (PINFO_FD_VISITED(pinfo)) {
                cmd = nvme_lookup_data_tr_pkt(&q_ctx->n_q_ctx, 0, info->packet_seq_num);
                if (cmd)
                    idx = info->packet_seq_num - cmd->first_tr_psn;
            } else if (q_ctx->rdma_ctx.cmd_ctx && (q_ctx->rdma_ctx.psn + 1) == info->packet_seq_num) {
                idx = info->packet_seq_num - q_ctx->rdma_ctx.first_psn;
                q_ctx->rdma_ctx.psn++;
                cmd = &q_ctx->rdma_ctx.cmd_ctx->n_cmd_ctx;
                if (idx < NVME_CMD_MAX_TRS)
                    cmd->data_tr_pkt_num[idx] = pinfo->num;
                nvme_add_data_tr_pkt(&q_ctx->n_q_ctx, cmd, 0, info->packet_seq_num);
                nvme_add_data_tr_off(&q_ctx->n_q_ctx, cmd->tr_bytes, pinfo->num);
            }
        }
        if (cmd) {
            proto_item *ti = proto_tree_add_item(nvme_tree, hf_nvmeof_write_to_host_req, nvme_tvb, 0, 0, ENC_NA);
            proto_tree *rdma_tree = proto_item_add_subtree(ti, ett_data);
            nvme_publish_to_cmd_link(rdma_tree, nvme_tvb, hf_nvmeof_cmd_pkt, cmd);
            if (idx && (idx-1) < NVME_CMD_MAX_TRS)
                nvme_publish_link(rdma_tree, nvme_tvb, hf_nvmeof_write_to_host_prev, cmd->data_tr_pkt_num[idx-1], false);
            if ((idx + 1) < NVME_CMD_MAX_TRS)
                nvme_publish_link(rdma_tree, nvme_tvb, hf_nvmeof_write_to_host_next, cmd->data_tr_pkt_num[idx+1], false);
            dissect_nvme_data_response(nvme_tvb, pinfo, root_tree, &q_ctx->n_q_ctx, cmd, len, false);
            if (!PINFO_FD_VISITED(pinfo))
                cmd->tr_bytes += len;
        } else {
            proto_tree_add_item(nvme_tree, hf_nvmeof_write_to_host_unmatched, nvme_tvb, 0, len, ENC_NA);
        }
        break;
    }
    default:
        proto_tree_add_item(nvme_tree, hf_nvmeof_to_host_unknown_data, nvme_tvb,
                            0, len, ENC_NA);
        break;
    }
}
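
/* Heuristic dissector for Infiniband payloads: claims the packet only when the
 * QP's conversation was set up through an NVMe-oF RDMA CM handshake, then
 * dispatches by direction (host-to-controller vs. controller-to-host).
 */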
static bool
dissect_nvme_ib(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, void *data)
{
    struct infinibandinfo *info = (struct infinibandinfo *)data;
    conversation_infiniband_data *conv_data = NULL;
    conversation_t *conv;
    proto_tree *nvme_tree;
    proto_item *ti;
    struct nvme_rdma_q_ctx *q_ctx;
    unsigned len = tvb_reported_length(tvb);

    conv = find_ib_conversation(pinfo, &conv_data);
    if (!conv)
        return false;

    q_ctx = find_add_q_ctx(pinfo, conv);
    if (!q_ctx)
        return false;

    col_set_str(pinfo->cinfo, COL_PROTOCOL, NVME_FABRICS_RDMA);

    ti = proto_tree_add_item(tree, proto_nvme_rdma, tvb, 0, len, ENC_NA);
    nvme_tree = proto_item_add_subtree(ti, ett_data);

    nvme_publish_qid(nvme_tree, hf_nvmeof_cmd_qid, q_ctx->n_q_ctx.qid);

    if (conv_data->client_to_server)
        dissect_nvme_from_host(tvb, pinfo, tree, nvme_tree, info, q_ctx, len);
    else
        dissect_nvme_to_host(tvb, pinfo, tree, nvme_tree, info, q_ctx, len);

    return true;
}

void
proto_register_nvme_rdma(void)
{
    module_t *nvme_rdma_module;
    static hf_register_info hf[] = {
        /* IB RDMA CM fields */
        { &hf_nvmeof_rdma_cm_req_recfmt,
            { "Record Format", "nvme-rdma.cm.req.recfmt",
               FT_UINT16, BASE_DEC, NULL, 0x0, NULL, HFILL}
        },
        { &hf_nvmeof_rdma_cm_req_qid,
            { "Queue Id", "nvme-rdma.cm.req.qid",
               FT_UINT16, BASE_CUSTOM, CF_FUNC(add_rdma_cm_qid), 0x0, NULL, HFILL}
        },
        { &hf_nvmeof_rdma_cm_req_hrqsize,
            { "RDMA QP Host Receive Queue Size", "nvme-rdma.cm.req.hrqsize",
               FT_UINT16, BASE_DEC, NULL, 0x0, NULL, HFILL}
        },
        { &hf_nvmeof_rdma_cm_req_hsqsize,
            { "RDMA QP Host Send Queue Size", "nvme-rdma.cm.req.hsqsize",
               FT_UINT16, BASE_CUSTOM, CF_FUNC(add_zero_base), 0x0, NULL, HFILL}
        },
        { &hf_nvmeof_rdma_cm_req_cntlid,
            { "Controller ID", "nvme-rdma.cm.req.cntlid",
               FT_UINT16, BASE_HEX, NULL, 0x0, NULL, HFILL}
        },
        { &hf_nvmeof_rdma_cm_req_reserved,
            { "Reserved", "nvme-rdma.cm.req.reserved",
               FT_BYTES, BASE_NONE, NULL, 0x0, NULL, HFILL}
        },
        { &hf_nvmeof_rdma_cm_rsp_recfmt,
            { "Record Format", "nvme-rdma.cm.rsp.recfmt",
               FT_UINT16, BASE_DEC, NULL, 0x0, NULL, HFILL}
        },
        { &hf_nvmeof_rdma_cm_rsp_crqsize,
            { "RDMA QP Controller Receive Queue Size", "nvme-rdma.cm.rsp.crqsize",
               FT_UINT16, BASE_DEC, NULL, 0x0, NULL, HFILL}
        },
        { &hf_nvmeof_rdma_cm_rsp_reserved,
            { "Reserved", "nvme-rdma.cm.rsp.reserved",
               FT_BYTES, BASE_NONE, NULL, 0x0, NULL, HFILL}
        },
        { &hf_nvmeof_rdma_cm_rej_recfmt,
            { "Record Format", "nvme-rdma.cm.rej.recfmt",
               FT_UINT16, BASE_DEC, NULL, 0x0, NULL, HFILL}
        },
        { &hf_nvmeof_rdma_cm_rej_status,
            { "Status", "nvme-rdma.cm.rej.status",
               FT_UINT16, BASE_HEX, NULL, 0x0, NULL, HFILL}
        },
        { &hf_nvmeof_from_host_unknown_data,
            { "Dissection unsupported", "nvme-rdma.unknown_data",
               FT_BYTES, BASE_NONE, NULL, 0x0, NULL, HFILL}
        },
        { &hf_nvmeof_read_to_host_req,
            { "RDMA Read Request Sent to Host", "nvme-rdma.read_to_host_req",
               FT_NONE, BASE_NONE, NULL, 0x0, NULL, HFILL}
        },
        { &hf_nvmeof_read_to_host_unmatched,
            { "RDMA Read Request Sent to Host (no Command Match)", "nvme-rdma.read_to_host_req",
               FT_NONE, BASE_NONE, NULL, 0x0, NULL, HFILL}
        },
        { &hf_nvmeof_read_from_host_resp,
            { "RDMA Read Transfer Sent from Host", "nvme-rdma.read_from_host_resp",
               FT_BYTES, BASE_NONE, NULL, 0x0, NULL, HFILL}
        },
        { &hf_nvmeof_read_from_host_prev,
            { "Previous Read Transfer", "nvme-rdma.read_from_host_prev",
              FT_FRAMENUM, BASE_NONE, NULL, 0x0, "Previous read transfer is in this frame", HFILL}
        },
        { &hf_nvmeof_read_from_host_next,
            { "Next Read Transfer", "nvme-rdma.read_from_host_next",
              FT_FRAMENUM, BASE_NONE, NULL, 0x0, "Next read transfer is in this frame", HFILL}
        },
        { &hf_nvmeof_read_from_host_unmatched,
            { "RDMA Read Transfer Sent from Host (no Command Match)", "nvme-rdma.read_from_host_resp",
               FT_BYTES, BASE_NONE, NULL, 0x0, NULL, HFILL}
        },
        { &hf_nvmeof_write_to_host_req,
            { "RDMA Write Request Sent to Host", "nvme-rdma.write_to_host_req",
               FT_NONE, BASE_NONE, NULL, 0x0, NULL, HFILL}
        },
        { &hf_nvmeof_write_to_host_prev,
            { "Previous Write Transfer", "nvme-rdma.write_to_host_prev",
              FT_FRAMENUM, BASE_NONE, NULL, 0x0, "Previous write transfer is in this frame", HFILL}
        },
        { &hf_nvmeof_write_to_host_next,
            { "Next Write Transfer", "nvme-rdma.write_to_host_next",
              FT_FRAMENUM, BASE_NONE, NULL, 0x0, "Next write transfer is in this frame", HFILL}
        },
        { &hf_nvmeof_write_to_host_unmatched,
            { "RDMA Write Request Sent to Host (no Command Match)", "nvme-rdma.write_to_host_req",
               FT_NONE, BASE_NONE, NULL, 0x0, NULL, HFILL}
        },
        { &hf_nvmeof_to_host_unknown_data,
            { "Dissection unsupported", "nvme-rdma.unknown_data",
               FT_BYTES, BASE_NONE, NULL, 0x0, NULL, HFILL}
        },
        { &hf_nvmeof_data_resp,
            { "DATA Transfer Response", "nvme-rdma.data_resp",
              FT_FRAMENUM, BASE_NONE, NULL, 0,
              "DATA transfer response for this transaction is in this frame", HFILL }
        },
        { &hf_nvmeof_cmd_qid,
            { "Cmd Qid", "nvme-rdma.cmd.qid",
              FT_UINT16, BASE_HEX, NULL, 0x0,
              "Qid on which command is issued", HFILL }
        },
    };
    static int *ett[] = {
        &ett_cm,
        &ett_data,
    };

    proto_nvme_rdma = proto_register_protocol("NVM Express Fabrics RDMA",
                                              NVME_FABRICS_RDMA, "nvme-rdma");

    proto_register_field_array(proto_nvme_rdma, hf, array_length(hf));
    proto_register_subtree_array(ett, array_length(ett));

    /* Register preferences */
    nvme_rdma_module = prefs_register_protocol(proto_nvme_rdma, NULL);

    range_convert_str(wmem_epan_scope(), &gPORT_RANGE, NVME_RDMA_TCP_PORT_RANGE, MAX_TCP_PORT);
    prefs_register_range_preference(nvme_rdma_module,
                                    "subsystem_ports",
                                    "Subsystem Ports Range",
                                    "Range of NVMe Subsystem ports "
                                    "(default " NVME_RDMA_TCP_PORT_RANGE ")",
                                    &gPORT_RANGE, MAX_TCP_PORT);
}
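
/* Hook into the Infiniband dissector: CM private data carries the NVMe-oF
 * connect handshake, while IB payloads carry SQEs, CQEs and RDMA data
 * transfers.
 */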
void
proto_reg_handoff_nvme_rdma(void)
{
    heur_dissector_add("infiniband.mad.cm.private", dissect_nvme_ib_cm,
                       "NVMe Fabrics RDMA CM packets",
                       "nvme_rdma_cm_private", proto_nvme_rdma, HEURISTIC_ENABLE);
    heur_dissector_add("infiniband.payload", dissect_nvme_ib,
                       "NVMe Fabrics RDMA packets",
                       "nvme_rdma", proto_nvme_rdma, HEURISTIC_ENABLE);
    ib_handler = find_dissector_add_dependency("infiniband", proto_nvme_rdma);
    proto_ib = dissector_handle_get_protocol_index(ib_handler);
}

/*
 * Editor modelines - https://www.wireshark.org/tools/modelines.html
 *
 * Local variables:
 * c-basic-offset: 4
 * tab-width: 8
 * indent-tabs-mode: nil
 * End:
 *
 * vi: set shiftwidth=4 tabstop=8 expandtab:
 * :indentSize=4:tabSize=8:noTabs=true:
 */