Structures
There are two sets of iovecs: req->iov, which manages the request's buffers, and pdu->iov / pdu->data_iov, which manage the PDU's data.
req->iov manages the buffer area allocated in io_unit_size units before the R2T is sent.
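As a rough picture, pdu->data_iov does not own any memory: its entries alias sub-ranges of the buffers owned by req->iov. A minimal standalone sketch of that aliasing (illustrative only, plain POSIX iovecs, not SPDK code):

#include <stdio.h>
#include <string.h>
#include <sys/uio.h>

int main(void)
{
	static char buf0[8], buf1[8];
	/* req_iov owns the buffers, like req->iov after buffer allocation. */
	struct iovec req_iov[2] = {
		{ .iov_base = buf0, .iov_len = sizeof(buf0) },
		{ .iov_base = buf1, .iov_len = sizeof(buf1) },
	};
	/* data_iov only references them, the way pdu->data_iov does after
	 * nvme_tcp_pdu_set_data_buf(): map 10 bytes starting at offset 4
	 * (think datao = 4, datal = 10). */
	struct iovec data_iov[2] = {
		{ .iov_base = (char *)req_iov[0].iov_base + 4, .iov_len = 4 },
		{ .iov_base = req_iov[1].iov_base, .iov_len = 6 },
	};
	memcpy(data_iov[0].iov_base, "abcd", 4);	/* lands in buf0[4..7] */
	memcpy(data_iov[1].iov_base, "efghij", 6);	/* lands in buf1[0..5] */
	printf("buf0+4=%.4s buf1=%.6s\n", buf0 + 4, buf1);
	return 0;
}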
struct nvme_tcp_pdu {
union {
/* to hold error pdu data */
uint8_t raw[SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE];
struct spdk_nvme_tcp_common_pdu_hdr common;
struct spdk_nvme_tcp_ic_req ic_req;
struct spdk_nvme_tcp_term_req_hdr term_req;
struct spdk_nvme_tcp_cmd capsule_cmd;
struct spdk_nvme_tcp_h2c_data_hdr h2c_data;
struct spdk_nvme_tcp_ic_resp ic_resp;
struct spdk_nvme_tcp_rsp capsule_resp;
struct spdk_nvme_tcp_c2h_data_hdr c2h_data;
struct spdk_nvme_tcp_r2t_hdr r2t;
} hdr;
bool has_hdgst;
bool ddgst_enable;
uint32_t data_digest_crc32;
uint8_t data_digest[SPDK_NVME_TCP_DIGEST_LEN];
uint8_t ch_valid_bytes;
uint8_t psh_valid_bytes;
uint8_t psh_len;
nvme_tcp_qpair_xfer_complete_cb cb_fn;
void *cb_arg;
/* The sock request ends with a 0 length iovec. Place the actual iovec immediately
* after it. There is a static assert below to check if the compiler inserted
* any unwanted padding */
struct spdk_sock_request sock_req;
struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS * 2];★iov handed to the sock layer when sending the whole PDU (headers + data)
struct iovec data_iov[NVME_TCP_MAX_SGL_DESCRIPTORS];★iov that references the PDU's data buffers (e.g. the areas owned by req->iov)
uint32_t data_iovcnt;
uint32_t data_len;
uint32_t rw_offset;
TAILQ_ENTRY(nvme_tcp_pdu) tailq;
uint32_t remaining;
uint32_t padding_len;
struct spdk_iov_sgl sgl;
struct spdk_dif_ctx *dif_ctx;
void *req; /* data tied to a tcp request */
void *qpair;
SLIST_ENTRY(nvme_tcp_pdu) slist;
};
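The static assert referenced in the comment above is omitted from this excerpt. Paraphrased (not the exact SPDK source; assumes SPDK_STATIC_ASSERT from spdk/assert.h), it checks that iov starts exactly where sock_req ends, so that sock_req's trailing zero-length iovec and iov[] form one contiguous array:

/* Paraphrased check; see nvme_tcp.h in the SPDK tree for the real assert. */
SPDK_STATIC_ASSERT(offsetof(struct nvme_tcp_pdu, sock_req) + sizeof(struct spdk_sock_request) ==
		   offsetof(struct nvme_tcp_pdu, iov),
		   "Compiler inserted padding between sock_req and iov");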
struct spdk_nvmf_request {
struct spdk_nvmf_qpair *qpair;
uint32_t length;
uint8_t xfer; /* type enum spdk_nvme_data_transfer */
bool data_from_pool;
bool dif_enabled;
void *data;
union nvmf_h2c_msg *cmd;
union nvmf_c2h_msg *rsp;
STAILQ_ENTRY(spdk_nvmf_request) buf_link;
uint64_t timeout_tsc;
uint32_t iovcnt;
struct iovec iov[NVMF_REQ_MAX_BUFFERS];
void *buffers[NVMF_REQ_MAX_BUFFERS];
struct spdk_nvmf_stripped_data *stripped_data;
struct spdk_nvmf_dif_info dif;
struct spdk_bdev_io_wait_entry bdev_io_wait;
spdk_nvmf_nvme_passthru_cmd_cb cmd_cb_fn;
struct spdk_nvmf_request *first_fused_req;
struct spdk_nvmf_request *req_to_abort;
struct spdk_poller *poller;
struct spdk_bdev_io *zcopy_bdev_io; /* Contains the bdev_io when using ZCOPY */
enum spdk_nvmf_zcopy_phase zcopy_phase;
TAILQ_ENTRY(spdk_nvmf_request) link;
};
Each of these iovecs is managed through an spdk_iov_sgl structure.
struct spdk_iov_sgl {
struct iovec *iov;
int iovcnt;
uint32_t iov_offset;
uint32_t total_size;
};
Initializing req->iov
Before sending the R2T, nvmf_tcp_req_process allocates receive buffers into req->iov.
static bool
nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
struct spdk_nvmf_tcp_req *tcp_req)
{
(..)
case TCP_REQUEST_STATE_NEED_BUFFER:
spdk_trace_record(TRACE_TCP_REQUEST_STATE_NEED_BUFFER, tqpair->qpair.qid, 0, (uintptr_t)tcp_req,
tqpair);
assert(tcp_req->req.xfer != SPDK_NVME_DATA_NONE);
if (!tcp_req->has_in_capsule_data && (&tcp_req->req != STAILQ_FIRST(&group->pending_buf_queue))) {
SPDK_DEBUGLOG(nvmf_tcp,
"Not the first element to wait for the buf for tcp_req(%p) on tqpair=%p\n",
tcp_req, tqpair);
/* This request needs to wait in line to obtain a buffer */
break;
}
/* Try to get a data buffer */
if (nvmf_tcp_req_parse_sgl(tcp_req, transport, group) < 0) {
break;
}
static int
nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_req *tcp_req,
struct spdk_nvmf_transport *transport,
struct spdk_nvmf_transport_poll_group *group)
{
struct spdk_nvmf_request *req = &tcp_req->req;
struct spdk_nvme_cmd *cmd;
struct spdk_nvme_sgl_descriptor *sgl;
struct spdk_nvmf_tcp_poll_group *tgroup;
enum spdk_nvme_tcp_term_req_fes fes;
struct nvme_tcp_pdu *pdu;
struct spdk_nvmf_tcp_qpair *tqpair;
uint32_t length, error_offset = 0;
cmd = &req->cmd->nvme_cmd;
sgl = &cmd->dptr.sgl1;
if (sgl->generic.type == SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK &&
sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_TRANSPORT) {
/* get request length from sgl */
length = sgl->unkeyed.length;
if (spdk_unlikely(length > transport->opts.max_io_size)) {
SPDK_ERRLOG("SGL length 0x%x exceeds max io size 0x%x\n",
length, transport->opts.max_io_size);
fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_LIMIT_EXCEEDED;
goto fatal_err;
}
/* fill request length and populate iovs */
req->length = length;
SPDK_DEBUGLOG(nvmf_tcp, "Data requested length= 0x%x\n", length);
if (spdk_unlikely(req->dif_enabled)) {
req->dif.orig_length = length;
length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx);
req->dif.elba_length = length;
}
if (nvmf_ctrlr_use_zcopy(req)) {
SPDK_DEBUGLOG(nvmf_tcp, "Using zero-copy to execute request %p\n", tcp_req);
req->data_from_pool = false;
return 0;
}
if (spdk_nvmf_request_get_buffers(req, group, transport, length)) {★Here io_unit_size-sized buffers covering the request length are allocated into req->iov.
/* No available buffers. Queue this request up. */
SPDK_DEBUGLOG(nvmf_tcp, "No available large data buffers. Queueing request %p\n",
tcp_req);
return 0;
}
/* backward compatible */
req->data = req->iov[0].iov_base;
SPDK_DEBUGLOG(nvmf_tcp, "Request %p took %d buffer/s from central pool, and data=%p\n",
tcp_req, req->iovcnt, req->data);
return 0;
} else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK &&
sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
uint64_t offset = sgl->address;
uint32_t max_len = transport->opts.in_capsule_data_size;
assert(tcp_req->has_in_capsule_data);
/* Capsule Cmd with In-capsule Data should get data length from pdu header */
tqpair = tcp_req->pdu->qpair;
/* the receiving pdu is not the same as the pdu in tcp_req */
pdu = tqpair->pdu_in_progress;
length = pdu->hdr.common.plen - pdu->psh_len - sizeof(struct spdk_nvme_tcp_common_pdu_hdr);
if (tqpair->host_ddgst_enable) {
length -= SPDK_NVME_TCP_DIGEST_LEN;
}
/* This error is not defined in the NVMe/TCP spec; treat it as a fatal error */
if (spdk_unlikely(length != sgl->unkeyed.length)) {
SPDK_ERRLOG("In-Capsule Data length 0x%x is not equal to SGL data length 0x%x\n",
length, sgl->unkeyed.length);
fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen);
goto fatal_err;
}
SPDK_DEBUGLOG(nvmf_tcp, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n",
offset, length);
/* The NVMe/TCP transport does not use ICDOFF to control the in-capsule data offset. ICDOFF should be '0' */
if (spdk_unlikely(offset != 0)) {
/* Not defined as a fatal error in the NVMe/TCP spec; handle it as one */
SPDK_ERRLOG("In-capsule offset 0x%" PRIx64 " should be ZERO in NVMe/TCP\n", offset);
fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER;
error_offset = offsetof(struct spdk_nvme_tcp_cmd, ccsqe.dptr.sgl1.address);
goto fatal_err;
}
if (spdk_unlikely(length > max_len)) {
/* According to the SPEC we should support ICD up to 8192 bytes for admin and fabric commands */
if (length <= SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE &&
(cmd->opc == SPDK_NVME_OPC_FABRIC || req->qpair->qid == 0)) {
/* Get a buffer from dedicated list */
SPDK_DEBUGLOG(nvmf_tcp, "Getting a buffer from control msg list\n");
tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
assert(tgroup->control_msg_list);
req->data = nvmf_tcp_control_msg_get(tgroup->control_msg_list);
if (!req->data) {
/* No available buffers. Queue this request up. */
SPDK_DEBUGLOG(nvmf_tcp, "No available ICD buffers. Queueing request %p\n", tcp_req);
return 0;
}
} else {
SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n",
length, max_len);
fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_LIMIT_EXCEEDED;
goto fatal_err;
}
} else {
req->data = tcp_req->buf;
}
req->length = length;
req->data_from_pool = false;
if (spdk_unlikely(req->dif_enabled)) {
length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx);
req->dif.elba_length = length;
}
req->iov[0].iov_base = req->data;
req->iov[0].iov_len = length;
req->iovcnt = 1;
return 0;
}
/* If we want to handle the problem here, then we can't skip the following data segment.
* Because this function runs before the data part is read, handle all errors as fatal errors for now. */
SPDK_ERRLOG("Invalid NVMf I/O Command SGL: Type 0x%x, Subtype 0x%x\n",
sgl->generic.type, sgl->generic.subtype);
fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER;
error_offset = offsetof(struct spdk_nvme_tcp_cmd, ccsqe.dptr.sgl1.generic);
fatal_err:
nvmf_tcp_send_c2h_term_req(tcp_req->pdu->qpair, tcp_req->pdu, fes, error_offset);
return -1;
}
Buffer management during H2C Data processing
When the PSH is processed, nvmf_tcp_h2c_data_hdr_handle assigns the buffers previously allocated into req->iov to pdu->data_iov, giving the PDU access to the already-allocated buffer area through pdu->data_iov. The assignment itself is done by nvme_tcp_pdu_set_data_buf.
static int
nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
{
(..)
case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
rc = nvme_tcp_read_data(tqpair->sock,
pdu->psh_len - pdu->psh_valid_bytes,
(void *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes);
if (rc < 0) {
return NVME_TCP_PDU_FATAL;
} else if (rc > 0) {
spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, tqpair->qpair.qid, rc, 0, tqpair);
pdu->psh_valid_bytes += rc;
}
if (pdu->psh_valid_bytes < pdu->psh_len) {
return NVME_TCP_PDU_IN_PROGRESS;
}
/* All headers (ch, psh, header digest) of this PDU have now been read from the socket. */
nvmf_tcp_pdu_psh_handle(tqpair, ttransport);★Here the buffer area for H2C is assigned (this leads into nvmf_tcp_h2c_data_hdr_handle).
break;
(..)
case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
/* check whether the data is valid, if not we just return */
if (!pdu->data_len) {
return NVME_TCP_PDU_IN_PROGRESS;
}
data_len = pdu->data_len;
/* data digest */
if (spdk_unlikely((pdu->hdr.common.pdu_type != SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ) &&
tqpair->host_ddgst_enable)) {
data_len += SPDK_NVME_TCP_DIGEST_LEN;
pdu->ddgst_enable = true;
}
rc = nvme_tcp_read_payload_data(tqpair->sock, pdu);★Here the received data is read.
if (rc < 0) {
return NVME_TCP_PDU_FATAL;
}
pdu->rw_offset += rc;
if (pdu->rw_offset < data_len) {
return NVME_TCP_PDU_IN_PROGRESS;
}
/* Generate and insert DIF to whole data block received if DIF is enabled */
if (spdk_unlikely(pdu->dif_ctx != NULL) &&
spdk_dif_generate_stream(pdu->data_iov, pdu->data_iovcnt, 0, data_len,
pdu->dif_ctx) != 0) {
SPDK_ERRLOG("DIF generate failed\n");
return NVME_TCP_PDU_FATAL;
}
/* All of this PDU has now been read from the socket. */
nvmf_tcp_pdu_payload_handle(tqpair, pdu);
break;
static void
nvmf_tcp_h2c_data_hdr_handle(struct spdk_nvmf_tcp_transport *ttransport,
struct spdk_nvmf_tcp_qpair *tqpair,
struct nvme_tcp_pdu *pdu)
{
struct spdk_nvmf_tcp_req *tcp_req;
uint32_t error_offset = 0;
enum spdk_nvme_tcp_term_req_fes fes = 0;
struct spdk_nvme_tcp_h2c_data_hdr *h2c_data;
h2c_data = &pdu->hdr.h2c_data;
SPDK_DEBUGLOG(nvmf_tcp, "tqpair=%p, r2t_info: datao=%u, datal=%u, cccid=%u, ttag=%u\n",
tqpair, h2c_data->datao, h2c_data->datal, h2c_data->cccid, h2c_data->ttag);
if (h2c_data->ttag > tqpair->resource_count) {
SPDK_DEBUGLOG(nvmf_tcp, "ttag %u is larger than allowed %u.\n", h2c_data->ttag,
tqpair->resource_count);
fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag);
goto err;
}
tcp_req = &tqpair->reqs[h2c_data->ttag - 1];★Look up the req structure managed by the qpair (by ttag) and bind it to this H2C.
if (spdk_unlikely(tcp_req->state != TCP_REQUEST_STATE_TRANSFERRING_HOST_TO_CONTROLLER &&
tcp_req->state != TCP_REQUEST_STATE_AWAITING_R2T_ACK)) {
SPDK_DEBUGLOG(nvmf_tcp, "tcp_req(%p), tqpair=%p, has error state in %d\n", tcp_req, tqpair,
tcp_req->state);
fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, ttag);
goto err;
}
if (spdk_unlikely(tcp_req->req.cmd->nvme_cmd.cid != h2c_data->cccid)) {
SPDK_DEBUGLOG(nvmf_tcp, "tcp_req(%p), tqpair=%p, expected %u but %u for cccid.\n", tcp_req, tqpair,
tcp_req->req.cmd->nvme_cmd.cid, h2c_data->cccid);
fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
error_offset = offsetof(struct spdk_nvme_tcp_h2c_data_hdr, cccid);
goto err;
}
if (tcp_req->h2c_offset != h2c_data->datao) {
SPDK_DEBUGLOG(nvmf_tcp,
"tcp_req(%p), tqpair=%p, expected data offset %u, but data offset is %u\n",
tcp_req, tqpair, tcp_req->h2c_offset, h2c_data->datao);
fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
goto err;
}
if ((h2c_data->datao + h2c_data->datal) > tcp_req->req.length) {
SPDK_DEBUGLOG(nvmf_tcp,
"tcp_req(%p), tqpair=%p, (datao=%u + datal=%u) exceeds requested length=%u\n",
tcp_req, tqpair, h2c_data->datao, h2c_data->datal, tcp_req->req.length);
fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
goto err;
}
pdu->req = tcp_req;
if (spdk_unlikely(tcp_req->req.dif_enabled)) {
pdu->dif_ctx = &tcp_req->req.dif.dif_ctx;
}
nvme_tcp_pdu_set_data_buf(pdu, tcp_req->req.iov, tcp_req->req.iovcnt,
h2c_data->datao, h2c_data->datal);★Assign tcp_req->req.iov to pdu->data_iov.
nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
return;
err:
nvmf_tcp_send_c2h_term_req(tqpair, pdu, fes, error_offset);
}
static void
nvme_tcp_pdu_set_data_buf(struct nvme_tcp_pdu *pdu,
struct iovec *iov /* tcp_req->req.iov */, int iovcnt /* tcp_req->req.iovcnt */,
uint32_t data_offset /* = h2c_data->datao */, uint32_t data_len /* = h2c_data->datal */)
{
uint32_t buf_offset, buf_len, remain_len, len;
uint8_t *buf;
struct spdk_iov_sgl *pdu_sgl, buf_sgl;
pdu->data_len = data_len; ★Store h2c_data->datal into pdu->data_len.
if (spdk_likely(!pdu->dif_ctx)) {
buf_offset = data_offset;
buf_len = data_len;
} else {
spdk_dif_ctx_set_data_offset(pdu->dif_ctx, data_offset);
spdk_dif_get_range_with_md(data_offset, data_len,
&buf_offset, &buf_len, pdu->dif_ctx);
}
if (iovcnt == 1) {
_nvme_tcp_pdu_set_data(pdu, (void *)((uint64_t)iov[0].iov_base + buf_offset), buf_len);
} else {
pdu_sgl = &pdu->sgl;
spdk_iov_sgl_init(pdu_sgl, pdu->data_iov, NVME_TCP_MAX_SGL_DESCRIPTORS, 0);★Initialize pdu_sgl over pdu->data_iov.
spdk_iov_sgl_init(&buf_sgl, iov, iovcnt, 0);
spdk_iov_sgl_advance(&buf_sgl, buf_offset);
remain_len = buf_len;
while (remain_len > 0) {
_nvme_tcp_sgl_get_buf(&buf_sgl, (void *)&buf, &len);
len = spdk_min(len, remain_len);
spdk_iov_sgl_advance(&buf_sgl, len);
remain_len -= len;
if (!spdk_iov_sgl_append(pdu_sgl, buf, len)) {
break;
}
}
assert(remain_len == 0);
assert(pdu_sgl->total_size == buf_len);
pdu->data_iovcnt = NVME_TCP_MAX_SGL_DESCRIPTORS - pdu_sgl->iovcnt;
}
}
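To make the multi-iov path concrete, a worked trace with hypothetical sizes (two 8 KB buffers in req->iov, an H2C Data PDU carrying datao = 4096 and datal = 8192, no DIF):

/*
 * iov = { {buf0, 8192}, {buf1, 8192} }, data_offset = 4096, data_len = 8192
 *   no dif_ctx, so buf_offset = 4096, buf_len = 8192
 *   iovcnt != 1, so the while loop appends:
 *     pdu->data_iov[0] = { buf0 + 4096, 4096 }   (the rest of buf0)
 *     pdu->data_iov[1] = { buf1,        4096 }   (the start of buf1)
 *   pdu->data_iovcnt = 2, pdu->data_len = 8192, pdu->sgl.total_size = 8192
 */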
/**
* Initialize struct spdk_iov_sgl with iov, iovcnt and iov_offset.
*
* \param s the spdk_iov_sgl to be filled.
* \param iov the iovec array used to fill s
* \param iovcnt the number of elements in iov
* \param iov_offset the initial offset into iov for s.
*/
static inline void
spdk_iov_sgl_init(struct spdk_iov_sgl *s, struct iovec *iov, int iovcnt,
uint32_t iov_offset)
{
s->iov = iov;
s->iovcnt = iovcnt;
s->iov_offset = iov_offset;
s->total_size = 0;
}
Receiving payload data
static int
nvme_tcp_read_payload_data(struct spdk_sock *sock /* = tqpair->sock */, struct nvme_tcp_pdu *pdu)
{
struct iovec iov[NVME_TCP_MAX_SGL_DESCRIPTORS + 1];
int iovcnt;
iovcnt = nvme_tcp_build_payload_iovs(iov, NVME_TCP_MAX_SGL_DESCRIPTORS + 1, pdu,
pdu->ddgst_enable, NULL);★Build the iov.
assert(iovcnt >= 0);
return nvme_tcp_readv_data(sock, iov, iovcnt);★Receive the data.
}
static int
nvme_tcp_build_payload_iovs(struct iovec *iov /* = iov[] on the stack */, int iovcnt /* = NVME_TCP_MAX_SGL_DESCRIPTORS + 1 */, struct nvme_tcp_pdu *pdu,
bool ddgst_enable, uint32_t *_mapped_length)
{
struct spdk_iov_sgl *sgl;
if (iovcnt == 0) {
return 0;
}
sgl = &pdu->sgl;
spdk_iov_sgl_init(sgl, iov, iovcnt, pdu->rw_offset);
if (spdk_likely(!pdu->dif_ctx)) {
if (!_nvme_tcp_sgl_append_multi(sgl, pdu->data_iov, pdu->data_iovcnt)) {
goto end;
}
} else {
if (!_nvme_tcp_sgl_append_multi_with_md(sgl, pdu->data_iov, pdu->data_iovcnt,
pdu->data_len, pdu->dif_ctx)) {
goto end;
}
}
/* Data Digest */
if (ddgst_enable) {
spdk_iov_sgl_append(sgl, pdu->data_digest, SPDK_NVME_TCP_DIGEST_LEN);
}
end:
if (_mapped_length != NULL) {
*_mapped_length = sgl->total_size;
}
return iovcnt - sgl->iovcnt;
}
static inline bool
_nvme_tcp_sgl_append_multi(struct spdk_iov_sgl *s, struct iovec *iov /* = pdu->data_iov */, int iovcnt /* = pdu->data_iovcnt */)
{
int i;
for (i = 0; i < iovcnt; i++) {
if (!spdk_iov_sgl_append(s, iov[i].iov_base, iov[i].iov_len)) {
return false;
}
}
return true;
}
/**
* Append the data to the struct spdk_iov_sgl pointed by s
*
* \param s the address of the struct spdk_iov_sgl
* \param data the data buffer to be appended
* \param data_len the length of the data.
*
* \return true if all the data is appended.
*/
static inline bool
spdk_iov_sgl_append(struct spdk_iov_sgl *s, uint8_t *data, uint32_t data_len)
{
if (s->iov_offset >= data_len) {
s->iov_offset -= data_len;
} else {
assert(s->iovcnt > 0);
s->iov->iov_base = data + s->iov_offset;
s->iov->iov_len = data_len - s->iov_offset;
s->total_size += data_len - s->iov_offset;
s->iov_offset = 0;
s->iov++;
s->iovcnt--;
if (s->iovcnt == 0) {
return false;
}
}
return true;
}
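A minimal worked example of the skip behavior (hypothetical values; assumes the struct spdk_iov_sgl definition and the two inline functions quoted above are in scope). A nonzero iov_offset makes the first appends consume bytes instead of storing them, which is exactly how nvme_tcp_build_payload_iovs resumes from pdu->rw_offset after a partial read:

#include <assert.h>
#include <stdint.h>
#include <sys/uio.h>

int main(void)
{
	struct iovec out[4];
	struct spdk_iov_sgl s;
	uint8_t a[8], b[8];

	spdk_iov_sgl_init(&s, out, 4, 6);	/* 6 bytes already received -> skip them */
	spdk_iov_sgl_append(&s, a, 8);		/* skips 6, stores 2: out[0] = { a + 6, 2 } */
	spdk_iov_sgl_append(&s, b, 8);		/* stores all 8:      out[1] = { b, 8 }     */
	assert(s.total_size == 10);		/* only the not-yet-received bytes got iovecs */
	return 0;
}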
static int
nvme_tcp_readv_data(struct spdk_sock *sock, struct iovec *iov, int iovcnt)
{
int ret;
assert(sock != NULL);
if (iov == NULL || iovcnt == 0) {
return 0;
}
if (iovcnt == 1) {
return nvme_tcp_read_data(sock, iov->iov_len, iov->iov_base);
}
ret = spdk_sock_readv(sock, iov, iovcnt);
if (ret > 0) {
return ret;
}
if (ret < 0) {
if (errno == EAGAIN || errno == EWOULDBLOCK) {
return 0;
}
/* For connection reset, do not output an error log */
if (errno != ECONNRESET) {
SPDK_ERRLOG("spdk_sock_readv() failed, errno %d: %s\n",
errno, spdk_strerror(errno));
}
}
/* connection closed */
return NVME_TCP_CONNECTION_FATAL;
}
■ Original behavior
① While processing the Capsule PDU: nvmf_tcp_req_parse_sgl uses spdk_nvmf_request_get_buffers to prepare as many io_unit_size buffers for the tcp_req as the request length requires (see the buffer-count sketch at the end of this section).
② On receiving the H2C Data PDU: nvmf_tcp_h2c_data_hdr_handle calls nvme_tcp_pdu_set_data_buf, which
initializes pdu_sgl (pdu->sgl) over pdu->data_iov and buf_sgl over the tcp_req's iov, each via spdk_iov_sgl_init, and
assigns the tcp_req's buffer areas to the PDU. ※See below for what an sgl is.
③ case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD → nvmf_tcp_h2c_data_payload_handle.
④ When nvmf_tcp_h2c_data_payload_handle finds that all of the data has been received, the request transitions to TCP_REQUEST_STATE_READY_TO_EXECUTE.
→ The data is copied from the kernel receive buffer into the PDU's buffers (whose backing store is the tcp_req's buffers).
If only part of the data has been received, processing returns to ③ (the next read resumes from pdu->rw_offset).
※ What is an sgl?
struct spdk_iov_sgl {
struct iovec *iov; # the I/O vector
int iovcnt; # number of elements in the iov
uint32_t iov_offset; # offset within the iov filled so far
uint32_t total_size; # size of the iov (length of the data)
};
② allocated buf (8 KB) ← copy
② allocated buf (8 KB) ← copy
② allocated buf (8 KB) ← copy
② allocated buf (8 KB) ← copy
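For scale, the buffer-count sketch referenced in ① (illustrative numbers only; 8 KB is assumed as a typical io_unit_size, and the ceiling division mirrors what the transport performs when filling req->iov):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical request: a 32 KB host write, io_unit_size = 8 KB. */
	uint32_t io_unit_size = 8192;
	uint32_t length = 32768;
	uint32_t num_buffers = (length + io_unit_size - 1) / io_unit_size;
	printf("req->iov gets %u buffers\n", num_buffers);	/* 4, as in the diagram above */
	return 0;
}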