Table of Contents
- main processing
- SPDK components
  - Poller/SPDK thread
  - Thread initialization
- SetProperty
- Discovery
- Queue creation at Connect
- Write
- Read
- Reservation
- Fused operation
- Host access check (Discovery)
- Host access check (Connect)
main processing
CPU profile of the nvmf_tgt process (perf call graph). Nearly all cycles are spent under reactor_run -> _reactor_run -> spdk_thread_poll -> nvmf_poll_group_poll, and within the TCP socket poll the epoll_wait syscall accounts for the largest share.
main
- spdk_app_start
  - spdk_reactors_start
    - 99.52% reactor_run
      - 97.79% _reactor_run
        - 88.54% spdk_thread_poll
          - 82.88% thread_poll
            - 67.61% thread_execute_poller
              - 61.15% nvmf_poll_group_poll
                - 58.74% nvmf_transport_poll_group_poll
                  - 57.96% nvmf_tcp_poll_group_poll
                    - 56.39% spdk_sock_group_poll
                      - 56.25% spdk_sock_group_poll_count
                        - 54.49% sock_group_impl_poll_count
                          - 45.63% epoll_wait
                            - 15.33% entry_SYSCALL_64_after_hwframe
                              - 14.74% do_syscall_64
                                  10.01% syscall_enter_from_user_mode
                                - 4.41% __x64_sys_epoll_wait
                                  - 3.68% do_epoll_wait
                                    - 2.48% __fdget
                                        2.44% __fget_light
                                0.54% __x64_sys_epoll_wait
                          - 5.94% posix_sock_group_impl_poll
                            - 2.17% _sock_flush
                                  spdk_sock_prep_reqs
                              0.94% __libc_enable_asynccancel
                              0.74% __libc_disable_asynccancel
                            0.53% posix_sock_group_impl_poll
                    0.69% nvmf_tcp_poll_group_poll
                1.01% malloc_completion_poller
                0.70% accel_comp_poll
            - 5.29% msg_queue_run_batch
                  spdk_ring_dequeue
                1.17% __memset_avx2_unaligned
                0.83% accel_comp_poll
                0.60% nvmf_poll_group_poll
                0.53% malloc_completion_poller
            - 3.00% spdk_get_ticks
              - 2.85% rte_get_timer_cycles
                - 2.61% rte_get_tsc_cycles
                      rte_rdtsc
              0.65% thread_update_stats
        - 2.65% event_queue_run_batch
              spdk_ring_dequeue
        - 1.26% reactor_post_process_lw_thread
            0.53% spdk_thread_is_exited
          0.60% __memset_avx2_unaligned
          0.54% spdk_thread_get_last_tsc
          2.10% nvmf_transport_poll_group_poll
                nvmf_tcp_poll_group_poll
          0.55% spdk_sock_group_poll
                spdk_sock_group_poll_count
                  0.53% sock_group_impl_poll_count
1.30% __libc_start_main
      main
      spdk_app_start
      spdk_reactors_start
      reactor_run
      _reactor_run
      spdk_thread_poll
      thread_poll
      thread_execute_poller
        1.16% nvmf_poll_group_poll
              nvmf_transport_poll_group_poll
SPDK components
Poller/SPDK thread
Thread initialization
At tgt initialization, SPDK registers nvmf_poll_group_poll, the entry point of nvmf processing, as the poller of each poll group. From then on, SPDK's reactor threads start TCP socket processing from nvmf_poll_group_poll.
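For reference, here is a minimal sketch of how a poller is registered on an SPDK thread. The names my_poll/my_ctx and the zero-microsecond period are illustrative only; the pattern follows the SPDK_POLLER_REGISTER call in nvmf_tgt_create_poll_group below.
#include "spdk/thread.h"

struct my_ctx {
	struct spdk_poller *poller;
	uint64_t work_done;
};

/* Hypothetical poller body: return BUSY when work was done, IDLE otherwise. */
static int
my_poll(void *arg)
{
	struct my_ctx *ctx = arg;

	/* ... check a queue, a socket, a completion ring, etc. ... */
	ctx->work_done++;
	return SPDK_POLLER_BUSY;
}

/* Must run on the SPDK thread that should own the poller. */
static void
my_start(struct my_ctx *ctx)
{
	/* period 0: the reactor calls my_poll on every spdk_thread_poll() iteration */
	ctx->poller = SPDK_POLLER_REGISTER(my_poll, ctx, 0);
}

static void
my_stop(struct my_ctx *ctx)
{
	spdk_poller_unregister(&ctx->poller);
}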
nvmf_tgt_create_poll_group
static int
nvmf_tgt_create_poll_group(void *io_device, void *ctx_buf)
{
struct spdk_nvmf_tgt *tgt = io_device;
struct spdk_nvmf_poll_group *group = ctx_buf;
struct spdk_nvmf_transport *transport;
struct spdk_thread *thread = spdk_get_thread();
uint32_t sid;
int rc;
★ Initialize the spdk_nvmf_poll_group.
TAILQ_INIT(&group->tgroups); ★ initialize the transport poll group list
TAILQ_INIT(&group->qpairs); ★ initialize the qpair list
group->thread = thread;
group->poller = SPDK_POLLER_REGISTER(nvmf_poll_group_poll, group, 0);
SPDK_DTRACE_PROBE1(nvmf_create_poll_group, spdk_thread_get_id(thread));
★ Add every transport of the target to this poll group.
TAILQ_FOREACH(transport, &tgt->transports, link) {
rc = nvmf_poll_group_add_transport(group, transport);
if (rc != 0) {
nvmf_tgt_cleanup_poll_group(group);
return rc;
}
}
group->num_sgroups = tgt->max_subsystems;
group->sgroups = calloc(tgt->max_subsystems, sizeof(struct spdk_nvmf_subsystem_poll_group));
if (!group->sgroups) {
nvmf_tgt_cleanup_poll_group(group);
return -ENOMEM;
}
for (sid = 0; sid < tgt->max_subsystems; sid++) {
struct spdk_nvmf_subsystem *subsystem;
subsystem = tgt->subsystems[sid];
if (!subsystem) {
continue;
}
if (nvmf_poll_group_add_subsystem(group, subsystem, NULL, NULL) != 0) {
nvmf_tgt_cleanup_poll_group(group);
return -1;
}
}
pthread_mutex_lock(&tgt->mutex);
TAILQ_INSERT_TAIL(&tgt->poll_groups, group, link);
pthread_mutex_unlock(&tgt->mutex);
return 0;
}
spdk_nvmf_tgt_create
struct spdk_nvmf_tgt *
spdk_nvmf_tgt_create(struct spdk_nvmf_target_opts *opts)
{
struct spdk_nvmf_tgt *tgt, *tmp_tgt;
if (strnlen(opts->name, NVMF_TGT_NAME_MAX_LENGTH) == NVMF_TGT_NAME_MAX_LENGTH) {
SPDK_ERRLOG("Provided target name exceeds the max length of %u.\n", NVMF_TGT_NAME_MAX_LENGTH);
return NULL;
}
TAILQ_FOREACH(tmp_tgt, &g_nvmf_tgts, link) {
if (!strncmp(opts->name, tmp_tgt->name, NVMF_TGT_NAME_MAX_LENGTH)) {
SPDK_ERRLOG("Provided target name must be unique.\n");
return NULL;
}
}
tgt = calloc(1, sizeof(*tgt));
if (!tgt) {
return NULL;
}
snprintf(tgt->name, NVMF_TGT_NAME_MAX_LENGTH, "%s", opts->name);
if (!opts || !opts->max_subsystems) {
tgt->max_subsystems = SPDK_NVMF_DEFAULT_MAX_SUBSYSTEMS;
} else {
tgt->max_subsystems = opts->max_subsystems;
}
if (!opts) {
tgt->crdt[0] = 0;
tgt->crdt[1] = 0;
tgt->crdt[2] = 0;
} else {
tgt->crdt[0] = opts->crdt[0];
tgt->crdt[1] = opts->crdt[1];
tgt->crdt[2] = opts->crdt[2];
}
if (!opts) {
tgt->discovery_filter = SPDK_NVMF_TGT_DISCOVERY_MATCH_ANY;
} else {
tgt->discovery_filter = opts->discovery_filter;
}
tgt->discovery_genctr = 0;
TAILQ_INIT(&tgt->transports);
TAILQ_INIT(&tgt->poll_groups);
tgt->subsystems = calloc(tgt->max_subsystems, sizeof(struct spdk_nvmf_subsystem *));
if (!tgt->subsystems) {
free(tgt);
return NULL;
}
pthread_mutex_init(&tgt->mutex, NULL);
spdk_io_device_register(tgt,
nvmf_tgt_create_poll_group,
nvmf_tgt_destroy_poll_group,
sizeof(struct spdk_nvmf_poll_group),
tgt->name);
TAILQ_INSERT_HEAD(&g_nvmf_tgts, tgt, link);
return tgt;
}
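spdk_io_device_register() in spdk_nvmf_tgt_create above is what causes nvmf_tgt_create_poll_group to run once per thread: the create callback is invoked on the thread that first calls spdk_get_io_channel() for the registered tgt pointer. Below is a minimal sketch of this io_device/io_channel pattern; my_dev, my_ch_ctx, and the callbacks are hypothetical.
#include "spdk/thread.h"

struct my_dev { int id; };
struct my_ch_ctx { uint64_t ios; };

/* Runs on each thread the first time that thread calls spdk_get_io_channel(dev). */
static int
my_ch_create(void *io_device, void *ctx_buf)
{
	struct my_ch_ctx *ctx = ctx_buf;
	ctx->ios = 0;
	return 0;
}

static void
my_ch_destroy(void *io_device, void *ctx_buf)
{
	/* release per-thread resources here */
}

static void
my_dev_register_and_use(struct my_dev *dev)
{
	struct spdk_io_channel *ch;
	struct my_ch_ctx *ctx;

	spdk_io_device_register(dev, my_ch_create, my_ch_destroy,
				sizeof(struct my_ch_ctx), "my_dev");

	ch = spdk_get_io_channel(dev);   /* triggers my_ch_create on this thread */
	ctx = spdk_io_channel_get_ctx(ch);
	ctx->ios++;
	spdk_put_io_channel(ch);
}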
SetProperty
nvmf_property_set
Sets a property on the spdk_nvmf_ctrlr object.
static int
nvmf_property_set(struct spdk_nvmf_request *req)
{
struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
struct spdk_nvmf_fabric_prop_set_cmd *cmd = &req->cmd->prop_set_cmd;
struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
const struct nvmf_prop *prop;
uint64_t value;
uint8_t size;
bool ret;
SPDK_DEBUGLOG(nvmf, "size %d, offset 0x%x, value 0x%" PRIx64 "\n",
cmd->attrib.size, cmd->ofst, cmd->value.u64);
★ The attrib field specifies the property size (4 or 8 bytes).
switch (cmd->attrib.size) {
case SPDK_NVMF_PROP_SIZE_4:
size = 4;
break;
case SPDK_NVMF_PROP_SIZE_8:
size = 8;
break;
default:
SPDK_DEBUGLOG(nvmf, "Invalid size value %d\n", cmd->attrib.size);
response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
prop = find_prop(cmd->ofst, size);
if (prop == NULL || prop->set_cb == NULL) {
SPDK_INFOLOG(nvmf, "Invalid offset 0x%x\n", cmd->ofst);
response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
SPDK_DEBUGLOG(nvmf, "name: %s\n", prop->name);
value = cmd->value.u64;
if (prop->size == 4) {
ret = prop->set_cb(ctrlr, (uint32_t)value);
} else if (size != prop->size) {
/* The size must be 4 and the prop->size is 8. Figure out which part of the property to write. */
assert(size == 4);
assert(prop->size == 8);
if (cmd->ofst == prop->ofst) {
ret = prop->set_cb(ctrlr, (uint32_t)value);
} else {
ret = prop->set_upper_cb(ctrlr, (uint32_t)value);
}
} else {
ret = prop->set_cb(ctrlr, (uint32_t)value);
if (ret) {
ret = prop->set_upper_cb(ctrlr, (uint32_t)(value >> 32));
}
}
if (!ret) {
SPDK_ERRLOG("prop set_cb failed\n");
response->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
response->status.sc = SPDK_NVMF_FABRIC_SC_INVALID_PARAM;
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
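To make the offset/size dispatch above concrete, here is an illustration-only helper that mirrors the same branch logic for a hypothetical 8-byte property at offset 0x28 (an ASQ-like layout; the offsets are assumptions, not taken from this document).
/* Illustration of the dispatch in nvmf_property_set; not SPDK code. */
enum which_cb { CB_LOWER, CB_UPPER, CB_BOTH };

static enum which_cb
pick_cb(uint32_t prop_ofst, uint8_t prop_size, uint32_t cmd_ofst, uint8_t cmd_size)
{
	if (prop_size == 4) {
		return CB_LOWER;                  /* 4-byte property: set_cb only */
	}
	if (cmd_size != prop_size) {
		/* 4-byte access into an 8-byte property */
		return (cmd_ofst == prop_ofst) ? CB_LOWER   /* lower half -> set_cb */
					       : CB_UPPER;  /* upper half -> set_upper_cb */
	}
	return CB_BOTH;                           /* 8-byte write: set_cb then set_upper_cb */
}

/* pick_cb(0x28, 8, 0x28, 4) == CB_LOWER
 * pick_cb(0x28, 8, 0x2C, 4) == CB_UPPER
 * pick_cb(0x28, 8, 0x28, 8) == CB_BOTH
 */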
Discovery
(gdb) bt
#0 nvmf_generate_discovery_log (tgt=0x7369685420202e6d, hostnqn=0x6574737973627573 <error: Cannot access memory at address 0x6574737973627573>,
log_page_size=0x20797265766f6373, cmd_source_trid=0x696420656874206f) at ctrlr_discovery.c:70
#1 0x000055ab16988a1e in nvmf_get_discovery_log_page (tgt=0x55ab176855f0,
hostnqn=0x55ab1769e712 "nqn.2014-08.org.nvmexpress:uuid:a7da905e-547a-4c8c-878a-67ab9bffcfa9", iov=0x55ab1774b400, iovcnt=1, offset=0, length=16,
cmd_source_trid=0x7ffe82071810) at ctrlr_discovery.c:185
#2 0x000055ab16982ee0 in nvmf_ctrlr_get_log_page (req=0x55ab1774b3c0) at ctrlr.c:2470
#3 0x000055ab16985744 in nvmf_ctrlr_process_admin_cmd (req=0x55ab1774b3c0) at ctrlr.c:3344
#4 0x000055ab16987c91 in spdk_nvmf_request_exec (req=0x55ab1774b3c0) at ctrlr.c:4304
#5 0x000055ab169a68c6 in nvmf_tcp_req_process (ttransport=0x55ab176a1820, tcp_req=0x55ab1774b3c0) at tcp.c:2876
#6 0x000055ab169a288e in nvmf_tcp_capsule_cmd_hdr_handle (ttransport=0x55ab176a1820, tqpair=0x55ab1769c5e0, pdu=0x2000164f3bb0) at tcp.c:1516
#7 0x000055ab169a485e in nvmf_tcp_sock_process (tqpair=0x55ab1769c5e0) at tcp.c:2190
#8 0x000055ab169a716f in nvmf_tcp_sock_cb (arg=0x55ab1769c5e0, group=0x55ab176a1930, sock=0x55ab1769b9c0) at tcp.c:3009
#9 0x000055ab16a34bf4 in sock_group_impl_poll_count (group_impl=0x55ab176a1c00, group=0x55ab176a1930, max_events=32) at sock.c:695
#10 0x000055ab16a34c8e in spdk_sock_group_poll_count (group=0x55ab176a1930, max_events=32) at sock.c:721
#11 0x000055ab16a34aed in spdk_sock_group_poll (group=0x55ab176a1930) at sock.c:672
#12 0x000055ab169a779c in nvmf_tcp_poll_group_poll (group=0x55ab176a1b60) at tcp.c:3162
#13 0x000055ab1699d440 in nvmf_transport_poll_group_poll (group=0x55ab176a1b60) at transport.c:582
#14 0x000055ab16992889 in nvmf_poll_group_poll (ctx=0x55ab17687ee0) at nvmf.c:71
#15 0x000055ab16a2c802 in thread_execute_poller (thread=0x55ab17687af0, poller=0x55ab17687f60) at thread.c:795
#16 0x000055ab16a2ccb2 in thread_poll (thread=0x55ab17687af0, max_msgs=0, now=12220954473300) at thread.c:917
#17 0x000055ab16a2cde3 in spdk_thread_poll (thread=0x55ab17687af0, max_msgs=0, now=12220954473300) at thread.c:968
#18 0x000055ab169fcc1b in _reactor_run (reactor=0x55ab176841c0) at reactor.c:901
#19 0x000055ab169fcd0d in reactor_run (arg=0x55ab176841c0) at reactor.c:939
#20 0x000055ab169fd0e1 in spdk_reactors_start () at reactor.c:1041
#21 0x000055ab169f99a9 in spdk_app_start (opts_user=0x7ffe82072350, start_fn=0x55ab168ce0ba <nvmf_tgt_started>, arg1=0x0) at app.c:628
#22 0x000055ab168ce206 in main (argc=1, argv=0x7ffe82072518) at nvmf_main.c:47
Queue creation at Connect
SPDK handles connection requests from the host in nvmf_tcp_handle_connect.
#0 spdk_nvmf_tgt_new_qpair (tgt=0x55ab1769b9c0, qpair=0x55ab1769c800) at nvmf.c:861
#1 0x000055ab169a1b67 in nvmf_tcp_handle_connect (transport=0x55ab176a1820, port=0x55ab177f0e60, sock=0x55ab1769b9c0) at tcp.c:1188
#2 0x000055ab169a1bc2 in nvmf_tcp_port_accept (transport=0x55ab176a1820, port=0x55ab177f0e60) at tcp.c:1204
#3 0x000055ab169a1c1e in nvmf_tcp_accept (ctx=0x55ab176a1820) at tcp.c:1221
#4 0x000055ab16a2ca91 in thread_execute_timed_poller (thread=0x55ab17684780, poller=0x55ab176a1970, now=12682910563652) at thread.c:854
#5 0x000055ab16a2cd3d in thread_poll (thread=0x55ab17684780, max_msgs=0, now=12682910563652) at thread.c:942
#6 0x000055ab16a2cde3 in spdk_thread_poll (thread=0x55ab17684780, max_msgs=0, now=12682910563652) at thread.c:968
#7 0x000055ab169fcc1b in _reactor_run (reactor=0x55ab176841c0) at reactor.c:901
#8 0x000055ab169fcd0d in reactor_run (arg=0x55ab176841c0) at reactor.c:939
#9 0x000055ab169fd0e1 in spdk_reactors_start () at reactor.c:1041
#10 0x000055ab169f99a9 in spdk_app_start (opts_user=0x7ffe82072350, start_fn=0x55ab168ce0ba <nvmf_tgt_started>, arg1=0x0) at app.c:628
#11 0x000055ab168ce206 in main (argc=1, argv=0x7ffe82072518) at nvmf_main.c:47
nvmf_tcp_accept
static int
nvmf_tcp_accept(void *ctx)
{
struct spdk_nvmf_transport *transport = ctx;
struct spdk_nvmf_tcp_transport *ttransport;
struct spdk_nvmf_tcp_port *port;
uint32_t count = 0;
ttransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_tcp_transport, transport);★ Recover the enclosing spdk_nvmf_tcp_transport from its embedded spdk_nvmf_transport member.
TAILQ_FOREACH(port, &ttransport->ports, link) {
★ Run accept processing for each TCP listen port.
count += nvmf_tcp_port_accept(transport, port);
}
return count > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}
nvmf_tcp_port_accept
static uint32_t
nvmf_tcp_port_accept(struct spdk_nvmf_transport *transport, struct spdk_nvmf_tcp_port *port)
{
struct spdk_sock *sock;
uint32_t count = 0;
int i;
for (i = 0; i < NVMF_TCP_MAX_ACCEPT_SOCK_ONE_TIME; i++) {
★ Accept a pending connection request from a host (spdk_sock_accept returns NULL when there is none).
sock = spdk_sock_accept(port->listen_sock);
if (sock == NULL) {
break;
}
count++;
★ A connection request arrived from the host; process it.
nvmf_tcp_handle_connect(transport, port, sock);
}
return count;
}
nvmf_tcp_handle_connect
static void
nvmf_tcp_handle_connect(struct spdk_nvmf_transport *transport,
struct spdk_nvmf_tcp_port *port,
struct spdk_sock *sock)
{
struct spdk_nvmf_tcp_qpair *tqpair;
int rc;
SPDK_DEBUGLOG(nvmf_tcp, "New connection accepted on %s port %s\n",
port->trid->traddr, port->trid->trsvcid);
★ Allocate a tcp_qpair for the new connection.
tqpair = calloc(1, sizeof(struct spdk_nvmf_tcp_qpair));
if (tqpair == NULL) {
SPDK_ERRLOG("Could not allocate new connection.\n");
spdk_sock_close(&sock);
return;
}
tqpair->sock = sock;
tqpair->state_cntr[TCP_REQUEST_STATE_FREE] = 0;
tqpair->port = port;
tqpair->qpair.transport = transport;
rc = spdk_sock_getaddr(tqpair->sock, tqpair->target_addr,
sizeof(tqpair->target_addr), &tqpair->target_port,
tqpair->initiator_addr, sizeof(tqpair->initiator_addr),
&tqpair->initiator_port);
if (rc < 0) {
SPDK_ERRLOG("spdk_sock_getaddr() failed of tqpair=%p\n", tqpair);
nvmf_tcp_qpair_destroy(tqpair);
return;
}
★ Initialize the qpair: pass the tgt stored in the spdk_nvmf_transport structure together with the new qpair.
spdk_nvmf_tgt_new_qpair(transport->tgt, &tqpair->qpair);
}
spdk_nvmf_tgt_new_qpair
void
spdk_nvmf_tgt_new_qpair(struct spdk_nvmf_tgt *tgt, struct spdk_nvmf_qpair *qpair)
{
struct spdk_nvmf_poll_group *group;
struct nvmf_new_qpair_ctx *ctx;
group = spdk_nvmf_get_optimal_poll_group(qpair);
if (group == NULL) {
if (tgt->next_poll_group == NULL) {
tgt->next_poll_group = TAILQ_FIRST(&tgt->poll_groups);
if (tgt->next_poll_group == NULL) {
SPDK_ERRLOG("No poll groups exist.\n");
spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
return;
}
}
group = tgt->next_poll_group;
tgt->next_poll_group = TAILQ_NEXT(group, link);
}
ctx = calloc(1, sizeof(*ctx));
if (!ctx) {
SPDK_ERRLOG("Unable to send message to poll group.\n");
spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
return;
}
ctx->qpair = qpair;
ctx->group = group;
★ Hand the new qpair to the chosen poll group's thread, where it becomes a polling target.
spdk_thread_send_msg(group->thread, _nvmf_poll_group_add, ctx);
}
Write
(gdb) bt
#0 nvmf_bdev_ctrlr_write_cmd (bdev=0x55ab17699a70, desc=0x2000165a7000, ch=0x1000, req=0x7ffe820719b0) at ctrlr_bdev.c:294
#1 0x000055ab16986e76 in nvmf_ctrlr_process_io_cmd (req=0x55ab17762550) at ctrlr.c:4008
#2 0x000055ab16987ca2 in spdk_nvmf_request_exec (req=0x55ab17762550) at ctrlr.c:4306
#3 0x000055ab169a68c6 in nvmf_tcp_req_process (ttransport=0x55ab176a1820, tcp_req=0x55ab17762550) at tcp.c:2876
#4 0x000055ab169a29f9 in nvmf_tcp_capsule_cmd_payload_handle (ttransport=0x55ab176a1820, tqpair=0x55ab1769b2a0, pdu=0x2000164f3bb0) at tcp.c:1552
#5 0x000055ab169a35c7 in _nvmf_tcp_pdu_payload_handle (tqpair=0x55ab1769b2a0, pdu=0x2000164f3bb0) at tcp.c:1813
#6 0x000055ab169a3973 in nvmf_tcp_pdu_payload_handle (tqpair=0x55ab1769b2a0, pdu=0x2000164f3bb0) at tcp.c:1877
#7 0x000055ab169a4978 in nvmf_tcp_sock_process (tqpair=0x55ab1769b2a0) at tcp.c:2224
#8 0x000055ab169a716f in nvmf_tcp_sock_cb (arg=0x55ab1769b2a0, group=0x55ab176a1930, sock=0x55ab17699a70) at tcp.c:3009
#9 0x000055ab16a34bf4 in sock_group_impl_poll_count (group_impl=0x55ab176a1c00, group=0x55ab176a1930, max_events=32) at sock.c:695
#10 0x000055ab16a34c8e in spdk_sock_group_poll_count (group=0x55ab176a1930, max_events=32) at sock.c:721
#11 0x000055ab16a34aed in spdk_sock_group_poll (group=0x55ab176a1930) at sock.c:672
#12 0x000055ab169a779c in nvmf_tcp_poll_group_poll (group=0x55ab176a1b60) at tcp.c:3162
#13 0x000055ab1699d440 in nvmf_transport_poll_group_poll (group=0x55ab176a1b60) at transport.c:582
#14 0x000055ab16992889 in nvmf_poll_group_poll (ctx=0x55ab17687ee0) at nvmf.c:71
#15 0x000055ab16a2c802 in thread_execute_poller (thread=0x55ab17687af0, poller=0x55ab17687f60) at thread.c:795
#16 0x000055ab16a2ccb2 in thread_poll (thread=0x55ab17687af0, max_msgs=0, now=16896592086508) at thread.c:917
#17 0x000055ab16a2cde3 in spdk_thread_poll (thread=0x55ab17687af0, max_msgs=0, now=16896592086508) at thread.c:968
#18 0x000055ab169fcc1b in _reactor_run (reactor=0x55ab176841c0) at reactor.c:901
#19 0x000055ab169fcd0d in reactor_run (arg=0x55ab176841c0) at reactor.c:939
#20 0x000055ab169fd0e1 in spdk_reactors_start () at reactor.c:1041
#21 0x000055ab169f99a9 in spdk_app_start (opts_user=0x7ffe82072350, start_fn=0x55ab168ce0ba <nvmf_tgt_started>, arg1=0x0) at app.c:628
#22 0x000055ab168ce206 in main (argc=1, argv=0x7ffe82072518) at nvmf_main.c:47
nvmf_tcp_sock_process
static int
nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
{
int rc = 0;
struct nvme_tcp_pdu *pdu;
enum nvme_tcp_pdu_recv_state prev_state;
uint32_t data_len;
struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport,
struct spdk_nvmf_tcp_transport, transport);
/* The loop here is to allow for several back-to-back state changes. */
do {
prev_state = tqpair->recv_state;
SPDK_DEBUGLOG(nvmf_tcp, "tqpair(%p) recv pdu entering state %d\n", tqpair, prev_state);
pdu = tqpair->pdu_in_progress;
assert(pdu || tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
switch (tqpair->recv_state) {
/* Wait for the common header */
case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
★ The initial state; transition to NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH.
if (!pdu) {
pdu = SLIST_FIRST(&tqpair->tcp_pdu_free_queue);
if (spdk_unlikely(!pdu)) {
return NVME_TCP_PDU_IN_PROGRESS;
}
SLIST_REMOVE_HEAD(&tqpair->tcp_pdu_free_queue, slist);
tqpair->pdu_in_progress = pdu;
}
memset(pdu, 0, offsetof(struct nvme_tcp_pdu, qpair));
nvmf_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
/* FALLTHROUGH */
case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
★ Read the common PDU header from the TCP socket here.
if (spdk_unlikely(tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING)) {
return rc;
}
rc = nvme_tcp_read_data(tqpair->sock,
sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes,
(void *)&pdu->hdr.common + pdu->ch_valid_bytes);
if (rc < 0) {
SPDK_DEBUGLOG(nvmf_tcp, "will disconnect tqpair=%p\n", tqpair);
return NVME_TCP_PDU_FATAL;
} else if (rc > 0) {
pdu->ch_valid_bytes += rc;
spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, tqpair->qpair.qid, rc, 0, tqpair);
}
if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) {
return NVME_TCP_PDU_IN_PROGRESS;
}
/* The command header of this PDU has now been read from the socket. */
nvmf_tcp_pdu_ch_handle(tqpair);
break;
/* Wait for the pdu specific header */
case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
rc = nvme_tcp_read_data(tqpair->sock,
pdu->psh_len - pdu->psh_valid_bytes,
(void *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes);
if (rc < 0) {
return NVME_TCP_PDU_FATAL;
} else if (rc > 0) {
spdk_trace_record(TRACE_TCP_READ_FROM_SOCKET_DONE, tqpair->qpair.qid, rc, 0, tqpair);
pdu->psh_valid_bytes += rc;
}
if (pdu->psh_valid_bytes < pdu->psh_len) {
return NVME_TCP_PDU_IN_PROGRESS;
}
/* All header(ch, psh, head digist) of this PDU has now been read from the socket. */
nvmf_tcp_pdu_psh_handle(tqpair, ttransport);
break;
/* Wait for the req slot */
case NVME_TCP_PDU_RECV_STATE_AWAIT_REQ:
nvmf_tcp_capsule_cmd_hdr_handle(ttransport, tqpair, pdu);★ Handle the capsule command header.
break;
case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
/* check whether the data is valid, if not we just return */
if (!pdu->data_len) {
return NVME_TCP_PDU_IN_PROGRESS;
}
data_len = pdu->data_len;
/* data digest */
★ Data digest setup. Only a flag in the PDU structure is set here; the actual digest check happens later.
if (spdk_unlikely((pdu->hdr.common.pdu_type != SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ) &&
tqpair->host_ddgst_enable)) {
data_len += SPDK_NVME_TCP_DIGEST_LEN;
pdu->ddgst_enable = true;
}
★ Read the payload here. Once the full payload has been received, nvmf_tcp_pdu_payload_handle below issues the write to the bdev (see the backtrace above).
rc = nvme_tcp_read_payload_data(tqpair->sock, pdu);
if (rc < 0) {
return NVME_TCP_PDU_FATAL;
}
pdu->rw_offset += rc;
if (spdk_unlikely(pdu->dif_ctx != NULL)) {
rc = nvmf_tcp_pdu_payload_insert_dif(pdu, pdu->rw_offset - rc, rc);
if (rc != 0) {
return NVME_TCP_PDU_FATAL;
}
}
if (pdu->rw_offset < data_len) {
return NVME_TCP_PDU_IN_PROGRESS;
}
/* All of this PDU has now been read from the socket. */
nvmf_tcp_pdu_payload_handle(tqpair, pdu);
break;
case NVME_TCP_PDU_RECV_STATE_ERROR:
if (!spdk_sock_is_connected(tqpair->sock)) {
return NVME_TCP_PDU_FATAL;
}
break;
default:
SPDK_ERRLOG("The state(%d) is invalid\n", tqpair->recv_state);
abort();
break;
}
} while (tqpair->recv_state != prev_state);
return rc;
}
nvmf_ctrlr_process_io_cmd
Entry point for I/O commands. It performs several I/O checks and then dispatches to the per-command handler. For a fused operation it calls nvmf_ctrlr_process_io_fused_cmd; for reservation-related commands it sends a reservation request to the subsystem thread.
I/O checks:
- Does the namespace exist?
- If ANA is enabled, is the path in an optimized (or non-optimized but usable) state?
- Is there a reservation conflict?
int
nvmf_ctrlr_process_io_cmd(struct spdk_nvmf_request *req)
{
uint32_t nsid;
struct spdk_nvmf_ns *ns;
struct spdk_bdev *bdev;
struct spdk_bdev_desc *desc;
struct spdk_io_channel *ch;
struct spdk_nvmf_poll_group *group = req->qpair->group;
struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
★ Holds the ANA state.
enum spdk_nvme_ana_state ana_state;
/* pre-set response details for this command */
response->status.sc = SPDK_NVME_SC_SUCCESS;
nsid = cmd->nsid;
if (spdk_unlikely(ctrlr == NULL)) {
SPDK_ERRLOG("I/O command sent before CONNECT\n");
response->status.sct = SPDK_NVME_SCT_GENERIC;
response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
if (spdk_unlikely(ctrlr->vcprop.cc.bits.en != 1)) {
SPDK_ERRLOG("I/O command sent to disabled controller\n");
response->status.sct = SPDK_NVME_SCT_GENERIC;
response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
★ Look up the namespace from the subsystem structure.
ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
if (ns == NULL || ns->bdev == NULL) {
SPDK_DEBUGLOG(nvmf, "Unsuccessful query for nsid %u\n", cmd->nsid);
response->status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
response->status.dnr = 1;
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
★ Get the ANA state. If it is neither SPDK_NVME_ANA_OPTIMIZED_STATE nor SPDK_NVME_ANA_NON_OPTIMIZED_STATE, fail the I/O.
ana_state = nvmf_ctrlr_get_ana_state(ctrlr, ns->anagrpid);
if (spdk_unlikely(ana_state != SPDK_NVME_ANA_OPTIMIZED_STATE &&
ana_state != SPDK_NVME_ANA_NON_OPTIMIZED_STATE)) {
SPDK_DEBUGLOG(nvmf, "Fail I/O command due to ANA state %d\n",
ana_state);
response->status.sct = SPDK_NVME_SCT_PATH;
response->status.sc = _nvme_ana_state_to_path_status(ana_state);
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
if (spdk_likely(ctrlr->listener != NULL)) {
SPDK_DTRACE_PROBE3(nvmf_request_io_exec_path, req,
ctrlr->listener->trid->traddr,
ctrlr->listener->trid->trsvcid);
}
/* scan-build falsely reporting dereference of null pointer */
assert(group != NULL && group->sgroups != NULL);
ns_info = &group->sgroups[ctrlr->subsys->id].ns_info[nsid - 1];
★ Check for reservation conflicts.
if (nvmf_ns_reservation_request_check(ns_info, ctrlr, req)) {
SPDK_DEBUGLOG(nvmf, "Reservation Conflict for nsid %u, opcode %u\n",
cmd->nsid, cmd->opc);
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
bdev = ns->bdev;
desc = ns->desc;
ch = ns_info->channel;
★ Fused command handling.
if (spdk_unlikely(cmd->fuse & SPDK_NVME_CMD_FUSE_MASK)) {
return nvmf_ctrlr_process_io_fused_cmd(req, bdev, desc, ch);
} else if (spdk_unlikely(req->qpair->first_fused_req != NULL)) {
struct spdk_nvme_cpl *fused_response = &req->qpair->first_fused_req->rsp->nvme_cpl;
SPDK_ERRLOG("Expected second of fused commands - failing first of fused commands\n");
/* abort req->qpair->first_fused_request and continue with new command */
fused_response->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED;
fused_response->status.sct = SPDK_NVME_SCT_GENERIC;
_nvmf_request_complete(req->qpair->first_fused_req);
req->qpair->first_fused_req = NULL;
}
if (spdk_nvmf_request_using_zcopy(req)) {
assert(req->zcopy_phase == NVMF_ZCOPY_PHASE_INIT);
return nvmf_bdev_ctrlr_zcopy_start(bdev, desc, ch, req);
} else {
switch (cmd->opc) {
case SPDK_NVME_OPC_READ:
return nvmf_bdev_ctrlr_read_cmd(bdev, desc, ch, req);
case SPDK_NVME_OPC_WRITE:
return nvmf_bdev_ctrlr_write_cmd(bdev, desc, ch, req);
case SPDK_NVME_OPC_COMPARE:
return nvmf_bdev_ctrlr_compare_cmd(bdev, desc, ch, req);
case SPDK_NVME_OPC_WRITE_ZEROES:
return nvmf_bdev_ctrlr_write_zeroes_cmd(bdev, desc, ch, req);
case SPDK_NVME_OPC_FLUSH:
return nvmf_bdev_ctrlr_flush_cmd(bdev, desc, ch, req);
case SPDK_NVME_OPC_DATASET_MANAGEMENT:
return nvmf_bdev_ctrlr_dsm_cmd(bdev, desc, ch, req);
★ Forward reservation commands to the subsystem thread.
case SPDK_NVME_OPC_RESERVATION_REGISTER:
case SPDK_NVME_OPC_RESERVATION_ACQUIRE:
case SPDK_NVME_OPC_RESERVATION_RELEASE:
case SPDK_NVME_OPC_RESERVATION_REPORT:
spdk_thread_send_msg(ctrlr->subsys->thread, nvmf_ns_reservation_request, req);
return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
default:
return nvmf_bdev_ctrlr_nvme_passthru_io(bdev, desc, ch, req);
}
}
}
_nvmf_subsystem_get_ns
Returns a namespace of the subsystem. The subsystem holds an array of spdk_nvmf_ns structures, and this returns the entry for the given nsid (nsid 0 is rejected implicitly because 0 - 1 wraps to UINT32_MAX).
static inline struct spdk_nvmf_ns *
_nvmf_subsystem_get_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t nsid)
{
/* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. */
if (spdk_unlikely(nsid - 1 >= subsystem->max_nsid)) {
return NULL;
}
return subsystem->ns[nsid - 1];
}
nvmf_bdev_ctrlr_write_cmd
The boundary between nvmf and bdev: from here the bdev API spdk_bdev_writev_blocks is called.
The nvmf layer issues the request to the bdev layer and builds the response; queueing and buffer allocation are left to bdev.
The iov array and iov count contained in spdk_nvmf_request are passed through unchanged, and nvmf_bdev_ctrlr_complete_cmd is registered as the completion callback of spdk_bdev_writev_blocks.
int
nvmf_bdev_ctrlr_write_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
uint32_t block_size = spdk_bdev_get_block_size(bdev);
struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl; ★ cpl = completion (the NVMe completion queue entry used for the response), not capsule.
uint64_t start_lba;
uint64_t num_blocks;
int rc;
nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
SPDK_ERRLOG("end of media\n");
rsp->status.sct = SPDK_NVME_SCT_GENERIC;
rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
if (spdk_unlikely(num_blocks * block_size > req->length)) {
SPDK_ERRLOG("Write NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
num_blocks, block_size, req->length);
rsp->status.sct = SPDK_NVME_SCT_GENERIC;
rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
assert(!spdk_nvmf_request_using_zcopy(req));
★ Issue the I/O to the bdev layer.
rc = spdk_bdev_writev_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
nvmf_bdev_ctrlr_complete_cmd, req);
if (spdk_unlikely(rc)) {
if (rc == -ENOMEM) {
nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}
rsp->status.sct = SPDK_NVME_SCT_GENERIC;
rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}
spdk_bdev_writev_blocks
Simply calls bdev_writev_blocks_with_md.
int
spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt,
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg)
{
return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks,
num_blocks, cb, cb_arg, NULL, false);
}
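A minimal caller-side sketch of the same API, with a hypothetical completion callback; the descriptor, channel, and iovec are assumed to be set up elsewhere.
#include "spdk/bdev.h"

/* Completion callback: signature matches spdk_bdev_io_completion_cb. */
static void
my_write_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	/* build a response based on `success`, then return the I/O to the bdev layer */
	spdk_bdev_free_io(bdev_io);
}

static int
my_submit_write(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		struct iovec *iov, int iovcnt, uint64_t offset_blocks, uint64_t num_blocks)
{
	int rc = spdk_bdev_writev_blocks(desc, ch, iov, iovcnt,
					 offset_blocks, num_blocks,
					 my_write_done, NULL);
	if (rc == -ENOMEM) {
		/* no spdk_bdev_io available: queue and retry later,
		 * as nvmf_bdev_ctrlr_write_cmd does via nvmf_bdev_ctrl_queue_io */
	}
	return rc;
}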
bdev_writev_blocks_with_md
Entry point of the bdev wrapper layer for writev. Before the I/O is handed to the spdk_bdev object, this layer takes care of range locking, I/O splitting, and QoS.
It first checks whether the write can be accepted.
Write admission checks:
- Was the bdev descriptor opened with write access?
- Is the accessed range within the bdev's size?
- Can a spdk_bdev_io be allocated from the bdev_channel?
static int
bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
struct iovec *iov, int iovcnt, void *md_buf,
uint64_t offset_blocks, uint64_t num_blocks,
spdk_bdev_io_completion_cb cb, void *cb_arg,
struct spdk_bdev_ext_io_opts *opts, bool copy_opts)
{
struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
struct spdk_bdev_io *bdev_io;
struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
if (!desc->write) {
return -EBADF;
}
if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
return -EINVAL;
}
bdev_io = bdev_channel_get_io(channel);
if (!bdev_io) {
return -ENOMEM;
}
bdev_io->internal.ch = channel;
bdev_io->internal.desc = desc;
bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
bdev_io->u.bdev.iovs = iov;
bdev_io->u.bdev.iovcnt = iovcnt;
bdev_io->u.bdev.md_buf = md_buf;
bdev_io->u.bdev.num_blocks = num_blocks;
bdev_io->u.bdev.offset_blocks = offset_blocks;
bdev_io_init(bdev_io, bdev, cb_arg, cb);
bdev_io->internal.ext_opts = opts;
bdev_io->u.bdev.ext_opts = opts;
_bdev_io_submit_ext(desc, bdev_io, opts, copy_opts);
return 0;
}
_bdev_io_submit_ext
Part of bdev I/O submission. When ext I/O options are passed, they may be copied into the bdev_io, and a bounce buffer is used when the memory domain is not supported by the bdev (use_pull_push); the details were not investigated further.
static inline void
_bdev_io_submit_ext(struct spdk_bdev_desc *desc, struct spdk_bdev_io *bdev_io,
struct spdk_bdev_ext_io_opts *opts, bool copy_opts)
{
if (opts) {
bool use_pull_push = opts->memory_domain && !desc->memory_domains_supported;
assert(opts->size <= sizeof(*opts));
/*
* copy if size is smaller than opts struct to avoid having to check size
* on every access to bdev_io->u.bdev.ext_opts
*/
if (copy_opts || use_pull_push || opts->size < sizeof(*opts)) {
_bdev_io_copy_ext_opts(bdev_io, opts);
if (use_pull_push) {
_bdev_io_ext_use_bounce_buffer(bdev_io);
return;
}
}
}
bdev_io_submit(bdev_io);
}
bdev_io_submit
Checks range locks, splits read/write I/O where necessary, and determines whether QoS is enabled; without QoS it calls _bdev_io_submit directly.
Any I/O that uses locking, splitting, or QoS in SPDK passes through this function.
Queue types:
- ch->locked_ranges: queue of locked LBA ranges
- ch->io_locked: queue of I/Os waiting for a locked range
void
bdev_io_submit(struct spdk_bdev_io *bdev_io)
{
struct spdk_bdev *bdev = bdev_io->bdev;
struct spdk_thread *thread = spdk_bdev_io_get_thread(bdev_io);
struct spdk_bdev_channel *ch = bdev_io->internal.ch;
assert(thread != NULL);
assert(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_PENDING);
★ Check whether the I/O overlaps a locked LBA range.
if (!TAILQ_EMPTY(&ch->locked_ranges)) {
struct lba_range *range;
TAILQ_FOREACH(range, &ch->locked_ranges, tailq) {
if (bdev_io_range_is_locked(bdev_io, range)) {
TAILQ_INSERT_TAIL(&ch->io_locked, bdev_io, internal.ch_link);
return;
}
}
}
★ Add the bdev_io to the io_submitted queue.
TAILQ_INSERT_TAIL(&ch->io_submitted, bdev_io, internal.ch_link);
★ Decide whether this bdev_io needs to be split; READ and WRITE are the candidates for splitting.
if (bdev_io_should_split(bdev_io)) {
bdev_io->internal.submit_tsc = spdk_get_ticks();
spdk_trace_record_tsc(bdev_io->internal.submit_tsc, TRACE_BDEV_IO_START, 0, 0,
(uintptr_t)bdev_io, (uint64_t)bdev_io->type, bdev_io->internal.caller_ctx,
bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks);
bdev_io_split(NULL, bdev_io);
return;
}
★ If QoS is enabled, submit through the QoS thread (via a thread message when the current thread is not the QoS thread).
if (ch->flags & BDEV_CH_QOS_ENABLED) {
if ((thread == bdev->internal.qos->thread) || !bdev->internal.qos->thread) {
_bdev_io_submit(bdev_io);
} else {
bdev_io->internal.io_submit_ch = ch;
bdev_io->internal.ch = bdev->internal.qos->ch;
spdk_thread_send_msg(bdev->internal.qos->thread, _bdev_io_submit, bdev_io);
}
} else {
★ If QoS is not enabled, submit directly on this thread.
_bdev_io_submit(bdev_io);
}
}
_bdev_io_submit
/* Explicitly mark this inline, since it's used as a function pointer and otherwise won't
* be inlined, at least on some compilers.
*/
static inline void
_bdev_io_submit(void *ctx)
{
struct spdk_bdev_io *bdev_io = ctx;
struct spdk_bdev *bdev = bdev_io->bdev;
struct spdk_bdev_channel *bdev_ch = bdev_io->internal.ch;
uint64_t tsc;
tsc = spdk_get_ticks();
bdev_io->internal.submit_tsc = tsc;
spdk_trace_record_tsc(tsc, TRACE_BDEV_IO_START, 0, 0, (uintptr_t)bdev_io,
(uint64_t)bdev_io->type, bdev_io->internal.caller_ctx,
bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks);
★ If neither the channel's RESET nor its QoS flag is set, run bdev_io_do_submit.
if (spdk_likely(bdev_ch->flags == 0)) {
bdev_io_do_submit(bdev_ch, bdev_io);
return;
}
if (bdev_ch->flags & BDEV_CH_RESET_IN_PROGRESS) {
_bdev_io_complete_in_submit(bdev_ch, bdev_io, SPDK_BDEV_IO_STATUS_ABORTED);
} else if (bdev_ch->flags & BDEV_CH_QOS_ENABLED) {
if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_ABORT) &&
bdev_abort_queued_io(&bdev->internal.qos->queued, bdev_io->u.abort.bio_to_abort)) {
_bdev_io_complete_in_submit(bdev_ch, bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
} else {
TAILQ_INSERT_TAIL(&bdev->internal.qos->queued, bdev_io, internal.link);
bdev_qos_io_submit(bdev_ch, bdev->internal.qos);
}
} else {
SPDK_ERRLOG("unknown bdev_ch flag %x found\n", bdev_ch->flags);
_bdev_io_complete_in_submit(bdev_ch, bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
}
}
bdev_io_do_submit
Submits the I/O through the bdev module's fn_table->submit_request function pointer.
static inline void
bdev_io_do_submit(struct spdk_bdev_channel *bdev_ch, struct spdk_bdev_io *bdev_io)
{
struct spdk_bdev *bdev = bdev_io->bdev;
struct spdk_io_channel *ch = bdev_ch->channel;
struct spdk_bdev_shared_resource *shared_resource = bdev_ch->shared_resource;
★ For an ABORT request, try to abort the target I/O from the pending queues.
if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_ABORT)) {
struct spdk_bdev_mgmt_channel *mgmt_channel = shared_resource->mgmt_ch;
struct spdk_bdev_io *bio_to_abort = bdev_io->u.abort.bio_to_abort;
if (bdev_abort_queued_io(&shared_resource->nomem_io, bio_to_abort) ||
bdev_abort_buf_io(&mgmt_channel->need_buf_small, bio_to_abort) ||
bdev_abort_buf_io(&mgmt_channel->need_buf_large, bio_to_abort)) {
_bdev_io_complete_in_submit(bdev_ch, bdev_io,
SPDK_BDEV_IO_STATUS_SUCCESS);
return;
}
}
if (spdk_likely(TAILQ_EMPTY(&shared_resource->nomem_io))) {
bdev_ch->io_outstanding++;
shared_resource->io_outstanding++;
bdev_io->internal.in_submit_request = true;
★ Call submit_request through the bdev's fn_table.
bdev->fn_table->submit_request(ch, bdev_io);
bdev_io->internal.in_submit_request = false;
} else {
TAILQ_INSERT_TAIL(&shared_resource->nomem_io, bdev_io, internal.link);
}
}
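For context on the fn_table->submit_request call above: each bdev module (malloc, nvme, and so on) provides a struct spdk_bdev_fn_table when it registers its bdevs. A stripped-down sketch follows; the module handler is hypothetical and only a couple of the table members are shown.
#include "spdk/bdev_module.h"

/* Hypothetical module handler invoked by bdev_io_do_submit() via fn_table. */
static void
my_module_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
		/* ... perform or queue the I/O against the backing device ... */
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
		break;
	default:
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		break;
	}
}

static const struct spdk_bdev_fn_table my_module_fn_table = {
	.submit_request	= my_module_submit_request,
	/* .destruct, .io_type_supported, .get_io_channel, ... omitted in this sketch */
};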
bdev_io_split
Splits the I/O. Whether and how it is split depends on the I/O type; READ and WRITE are the split targets here.
For READ/WRITE it first checks whether a data buffer has already been allocated; if not, it obtains one with spdk_bdev_io_get_buf before splitting.
static void
bdev_io_split(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
bdev_io->u.bdev.split_current_offset_blocks = bdev_io->u.bdev.offset_blocks;
bdev_io->u.bdev.split_remaining_num_blocks = bdev_io->u.bdev.num_blocks;
bdev_io->u.bdev.split_outstanding = 0;
bdev_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS;
switch (bdev_io->type) {
case SPDK_BDEV_IO_TYPE_READ:
case SPDK_BDEV_IO_TYPE_WRITE:
★ If a buffer is already allocated, split now; otherwise obtain a buffer first.
if (_is_buf_allocated(bdev_io->u.bdev.iovs)) {
_bdev_rw_split(bdev_io);
} else {
assert(bdev_io->type == SPDK_BDEV_IO_TYPE_READ);
spdk_bdev_io_get_buf(bdev_io, bdev_rw_split_get_buf_cb,
bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
}
break;
case SPDK_BDEV_IO_TYPE_UNMAP:
bdev_unmap_split(bdev_io);
break;
case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
bdev_write_zeroes_split(bdev_io);
break;
default:
assert(false);
break;
}
}
_bdev_rw_split
Splits the parent READ/WRITE iovec into child I/Os that respect the bdev's boundary and segment limits.
static void
_bdev_rw_split(void *_bdev_io)
{
struct iovec *parent_iov, *iov;
struct spdk_bdev_io *bdev_io = _bdev_io;
struct spdk_bdev *bdev = bdev_io->bdev;
uint64_t parent_offset, current_offset, remaining;
uint32_t parent_iov_offset, parent_iovcnt, parent_iovpos, child_iovcnt;
uint32_t to_next_boundary, to_next_boundary_bytes, to_last_block_bytes;
uint32_t iovcnt, iov_len, child_iovsize;
uint32_t blocklen = bdev->blocklen;
uint32_t io_boundary = bdev->optimal_io_boundary;
uint32_t max_segment_size = bdev->max_segment_size;
uint32_t max_child_iovcnt = bdev->max_num_segments;
void *md_buf = NULL;
int rc;
★ Determine the segment-size limits; they come from the bdev that was attached when the namespace was created.
max_segment_size = max_segment_size ? max_segment_size : UINT32_MAX;
max_child_iovcnt = max_child_iovcnt ? spdk_min(max_child_iovcnt, BDEV_IO_NUM_CHILD_IOV) :
BDEV_IO_NUM_CHILD_IOV;
io_boundary = bdev->split_on_optimal_io_boundary ? io_boundary : UINT32_MAX;
remaining = bdev_io->u.bdev.split_remaining_num_blocks;
current_offset = bdev_io->u.bdev.split_current_offset_blocks;
parent_offset = bdev_io->u.bdev.offset_blocks;
parent_iov_offset = (current_offset - parent_offset) * blocklen;
parent_iovcnt = bdev_io->u.bdev.iovcnt;
for (parent_iovpos = 0; parent_iovpos < parent_iovcnt; parent_iovpos++) {
parent_iov = &bdev_io->u.bdev.iovs[parent_iovpos];
if (parent_iov_offset < parent_iov->iov_len) {
break;
}
parent_iov_offset -= parent_iov->iov_len;
}
child_iovcnt = 0;
while (remaining > 0 && parent_iovpos < parent_iovcnt && child_iovcnt < BDEV_IO_NUM_CHILD_IOV) {
to_next_boundary = _to_next_boundary(current_offset, io_boundary);
to_next_boundary = spdk_min(remaining, to_next_boundary);
to_next_boundary_bytes = to_next_boundary * blocklen;
iov = &bdev_io->child_iov[child_iovcnt];
iovcnt = 0;
if (bdev_io->u.bdev.md_buf) {
md_buf = (char *)bdev_io->u.bdev.md_buf +
(current_offset - parent_offset) * spdk_bdev_get_md_size(bdev);
}
child_iovsize = spdk_min(BDEV_IO_NUM_CHILD_IOV - child_iovcnt, max_child_iovcnt);
while (to_next_boundary_bytes > 0 && parent_iovpos < parent_iovcnt &&
iovcnt < child_iovsize) {
parent_iov = &bdev_io->u.bdev.iovs[parent_iovpos];
iov_len = parent_iov->iov_len - parent_iov_offset;
iov_len = spdk_min(iov_len, max_segment_size);
iov_len = spdk_min(iov_len, to_next_boundary_bytes);
to_next_boundary_bytes -= iov_len;
bdev_io->child_iov[child_iovcnt].iov_base = parent_iov->iov_base + parent_iov_offset;
bdev_io->child_iov[child_iovcnt].iov_len = iov_len;
if (iov_len < parent_iov->iov_len - parent_iov_offset) {
parent_iov_offset += iov_len;
} else {
parent_iovpos++;
parent_iov_offset = 0;
}
child_iovcnt++;
iovcnt++;
}
if (to_next_boundary_bytes > 0) {
/* We had to stop this child I/O early because we ran out of
* child_iov space or were limited by max_num_segments.
* Ensure the iovs to be aligned with block size and
* then adjust to_next_boundary before starting the
* child I/O.
*/
assert(child_iovcnt == BDEV_IO_NUM_CHILD_IOV ||
iovcnt == child_iovsize);
to_last_block_bytes = to_next_boundary_bytes % blocklen;
if (to_last_block_bytes != 0) {
uint32_t child_iovpos = child_iovcnt - 1;
/* don't decrease child_iovcnt when it equals to BDEV_IO_NUM_CHILD_IOV
* so the loop will naturally end
*/
to_last_block_bytes = blocklen - to_last_block_bytes;
to_next_boundary_bytes += to_last_block_bytes;
while (to_last_block_bytes > 0 && iovcnt > 0) {
iov_len = spdk_min(to_last_block_bytes,
bdev_io->child_iov[child_iovpos].iov_len);
bdev_io->child_iov[child_iovpos].iov_len -= iov_len;
if (bdev_io->child_iov[child_iovpos].iov_len == 0) {
child_iovpos--;
if (--iovcnt == 0) {
/* If the child IO is less than a block size just return.
* If the first child IO of any split round is less than
* a block size, an error exit.
*/
if (bdev_io->u.bdev.split_outstanding == 0) {
SPDK_ERRLOG("The first child io was less than a block size\n");
bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
spdk_trace_record(TRACE_BDEV_IO_DONE, 0, 0, (uintptr_t)bdev_io, bdev_io->internal.caller_ctx);
TAILQ_REMOVE(&bdev_io->internal.ch->io_submitted, bdev_io, internal.ch_link);
bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx);
}
return;
}
}
to_last_block_bytes -= iov_len;
if (parent_iov_offset == 0) {
parent_iovpos--;
parent_iov_offset = bdev_io->u.bdev.iovs[parent_iovpos].iov_len;
}
parent_iov_offset -= iov_len;
}
assert(to_last_block_bytes == 0);
}
to_next_boundary -= to_next_boundary_bytes / blocklen;
}
rc = bdev_io_split_submit(bdev_io, iov, iovcnt, md_buf, to_next_boundary,
&current_offset, &remaining);
if (spdk_unlikely(rc)) {
return;
}
}
}
nvmf_bdev_ctrlr_complete_cmd
Callback invoked when the write command completes; the same callback is also used for other bdev-backed commands (e.g. nvmf_bdev_ctrlr_read_cmd passes it to spdk_bdev_readv_blocks). It builds the spdk_nvme_cpl structure used for the response.
static void
nvmf_bdev_ctrlr_complete_cmd(struct spdk_bdev_io *bdev_io, bool success,
void *cb_arg)
{
struct spdk_nvmf_request *req = cb_arg;
struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
int first_sc = 0, first_sct = 0, sc = 0, sct = 0;
uint32_t cdw0 = 0;
struct spdk_nvmf_request *first_req = req->first_fused_req;
if (spdk_unlikely(first_req != NULL)) {
★ For a fused operation, build and complete the response of the first command first.
/* fused commands - get status for both operations */
struct spdk_nvme_cpl *first_response = &first_req->rsp->nvme_cpl;
spdk_bdev_io_get_nvme_fused_status(bdev_io, &cdw0, &first_sct, &first_sc, &sct, &sc);
first_response->cdw0 = cdw0;
first_response->status.sc = first_sc;
first_response->status.sct = first_sct;
/* first request should be completed */
spdk_nvmf_request_complete(first_req);
req->first_fused_req = NULL;
} else {
spdk_bdev_io_get_nvme_status(bdev_io, &cdw0, &sct, &sc);
}
response->cdw0 = cdw0;
response->status.sc = sc;
response->status.sct = sct;
spdk_nvmf_request_complete(req);
spdk_bdev_free_io(bdev_io);
}
Read
From receiving the host's read request to issuing the I/O to the block device module.
Commands such as read that involve I/O to a block device module below bdev are processed asynchronously with respect to the host's request. The nvmf poller (nvmf_tcp_poll_group_poll) receives the host request, decodes the NVMe-oF command from the PDU, and issues the I/O to the block device module through the bdev API.
(gdb) bt
#0 bdev_malloc_readv (mdisk=0x2000164b27d8, ch=0x200016b2d000, task=0x100017bbb620, iov=0x20000729ed40,
iovcnt=21860, len=135168, offset=2240512) at bdev_malloc.c:131
#1 0x0000556415ba79cf in _bdev_malloc_submit_request (mch=0x556417bbb620, bdev_io=0x20000729ed40)
at bdev_malloc.c:230
#2 0x0000556415ba7c51 in bdev_malloc_submit_request (ch=0x556417bbb5c0, bdev_io=0x20000729ed40)
at bdev_malloc.c:303
#3 0x0000556415ce2877 in bdev_io_do_submit (bdev_ch=0x556417bbb4f0, bdev_io=0x20000729ed40) at bdev.c:2172
#4 0x0000556415ce3fe8 in _bdev_io_submit (ctx=0x20000729ed40) at bdev.c:2755
#5 0x0000556415ce459a in bdev_io_submit (bdev_io=0x20000729ed40) at bdev.c:2872
#6 0x0000556415ce4754 in _bdev_io_submit_ext (desc=0x556417bbb350, bdev_io=0x20000729ed40, opts=0x0,
copy_opts=false) at bdev.c:2925
#7 0x0000556415ce7d56 in bdev_readv_blocks_with_md (desc=0x556417bbb350, ch=0x556417bbb490,
iov=0x556417b15650, iovcnt=1, md_buf=0x0, offset_blocks=4376, num_blocks=8,
cb=0x556415c61e1c <nvmf_bdev_ctrlr_complete_cmd>, cb_arg=0x556417b15610, opts=0x0, copy_opts=false)
at bdev.c:4410
#8 0x0000556415ce7db5 in spdk_bdev_readv_blocks (desc=0x556417bbb350, ch=0x556417bbb490, iov=0x556417b15650,
iovcnt=1, offset_blocks=4376, num_blocks=8, cb=0x556415c61e1c <nvmf_bdev_ctrlr_complete_cmd>,
cb_arg=0x556417b15610) at bdev.c:4421
#9 0x0000556415c6269c in nvmf_bdev_ctrlr_read_cmd (bdev=0x556417b137d0, desc=0x556417bbb350,
ch=0x556417bbb490, req=0x556417b15610) at ctrlr_bdev.c:276
#10 0x0000556415c5fe59 in nvmf_ctrlr_process_io_cmd (req=0x556417b15610) at ctrlr.c:4006
#11 0x0000556415c60ca2 in spdk_nvmf_request_exec (req=0x556417b15610) at ctrlr.c:4306
#12 0x0000556415c7f8c6 in nvmf_tcp_req_process (ttransport=0x556417a6c820, tcp_req=0x556417b15610)
at tcp.c:2876
#13 0x0000556415c7b88e in nvmf_tcp_capsule_cmd_hdr_handle (ttransport=0x556417a6c820, tqpair=0x556417a67a10,
pdu=0x2000170fdbb0) at tcp.c:1516
#14 0x0000556415c7d85e in nvmf_tcp_sock_process (tqpair=0x556417a67a10) at tcp.c:2190
#15 0x0000556415c8016f in nvmf_tcp_sock_cb (arg=0x556417a67a10, group=0x556417a6c930, sock=0x556417a64ad0)
at tcp.c:3009
#16 0x0000556415d0dbf4 in sock_group_impl_poll_count (group_impl=0x556417a6cc00, group=0x556417a6c930,
max_events=32) at sock.c:695
#17 0x0000556415d0dc8e in spdk_sock_group_poll_count (group=0x556417a6c930, max_events=32) at sock.c:721
#18 0x0000556415d0daed in spdk_sock_group_poll (group=0x556417a6c930) at sock.c:672
#19 0x0000556415c8079c in nvmf_tcp_poll_group_poll (group=0x556417a6cb60) at tcp.c:3162
#20 0x0000556415c76440 in nvmf_transport_poll_group_poll (group=0x556417a6cb60) at transport.c:582
#21 0x0000556415c6b889 in nvmf_poll_group_poll (ctx=0x556417a52ee0) at nvmf.c:71
#22 0x0000556415d05802 in thread_execute_poller (thread=0x556417a52af0, poller=0x556417a52f60) at thread.c:795
#23 0x0000556415d05cb2 in thread_poll (thread=0x556417a52af0, max_msgs=0, now=24285274773898) at thread.c:917
#24 0x0000556415d05de3 in spdk_thread_poll (thread=0x556417a52af0, max_msgs=0, now=24285274773898)
at thread.c:968
#25 0x0000556415cd5c1b in _reactor_run (reactor=0x556417a4f1c0) at reactor.c:901
#26 0x0000556415cd5d0d in reactor_run (arg=0x556417a4f1c0) at reactor.c:939
#27 0x0000556415cd60e1 in spdk_reactors_start () at reactor.c:1041
#28 0x0000556415cd29a9 in spdk_app_start (opts_user=0x7ffd9f888010,
start_fn=0x556415ba70ba <nvmf_tgt_started>, arg1=0x0) at app.c:628
#29 0x0000556415ba7206 in main (argc=1, argv=0x7ffd9f8881d8) at nvmf_main.c:47
From block device module completion to the host response
After the block device finishes processing, the completion is delivered from the bdev side back to the NVMe-oF poller.
#0 _nvmf_request_complete (ctx=0x556417b16850) at ctrlr.c:4058
#1 0x0000556415c551ed in spdk_thread_exec_msg (thread=0x556417a52af0,
fn=0x556415c6005e <_nvmf_request_complete>, ctx=0x556417b17170)
at /home/takayuki/repos/spdk/include/spdk/thread.h:500
#2 0x0000556415c60629 in spdk_nvmf_request_complete (req=0x556417b17170) at ctrlr.c:4176
#3 0x0000556415c61f86 in nvmf_bdev_ctrlr_complete_cmd (bdev_io=0x20000729f280, success=true,
cb_arg=0x556417b17170) at ctrlr_bdev.c:93
#4 0x0000556415ceb5be in bdev_io_complete (ctx=0x20000729f280) at bdev.c:5916
#5 0x0000556415ceb8dc in spdk_bdev_io_complete (bdev_io=0x20000729f280, status=SPDK_BDEV_IO_STATUS_SUCCESS)
at bdev.c:5999
#6 0x0000556415ba72b5 in malloc_done (ref=0x20000729f640, status=0) at bdev_malloc.c:54
#7 0x0000556415cf5917 in spdk_accel_task_complete (accel_task=0x556417a6d0e0, status=0) at accel.c:122
#8 0x0000556415cf7c9d in accel_comp_poll (arg=0x556417aa4dd0) at accel_sw.c:321
#9 0x0000556415d05802 in thread_execute_poller (thread=0x556417a52af0, poller=0x556417ace510) at thread.c:795
#10 0x0000556415d05cb2 in thread_poll (thread=0x556417a52af0, max_msgs=0, now=22560147027628) at thread.c:917
#11 0x0000556415d05de3 in spdk_thread_poll (thread=0x556417a52af0, max_msgs=0, now=22560147027628)
at thread.c:968
#12 0x0000556415cd5c1b in _reactor_run (reactor=0x556417a4f1c0) at reactor.c:901
#13 0x0000556415cd5d0d in reactor_run (arg=0x556417a4f1c0) at reactor.c:939
#14 0x0000556415cd60e1 in spdk_reactors_start () at reactor.c:1041
#15 0x0000556415cd29a9 in spdk_app_start (opts_user=0x7ffd9f888010,
start_fn=0x556415ba70ba <nvmf_tgt_started>, arg1=0x0) at app.c:628
#16 0x0000556415ba7206 in main (argc=1, argv=0x7ffd9f8881d8) at nvmf_main.c:47
spdk_thread_exec_msg(qpair->group->thread, _nvmf_request_complete, req) posts the completion to the spdk_thread that owns the qpair. A backtrace of that message path was not captured, but it is presumably equivalent to the following backtrace taken while processing the message during admin queue creation.
#0 nvmf_tcp_send_capsule_resp_pdu (tcp_req=0x56031140e9e0, tqpair=0xc00000000) at tcp.c:1635
#1 0x00005603105cd83e in request_transfer_out (req=0x56031140e9e0) at tcp.c:2565
#2 0x00005603105cebd8 in nvmf_tcp_req_process (ttransport=0x560311305820, tcp_req=0x56031140e9e0) at tcp.c:2919
#3 0x00005603105cf614 in nvmf_tcp_req_complete (req=0x56031140e9e0) at tcp.c:3115
#4 0x00005603105c54a2 in nvmf_transport_req_complete (req=0x56031140e9e0) at transport.c:594
#5 0x00005603105af3f7 in _nvmf_request_complete (ctx=0x56031140e9e0) at ctrlr.c:4122
#6 0x00005603105a4b6c in _nvmf_ctrlr_add_admin_qpair (ctx=0x56031140e9e0) at ctrlr.c:257
#7 0x0000560310654439 in msg_queue_run_batch (thread=0x5603112ebaf0, max_msgs=8) at thread.c:692
#8 0x0000560310654c52 in thread_poll (thread=0x5603112ebaf0, max_msgs=0, now=23091399598314) at thread.c:908
#9 0x0000560310654de3 in spdk_thread_poll (thread=0x5603112ebaf0, max_msgs=0, now=23091399598314) at thread.c:968
#10 0x0000560310624c1b in _reactor_run (reactor=0x5603112e81c0) at reactor.c:901
#11 0x0000560310624d0d in reactor_run (arg=0x5603112e81c0) at reactor.c:939
#12 0x00005603106250e1 in spdk_reactors_start () at reactor.c:1041
#13 0x00005603106219a9 in spdk_app_start (opts_user=0x7ffd3e18d050, start_fn=0x5603104f60ba <nvmf_tgt_started>, arg1=0x0) at app.c:628
#14 0x00005603104f6206 in main (argc=1, argv=0x7ffd3e18d218) at nvmf_main.c:47
nvmf_bdev_ctrlr_read_cmd
Calls spdk_bdev_readv_blocks on the bdev. This function is the boundary between the nvmf layer and the bdev layer.
int
nvmf_bdev_ctrlr_read_cmd(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
struct spdk_io_channel *ch, struct spdk_nvmf_request *req)
{
uint64_t bdev_num_blocks = spdk_bdev_get_num_blocks(bdev);
uint32_t block_size = spdk_bdev_get_block_size(bdev);
struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
uint64_t start_lba;
uint64_t num_blocks;
int rc;
nvmf_bdev_ctrlr_get_rw_params(cmd, &start_lba, &num_blocks);
if (spdk_unlikely(!nvmf_bdev_ctrlr_lba_in_range(bdev_num_blocks, start_lba, num_blocks))) {
SPDK_ERRLOG("end of media\n");
rsp->status.sct = SPDK_NVME_SCT_GENERIC;
rsp->status.sc = SPDK_NVME_SC_LBA_OUT_OF_RANGE;
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
if (spdk_unlikely(num_blocks * block_size > req->length)) {
SPDK_ERRLOG("Read NLB %" PRIu64 " * block size %" PRIu32 " > SGL length %" PRIu32 "\n",
num_blocks, block_size, req->length);
rsp->status.sct = SPDK_NVME_SCT_GENERIC;
rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
assert(!spdk_nvmf_request_using_zcopy(req));
rc = spdk_bdev_readv_blocks(desc, ch, req->iov, req->iovcnt, start_lba, num_blocks,
nvmf_bdev_ctrlr_complete_cmd, req);
if (spdk_unlikely(rc)) {
if (rc == -ENOMEM) {
nvmf_bdev_ctrl_queue_io(req, bdev, ch, nvmf_ctrlr_process_io_cmd_resubmit, req);
return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}
rsp->status.sct = SPDK_NVME_SCT_GENERIC;
rsp->status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}
spdk_nvmf_request_complete
int
spdk_nvmf_request_complete(struct spdk_nvmf_request *req)
{
struct spdk_nvmf_qpair *qpair = req->qpair;
spdk_thread_exec_msg(qpair->group->thread, _nvmf_request_complete, req);
return 0;
}
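spdk_thread_exec_msg, used here and seen at frame #1 of the completion backtrace above, either runs the function inline when the caller is already on the target thread or falls back to spdk_thread_send_msg. The sketch below approximates that behavior from memory of the inline helper in spdk/thread.h; it is not copied from the source.
/* Approximation of spdk_thread_exec_msg's behavior; not the actual header code. */
static inline int
exec_msg_sketch(const struct spdk_thread *thread, spdk_msg_fn fn, void *ctx)
{
	if (spdk_get_thread() == thread) {
		fn(ctx);          /* already on the qpair's thread: run synchronously */
		return 0;
	}
	return spdk_thread_send_msg(thread, fn, ctx);  /* otherwise enqueue a message */
}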
Reservation
Reservation handling: when nvmf_ctrlr_process_io_cmd receives one of the four opcodes SPDK_NVME_OPC_RESERVATION_REGISTER, ACQUIRE, RELEASE, or REPORT, it calls spdk_thread_send_msg toward ctrlr->subsys->thread.
spdk_thread_send_msg
spdk_thread_send_msg enqueues a pointer to nvmf_ns_reservation_request, the entry point of reservation processing, onto the message ring of ctrlr->subsys->thread, instructing that thread to perform the reservation work.
int
spdk_thread_send_msg(const struct spdk_thread *thread, spdk_msg_fn fn, void *ctx)
{
struct spdk_thread *local_thread;
struct spdk_msg *msg;
int rc;
assert(thread != NULL);
if (spdk_unlikely(thread->state == SPDK_THREAD_STATE_EXITED)) {
SPDK_ERRLOG("Thread %s is marked as exited.\n", thread->name);
return -EIO;
}
local_thread = _get_thread();
msg = NULL;
if (local_thread != NULL) {
if (local_thread->msg_cache_count > 0) {
msg = SLIST_FIRST(&local_thread->msg_cache);
assert(msg != NULL);
SLIST_REMOVE_HEAD(&local_thread->msg_cache, link);
local_thread->msg_cache_count--;
}
}
if (msg == NULL) {
msg = spdk_mempool_get(g_spdk_msg_mempool);
if (!msg) {
SPDK_ERRLOG("msg could not be allocated\n");
return -ENOMEM;
}
}
msg->fn = fn;
msg->arg = ctx;
rc = spdk_ring_enqueue(thread->messages, (void **)&msg, 1, NULL);
if (rc != 1) {
SPDK_ERRLOG("msg could not be enqueued\n");
spdk_mempool_put(g_spdk_msg_mempool, msg);
return -EIO;
}
return thread_send_msg_notification(thread);
}
nvmf_ns_reservation_request
void
nvmf_ns_reservation_request(void *ctx)
{
struct spdk_nvmf_request *req = (struct spdk_nvmf_request *)ctx;
struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
struct subsystem_update_ns_ctx *update_ctx;
uint32_t nsid;
struct spdk_nvmf_ns *ns;
bool update_sgroup = false;
nsid = cmd->nsid;
ns = _nvmf_subsystem_get_ns(ctrlr->subsys, nsid);
assert(ns != NULL);
switch (cmd->opc) {
case SPDK_NVME_OPC_RESERVATION_REGISTER:
update_sgroup = nvmf_ns_reservation_register(ns, ctrlr, req);
break;
case SPDK_NVME_OPC_RESERVATION_ACQUIRE:
update_sgroup = nvmf_ns_reservation_acquire(ns, ctrlr, req);
break;
case SPDK_NVME_OPC_RESERVATION_RELEASE:
update_sgroup = nvmf_ns_reservation_release(ns, ctrlr, req);
break;
case SPDK_NVME_OPC_RESERVATION_REPORT:
nvmf_ns_reservation_report(ns, ctrlr, req);
break;
default:
break;
}
/* update reservation information to subsystem's poll group */
if (update_sgroup) {
update_ctx = calloc(1, sizeof(*update_ctx));
if (update_ctx == NULL) {
SPDK_ERRLOG("Can't alloc subsystem poll group update context\n");
goto update_done;
}
update_ctx->subsystem = ctrlr->subsys;
update_ctx->cb_fn = _nvmf_ns_reservation_update_done;
update_ctx->cb_arg = req;
nvmf_subsystem_update_ns(ctrlr->subsys, subsystem_update_ns_done, update_ctx);
return;
}
update_done:
_nvmf_ns_reservation_update_done(ctrlr->subsys, (void *)req, 0);
}
Fused operation
static int
nvmf_ctrlr_process_io_fused_cmd(struct spdk_nvmf_request *req, struct spdk_bdev *bdev,
struct spdk_bdev_desc *desc, struct spdk_io_channel *ch)
{
struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
struct spdk_nvmf_request *first_fused_req = req->qpair->first_fused_req;
int rc;
if (cmd->fuse == SPDK_NVME_CMD_FUSE_FIRST) {
/* first fused operation (should be compare) */
if (first_fused_req != NULL) {
struct spdk_nvme_cpl *fused_response = &first_fused_req->rsp->nvme_cpl;
SPDK_ERRLOG("Wrong sequence of fused operations\n");
/* abort req->qpair->first_fused_request and continue with new fused command */
fused_response->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED;
fused_response->status.sct = SPDK_NVME_SCT_GENERIC;
_nvmf_request_complete(first_fused_req);
} else if (cmd->opc != SPDK_NVME_OPC_COMPARE) {
SPDK_ERRLOG("Wrong op code of fused operations\n");
rsp->status.sct = SPDK_NVME_SCT_GENERIC;
rsp->status.sc = SPDK_NVME_SC_INVALID_OPCODE;
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
req->qpair->first_fused_req = req;
return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
} else if (cmd->fuse == SPDK_NVME_CMD_FUSE_SECOND) {
/* second fused operation (should be write) */
if (first_fused_req == NULL) {
SPDK_ERRLOG("Wrong sequence of fused operations\n");
rsp->status.sct = SPDK_NVME_SCT_GENERIC;
rsp->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED;
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
} else if (cmd->opc != SPDK_NVME_OPC_WRITE) {
struct spdk_nvme_cpl *fused_response = &first_fused_req->rsp->nvme_cpl;
SPDK_ERRLOG("Wrong op code of fused operations\n");
/* abort req->qpair->first_fused_request and fail current command */
fused_response->status.sc = SPDK_NVME_SC_ABORTED_MISSING_FUSED;
fused_response->status.sct = SPDK_NVME_SCT_GENERIC;
_nvmf_request_complete(first_fused_req);
rsp->status.sct = SPDK_NVME_SCT_GENERIC;
rsp->status.sc = SPDK_NVME_SC_INVALID_OPCODE;
req->qpair->first_fused_req = NULL;
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
/* save request of first command to generate response later */
req->first_fused_req = first_fused_req;
req->qpair->first_fused_req = NULL;
} else {
SPDK_ERRLOG("Invalid fused command fuse field.\n");
rsp->status.sct = SPDK_NVME_SCT_GENERIC;
rsp->status.sc = SPDK_NVME_SC_INVALID_FIELD;
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
rc = nvmf_bdev_ctrlr_compare_and_write_cmd(bdev, desc, ch, req->first_fused_req, req);
if (rc == SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
if (spdk_nvme_cpl_is_error(rsp)) {
struct spdk_nvme_cpl *fused_response = &first_fused_req->rsp->nvme_cpl;
fused_response->status = rsp->status;
rsp->status.sct = SPDK_NVME_SCT_GENERIC;
rsp->status.sc = SPDK_NVME_SC_ABORTED_FAILED_FUSED;
/* Complete first of fused commands. Second will be completed by upper layer */
_nvmf_request_complete(first_fused_req);
req->first_fused_req = NULL;
}
}
return rc;
}
Host access check (Discovery)
(gdb) bt
#0 spdk_nvmf_subsystem_host_allowed (subsystem=0x55ab16965ce3 <spdk_ring_enqueue+762>, hostnqn=0x7ffe82071d30 "\260\333\017\027") at subsystem.c:976
#1 0x000055ab1697df9c in nvmf_qpair_access_allowed (qpair=0x55ab1769c5e0, subsystem=0x55ab17687900, hostnqn=0x2000170ff200 "nqn.2014-08.org.nvmexpress:uuid:a7da905e-547a-4c8c-878a-67ab9bffcfa9") at ctrlr.c:644
#2 0x000055ab1697ed10 in nvmf_ctrlr_cmd_connect (req=0x55ab177aa9e0) at ctrlr.c:895
#3 0x000055ab1698586b in nvmf_ctrlr_process_fabrics_cmd (req=0x55ab177aa9e0) at ctrlr.c:3387
#4 0x000055ab16987c6c in spdk_nvmf_request_exec (req=0x55ab177aa9e0) at ctrlr.c:4302
#5 0x000055ab169a68c6 in nvmf_tcp_req_process (ttransport=0x55ab176a1820, tcp_req=0x55ab177aa9e0) at tcp.c:2876
#6 0x000055ab169a29f9 in nvmf_tcp_capsule_cmd_payload_handle (ttransport=0x55ab176a1820, tqpair=0x55ab1769c5e0, pdu=0x2000170fdbb0) at tcp.c:1552
#7 0x000055ab169a35c7 in _nvmf_tcp_pdu_payload_handle (tqpair=0x55ab1769c5e0, pdu=0x2000170fdbb0) at tcp.c:1813
#8 0x000055ab169a3973 in nvmf_tcp_pdu_payload_handle (tqpair=0x55ab1769c5e0, pdu=0x2000170fdbb0) at tcp.c:1877
#9 0x000055ab169a4978 in nvmf_tcp_sock_process (tqpair=0x55ab1769c5e0) at tcp.c:2224
#10 0x000055ab169a716f in nvmf_tcp_sock_cb (arg=0x55ab1769c5e0, group=0x55ab176a1930, sock=0x55ab1769b9c0) at tcp.c:3009
#11 0x000055ab16a34bf4 in sock_group_impl_poll_count (group_impl=0x55ab176a1c00, group=0x55ab176a1930, max_events=32) at sock.c:695
#12 0x000055ab16a34c8e in spdk_sock_group_poll_count (group=0x55ab176a1930, max_events=32) at sock.c:721
#13 0x000055ab16a34aed in spdk_sock_group_poll (group=0x55ab176a1930) at sock.c:672
#14 0x000055ab169a779c in nvmf_tcp_poll_group_poll (group=0x55ab176a1b60) at tcp.c:3162
#15 0x000055ab1699d440 in nvmf_transport_poll_group_poll (group=0x55ab176a1b60) at transport.c:582
#16 0x000055ab16992889 in nvmf_poll_group_poll (ctx=0x55ab17687ee0) at nvmf.c:71
#17 0x000055ab16a2c802 in thread_execute_poller (thread=0x55ab17687af0, poller=0x55ab17687f60) at thread.c:795
#18 0x000055ab16a2ccb2 in thread_poll (thread=0x55ab17687af0, max_msgs=0, now=20812668666420) at thread.c:917
#19 0x000055ab16a2cde3 in spdk_thread_poll (thread=0x55ab17687af0, max_msgs=0, now=20812668666420) at thread.c:968
#20 0x000055ab169fcc1b in _reactor_run (reactor=0x55ab176841c0) at reactor.c:901
#21 0x000055ab169fcd0d in reactor_run (arg=0x55ab176841c0) at reactor.c:939
#22 0x000055ab169fd0e1 in spdk_reactors_start () at reactor.c:1041
#23 0x000055ab169f99a9 in spdk_app_start (opts_user=0x7ffe82072350, start_fn=0x55ab168ce0ba <nvmf_tgt_started>, arg1=0x0) at app.c:628
#24 0x000055ab168ce206 in main (argc=1, argv=0x7ffe82072518) at nvmf_main.c:47
nvmf_ctrlr_process_fabrics_cmd
Entry point for fabrics commands. The function dispatches to each command handler according to fctype (presumably short for "fabrics command type"); the fctype values are summarized in the short sketch after the function below.
static int
nvmf_ctrlr_process_fabrics_cmd(struct spdk_nvmf_request *req)
{
struct spdk_nvmf_qpair *qpair = req->qpair; ★ Get the qpair.
struct spdk_nvmf_capsule_cmd *cap_hdr;
cap_hdr = &req->cmd->nvmf_cmd;
if (qpair->ctrlr == NULL) {
/* No ctrlr established yet; the only valid command is Connect */
★ If no controller is attached to the qpair yet, only the Connect command is accepted.
if (cap_hdr->fctype == SPDK_NVMF_FABRIC_COMMAND_CONNECT) {
return nvmf_ctrlr_cmd_connect(req);
} else {
SPDK_DEBUGLOG(nvmf, "Got fctype 0x%x, expected Connect\n",
cap_hdr->fctype);
req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
} else if (nvmf_qpair_is_admin_queue(qpair)) {
★ Check the queue type (admin vs. I/O).
/*
* Controller session is established, and this is an admin queue.
* Disallow Connect and allow other fabrics commands.
*/
switch (cap_hdr->fctype) {
case SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET:
return nvmf_property_set(req);
case SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET:
return nvmf_property_get(req);
default:
SPDK_DEBUGLOG(nvmf, "unknown fctype 0x%02x\n",
cap_hdr->fctype);
req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
} else {
/* Controller session is established, and this is an I/O queue */
/* For now, no I/O-specific Fabrics commands are implemented (other than Connect) */
SPDK_DEBUGLOG(nvmf, "Unexpected I/O fctype 0x%x\n", cap_hdr->fctype);
req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_OPCODE;
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
}
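For reference, the fctype values dispatched above come from the NVMe over Fabrics specification: Connect is the only fctype accepted before a controller is associated, while Property Set/Get drive register-level access (CC, CSTS, CAP) over the admin queue. A minimal sketch of the relevant constants (the enumerator names are shortened here; verify the exact values against nvmf_spec.h in your tree):
/* Values as defined by the NVMe-oF spec; a reference sketch, not the SPDK header. */
enum fabric_cmd_type_sketch {
    FABRIC_PROPERTY_SET = 0x00,  /* SPDK_NVMF_FABRIC_COMMAND_PROPERTY_SET */
    FABRIC_CONNECT      = 0x01,  /* SPDK_NVMF_FABRIC_COMMAND_CONNECT */
    FABRIC_PROPERTY_GET = 0x04,  /* SPDK_NVMF_FABRIC_COMMAND_PROPERTY_GET */
};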
Host access permission check (Connect)
When processing the fabrics Connect command, the subsystem decides whether the connecting host is allowed.
(gdb) bt
#0 spdk_nvmf_subsystem_host_allowed (subsystem=0x55ab16965ce3 <spdk_ring_enqueue+762>, hostnqn=0x7ffe82071d30 "\260;O\026") at subsystem.c:976
#1 0x000055ab1697df9c in nvmf_qpair_access_allowed (qpair=0x55ab1769b2a0, subsystem=0x55ab177eff90,
hostnqn=0x2000164f5200 "nqn.2014-08.org.nvmexpress:uuid:a7da905e-547a-4c8c-878a-67ab9bffcfa9") at ctrlr.c:644
#2 0x000055ab1697ed10 in nvmf_ctrlr_cmd_connect (req=0x55ab17748f40) at ctrlr.c:895
#3 0x000055ab1698586b in nvmf_ctrlr_process_fabrics_cmd (req=0x55ab17748f40) at ctrlr.c:3387
#4 0x000055ab16987c6c in spdk_nvmf_request_exec (req=0x55ab17748f40) at ctrlr.c:4302
#5 0x000055ab169a68c6 in nvmf_tcp_req_process (ttransport=0x55ab176a1820, tcp_req=0x55ab17748f40) at tcp.c:2876
#6 0x000055ab169a29f9 in nvmf_tcp_capsule_cmd_payload_handle (ttransport=0x55ab176a1820, tqpair=0x55ab1769b2a0, pdu=0x2000164f3bb0) at tcp.c:1552
#7 0x000055ab169a35c7 in _nvmf_tcp_pdu_payload_handle (tqpair=0x55ab1769b2a0, pdu=0x2000164f3bb0) at tcp.c:1813
#8 0x000055ab169a3973 in nvmf_tcp_pdu_payload_handle (tqpair=0x55ab1769b2a0, pdu=0x2000164f3bb0) at tcp.c:1877
#9 0x000055ab169a4978 in nvmf_tcp_sock_process (tqpair=0x55ab1769b2a0) at tcp.c:2224
#10 0x000055ab169a716f in nvmf_tcp_sock_cb (arg=0x55ab1769b2a0, group=0x55ab176a1930, sock=0x55ab1769b9c0) at tcp.c:3009
#11 0x000055ab16a34bf4 in sock_group_impl_poll_count (group_impl=0x55ab176a1c00, group=0x55ab176a1930, max_events=32) at sock.c:695
#12 0x000055ab16a34c8e in spdk_sock_group_poll_count (group=0x55ab176a1930, max_events=32) at sock.c:721
#13 0x000055ab16a34aed in spdk_sock_group_poll (group=0x55ab176a1930) at sock.c:672
#14 0x000055ab169a779c in nvmf_tcp_poll_group_poll (group=0x55ab176a1b60) at tcp.c:3162
#15 0x000055ab1699d440 in nvmf_transport_poll_group_poll (group=0x55ab176a1b60) at transport.c:582
#16 0x000055ab16992889 in nvmf_poll_group_poll (ctx=0x55ab17687ee0) at nvmf.c:71
#17 0x000055ab16a2c802 in thread_execute_poller (thread=0x55ab17687af0, poller=0x55ab17687f60) at thread.c:795
#18 0x000055ab16a2ccb2 in thread_poll (thread=0x55ab17687af0, max_msgs=0, now=21235041568544) at thread.c:917
#19 0x000055ab16a2cde3 in spdk_thread_poll (thread=0x55ab17687af0, max_msgs=0, now=21235041568544) at thread.c:968
#20 0x000055ab169fcc1b in _reactor_run (reactor=0x55ab176841c0) at reactor.c:901
#21 0x000055ab169fcd0d in reactor_run (arg=0x55ab176841c0) at reactor.c:939
#22 0x000055ab169fd0e1 in spdk_reactors_start () at reactor.c:1041
#23 0x000055ab169f99a9 in spdk_app_start (opts_user=0x7ffe82072350, start_fn=0x55ab168ce0ba <nvmf_tgt_started>, arg1=0x0) at app.c:628
#24 0x000055ab168ce206 in main (argc=1, argv=0x7ffe82072518) at nvmf_main.c:47
spdk_nvmf_subsystem_host_allowed
SPDK manages host access on a per-subsystem basis.
The hosts allowed to access a subsystem are kept in that subsystem's hosts list; a standalone sketch of this allowlist pattern follows nvmf_subsystem_find_host below.
bool
spdk_nvmf_subsystem_host_allowed(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
{
bool allowed;
if (!hostnqn) {
return false;
}
pthread_mutex_lock(&subsystem->mutex);
if (subsystem->flags.allow_any_host) {
pthread_mutex_unlock(&subsystem->mutex);
return true;
}
allowed = nvmf_subsystem_find_host(subsystem, hostnqn) != NULL;
pthread_mutex_unlock(&subsystem->mutex);
return allowed;
}
nvmf_subsystem_find_host
/* Must hold subsystem->mutex while calling this function */
static struct spdk_nvmf_host *
nvmf_subsystem_find_host(struct spdk_nvmf_subsystem *subsystem, const char *hostnqn)
{
struct spdk_nvmf_host *host = NULL;
TAILQ_FOREACH(host, &subsystem->hosts, link) {
if (strcmp(hostnqn, host->nqn) == 0) {
return host;
}
}
return NULL;
}
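The lookup above is a plain linear strcmp scan over the NQN allowlist. The following is a minimal standalone sketch of the same pattern, with hypothetical types (not the SPDK structures), showing how entries would be added and matched:
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/queue.h>

/* Hypothetical stand-ins for struct spdk_nvmf_host and the subsystem's hosts TAILQ. */
struct host {
    char nqn[224];                 /* NQN strings are at most 223 bytes */
    TAILQ_ENTRY(host) link;
};
TAILQ_HEAD(host_list, host);

static int allowlist_add(struct host_list *hosts, const char *nqn)
{
    struct host *h = calloc(1, sizeof(*h));
    if (h == NULL) {
        return -1;
    }
    snprintf(h->nqn, sizeof(h->nqn), "%s", nqn);
    TAILQ_INSERT_TAIL(hosts, h, link);
    return 0;
}

static bool allowlist_contains(struct host_list *hosts, const char *nqn)
{
    struct host *h;

    /* Same linear strcmp scan as nvmf_subsystem_find_host above. */
    TAILQ_FOREACH(h, hosts, link) {
        if (strcmp(nqn, h->nqn) == 0) {
            return true;
        }
    }
    return false;
}

int main(void)
{
    struct host_list hosts = TAILQ_HEAD_INITIALIZER(hosts);

    allowlist_add(&hosts, "nqn.2014-08.org.nvmexpress:uuid:a7da905e-547a-4c8c-878a-67ab9bffcfa9");
    printf("allowed: %d\n",
           allowlist_contains(&hosts, "nqn.2014-08.org.nvmexpress:uuid:a7da905e-547a-4c8c-878a-67ab9bffcfa9"));
    return 0;
}
In a running target the list is populated through RPCs such as nvmf_subsystem_add_host, or the check is bypassed entirely with allow_any_host, which is the fast path at the top of spdk_nvmf_subsystem_host_allowed.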
Identify
nvmf_ctrlr_identify
nvmf_ctrlr_process_admin_cmd
Entry point for admin commands. Fused admin commands are not supported. Custom handlers registered in g_nvmf_custom_admin_cmd_hdlrs take precedence over the built-in switch; a sketch of that dispatch-table pattern follows the function.
nvmf_ctrlr_process_admin_cmd(struct spdk_nvmf_request *req)
{
struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
struct spdk_nvme_cpl *response = &req->rsp->nvme_cpl;
struct spdk_nvmf_subsystem_poll_group *sgroup;
int rc;
if (cmd->opc == SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
/* We do not want to treat AERs as outstanding commands,
* so decrement mgmt_io_outstanding here to offset
* the increment that happened prior to this call.
*/
sgroup = &req->qpair->group->sgroups[ctrlr->subsys->id];
assert(sgroup != NULL);
sgroup->mgmt_io_outstanding--;
}
if (ctrlr == NULL) {
★ No controller yet (command sent before Connect): initialize the response with a command sequence error.
SPDK_ERRLOG("Admin command sent before CONNECT\n");
response->status.sct = SPDK_NVME_SCT_GENERIC;
response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
assert(spdk_get_thread() == ctrlr->thread);
if (cmd->fuse != 0) {
/* Fused admin commands are not supported. */
response->status.sct = SPDK_NVME_SCT_GENERIC;
response->status.sc = SPDK_NVME_SC_INVALID_FIELD;
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
if (ctrlr->vcprop.cc.bits.en != 1) {
SPDK_ERRLOG("Admin command sent to disabled controller\n");
response->status.sct = SPDK_NVME_SCT_GENERIC;
response->status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
if (req->data && spdk_nvme_opc_get_data_transfer(cmd->opc) == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
_clear_iovs(req->iov, req->iovcnt);
}
if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
/* Discovery controllers only support these admin OPS. */
switch (cmd->opc) {
case SPDK_NVME_OPC_IDENTIFY:
case SPDK_NVME_OPC_GET_LOG_PAGE:
case SPDK_NVME_OPC_KEEP_ALIVE:
case SPDK_NVME_OPC_SET_FEATURES:
case SPDK_NVME_OPC_GET_FEATURES:
case SPDK_NVME_OPC_ASYNC_EVENT_REQUEST:
break;
default:
goto invalid_opcode;
}
}
/* Call a custom adm cmd handler if set. Aborts are handled in a different path (see nvmf_passthru_admin_cmd) */
if (g_nvmf_custom_admin_cmd_hdlrs[cmd->opc].hdlr && cmd->opc != SPDK_NVME_OPC_ABORT) {
rc = g_nvmf_custom_admin_cmd_hdlrs[cmd->opc].hdlr(req);
if (rc >= SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE) {
/* The handler took care of this command */
return rc;
}
}
switch (cmd->opc) {
case SPDK_NVME_OPC_GET_LOG_PAGE:
return nvmf_ctrlr_get_log_page(req);
case SPDK_NVME_OPC_IDENTIFY:
return nvmf_ctrlr_identify(req);
case SPDK_NVME_OPC_ABORT:
return nvmf_ctrlr_abort(req);
case SPDK_NVME_OPC_GET_FEATURES:
return nvmf_ctrlr_get_features(req);
case SPDK_NVME_OPC_SET_FEATURES:
return nvmf_ctrlr_set_features(req);
case SPDK_NVME_OPC_ASYNC_EVENT_REQUEST:
return nvmf_ctrlr_async_event_request(req);
case SPDK_NVME_OPC_KEEP_ALIVE:
return nvmf_ctrlr_keep_alive(req);
case SPDK_NVME_OPC_CREATE_IO_SQ:
case SPDK_NVME_OPC_CREATE_IO_CQ:
case SPDK_NVME_OPC_DELETE_IO_SQ:
case SPDK_NVME_OPC_DELETE_IO_CQ:
/* Create and Delete I/O CQ/SQ not allowed in NVMe-oF */
goto invalid_opcode;
default:
goto invalid_opcode;
}
invalid_opcode:
SPDK_INFOLOG(nvmf, "Unsupported admin opcode 0x%x\n", cmd->opc);
response->status.sct = SPDK_NVME_SCT_GENERIC;
response->status.sc = SPDK_NVME_SC_INVALID_OPCODE;
return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}
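Before the built-in switch runs, the function consults g_nvmf_custom_admin_cmd_hdlrs, an opcode-indexed table of handler callbacks. The following is a minimal standalone sketch of that dispatch-table pattern, with hypothetical names and a simplified request type (not the SPDK definitions):
#include <stdio.h>
#include <stddef.h>

/* Hypothetical, simplified request type and handler signature. */
struct request { unsigned char opc; };
typedef int (*admin_cmd_hdlr)(struct request *req);

/* One optional handler slot per 8-bit admin opcode, mirroring the role of
 * g_nvmf_custom_admin_cmd_hdlrs in the function above. */
static admin_cmd_hdlr g_custom_hdlrs[256];

static void register_custom_hdlr(unsigned char opc, admin_cmd_hdlr hdlr)
{
    g_custom_hdlrs[opc] = hdlr;
}

/* A registered handler runs first; a negative return means "not handled",
 * so the caller falls through to the built-in switch, as in the code above. */
static int dispatch_admin_cmd(struct request *req)
{
    if (g_custom_hdlrs[req->opc] != NULL) {
        return g_custom_hdlrs[req->opc](req);
    }
    return -1;
}

static int my_identify_hdlr(struct request *req)
{
    printf("custom handler for opcode 0x%02x\n", req->opc);
    return 0;
}

int main(void)
{
    struct request req = { 0x06 };            /* 0x06 = Identify */
    register_custom_hdlr(0x06, my_identify_hdlr);
    return dispatch_admin_cmd(&req);
}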
Command processing
- 59.41% 0.65% reactor_0 nvmf_tgt [.] nvmf_transport_poll_group_poll a
- 58.75% nvmf_transport_poll_group_poll a
- 58.67% nvmf_tcp_poll_group_poll a
- 57.32% spdk_sock_group_poll a
- 57.17% spdk_sock_group_poll_count a
- 55.50% sock_group_impl_poll_count a
- 36.45% epoll_wait a
- 12.51% entry_SYSCALL_64_after_hwframe a
- 12.04% do_syscall_64 a
8.17% syscall_enter_from_user_mode a
- 3.66% __x64_sys_epoll_wait a
- 3.13% do_epoll_wait a
- 2.24% __fdget a
2.20% __fget_light a
- 0.60% asm_common_interrupt a
common_interrupt a
irq_exit_rcu a
__softirqentry_text_start a
- 9.04% nvmf_tcp_sock_cb a
- 9.01% nvmf_tcp_sock_process a
- 4.23% nvme_tcp_read_payload_data ★ Reads the payload from the TCP packets a
- 4.07% nvme_tcp_readv_data a
- 3.80% spdk_sock_readv a
- 3.09% __GI___readv a
- 2.81% entry_SYSCALL_64_after_hwframe a
- 2.79% do_syscall_64 a
- 2.72% __x64_sys_readv a
- do_readv a
- 2.64% vfs_readv a
- 2.55% do_iter_read a
- 2.48% do_iter_readv_writev a
- sock_read_iter a
- 2.43% sock_recvmsg a
- 2.31% inet_recvmsg a
- tcp_recvmsg a
- 2.19% tcp_recvmsg_locked a
- 1.77% skb_copy_datagram_iter a
- __skb_datagram_iter a
- 1.22% simple_copy_to_iter a
- 0.86% _copy_to_iter a
0.80% copy_user_generic_string
- 0.70% posix_sock_readv a
- 0.66% posix_sock_recv_from_pipe a
0.56% __memmove_avx_unaligned_erms a
- 2.45% nvmf_tcp_pdu_payload_handle a
- 2.44% _nvmf_tcp_pdu_payload_handle a
- 2.40% nvmf_tcp_h2c_data_payload_handle ★ Entry point for write command processing. a
- 2.36% nvmf_tcp_req_process a
- 2.31% spdk_nvmf_request_exec a
- 2.15% nvmf_ctrlr_process_io_cmd ★ Main body of the controller's I/O command handling; see the note after this profile for why it ends in a memmove. a
- 2.09% nvmf_bdev_ctrlr_write_cmd a
- 2.06% spdk_bdev_writev_blocks a
- 2.05% bdev_writev_blocks_with_md a
- 2.02% _bdev_io_submit_ext a
- 1.99% bdev_io_submit a
- 1.98% _bdev_io_submit a
- 1.95% bdev_io_do_submit a
- 1.93% bdev_malloc_submit_request a
- _bdev_malloc_submit_request a
- bdev_malloc_writev a
- 1.87% spdk_accel_submit_copy a
- 1.83% sw_accel_submit_tasks a
- 1.79% __memmove_avx_unaligned_erms a
- 0.75% asm_common_interrupt a
common_interrupt a
irq_exit_rcu a
- __softirqentry_text_start a
- net_rx_action a
__napi_poll a
e1000_clean
- 1.35% nvme_tcp_read_data a
- 1.32% spdk_sock_recv a
- posix_sock_recv a
- posix_sock_readv a
- 1.19% __GI___readv a
- 1.14% entry_SYSCALL_64_after_hwframe a
- do_syscall_64 a
- __x64_sys_readv a
- do_readv a
- 1.11% vfs_readv a
- do_iter_read a
- 1.09% do_iter_readv_writev a
- 1.06% sock_read_iter a
- sock_recvmsg a
- 1.02% inet_recvmsg a
- tcp_recvmsg a
- 0.99% tcp_recvmsg_locked a
- 0.60% skb_copy_datagram_iter a
__skb_datagram_iter a
- 0.77% nvmf_tcp_capsule_cmd_hdr_handle ★ Entry point for read command processing. Does "capsule" mean the command is encapsulated in the PDU? a
- 0.74% nvmf_tcp_req_process a
- 0.60% spdk_nvmf_request_exec a
- 0.58% nvmf_ctrlr_process_io_cmd a
- nvmf_bdev_ctrlr_read_cmd a
spdk_bdev_readv_blocks a
- bdev_readv_blocks_with_md a
- 0.55% _bdev_io_submit_ext a
- bdev_io_submit a
- 0.54% _bdev_io_submit a
bdev_io_do_submit a
bdev_malloc_submit_request a
_bdev_malloc_submit_request a
- bdev_malloc_readv a
- spdk_accel_submit_copy a
sw_accel_submit_tasks
- 7.75% posix_sock_group_impl_poll a
- 2.71% __libc_sendmsg a
- 2.50% entry_SYSCALL_64_after_hwframe a
- 2.47% do_syscall_64 a
- 2.40% __x64_sys_sendmsg a
- __sys_sendmsg a
- 2.35% ___sys_sendmsg a
- 2.17% ____sys_sendmsg a
- 2.13% sock_sendmsg a
- 2.10% inet_sendmsg a
- 2.07% tcp_sendmsg a
- 1.95% tcp_sendmsg_locked a
- 0.97% tcp_push a
- 0.96% __tcp_push_pending_frames a
- 0.96% tcp_write_xmit a
- 0.91% __tcp_transmit_skb a
- 0.90% ip_queue_xmit a
- __ip_queue_xmit a
- 0.89% ip_local_out a
- 0.87% ip_output a
- 0.86% ip_finish_output a
- 0.83% __ip_finish_output a
ip_finish_output2 a
- 1.97% _sock_flush a
0.80% spdk_sock_prep_reqs a
0.75% __libc_disable_asynccancel a
0.59% __libc_enable_asynccancel a
0.58% posix_sock_group_impl_poll
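In the write path above, nvmf_bdev_ctrlr_write_cmd ends in __memmove_avx_unaligned_erms because the malloc bdev turns every write into a memory copy submitted through the accel framework (spdk_accel_submit_copy -> sw_accel_submit_tasks -> memmove). Conceptually, what that copy boils down to is sketched below with hypothetical names (not the SPDK implementation): the request iovecs are copied into the RAM-disk buffer at the block offset.
#include <stdint.h>
#include <string.h>
#include <sys/uio.h>

/* Conceptual sketch only: copy the request iovecs into a flat RAM-disk buffer,
 * which is why the profile above is dominated by memmove. */
static void ramdisk_writev(uint8_t *disk_buf, uint64_t offset_bytes,
                           const struct iovec *iov, int iovcnt)
{
    uint8_t *dst = disk_buf + offset_bytes;

    for (int i = 0; i < iovcnt; i++) {
        memcpy(dst, iov[i].iov_base, iov[i].iov_len);
        dst += iov[i].iov_len;
    }
}

int main(void)
{
    uint8_t disk[4096] = { 0 };
    char a[] = "hello ", b[] = "world";
    struct iovec iov[2] = { { a, sizeof(a) - 1 }, { b, sizeof(b) } };

    ramdisk_writev(disk, 512, iov, 2);   /* write two segments at byte offset 512 */
    return 0;
}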