目次
機能
BUSEのキューを読み取り、read, write関数を呼び出し。
方針
API
関数構造体を登録。
struct buse_ops = {
.read
.write
}
BUSE修正
ビルドを通す
diff --git a/kernel/buse-blkdev.c b/kernel/buse-blkdev.c
index ee97b03..e679c52 100644
--- a/kernel/buse-blkdev.c
+++ b/kernel/buse-blkdev.c
@@ -17,7 +17,7 @@
*/
static int buse_init_hctx(struct blk_mq_hw_ctx *hw_ctx, void *driver_data, unsigned int hw_ctx_id)
{
- struct buse *buse = hw_ctx->queue->queuedata;
+ struct buse *buse = driver_data;
struct buse_queue *q = buse->queues;
q[hw_ctx_id].id = hw_ctx_id;
@@ -47,7 +47,6 @@ static blk_status_t buse_queue_rq(struct blk_mq_hw_ctx *hw_ctx, const struct blk
switch (req_op(r)) {
case REQ_OP_DISCARD:
- case REQ_OP_WRITE_SAME:
case REQ_OP_WRITE_ZEROES:
case REQ_OP_SECURE_ERASE:
case REQ_OP_WRITE:
@@ -56,6 +55,8 @@ static blk_status_t buse_queue_rq(struct blk_mq_hw_ctx *hw_ctx, const struct blk
return buse_flush(cmd);
case REQ_OP_READ:
return buse_read(cmd);
+ default:
+ break;
}
pr_warn("Unsupported request no. %d\n", req_op(r));
@@ -71,7 +72,7 @@ static const struct block_device_operations buse_blkdev_ops = {
* When io request times out we just print warning to the dmesg a give it another chance. This is
* the best we can do. If the device is eventually stopped, these requests will be canceled.
*/
-static enum blk_eh_timer_return buse_timeout(struct request *rq, bool b)
+static enum blk_eh_timer_return buse_timeout(struct request *rq)
{
pr_warn("Request timed out! Is userspace connected? (rq = %p)\n", rq);
@@ -118,24 +119,28 @@ int buse_blkdev_init(struct buse *buse)
size_t writelist_size = max_writes * sizeof(struct writelist_item);
unsigned int max_hw_sectors;
- blkdev->disk = alloc_disk_node(1, NUMA_NO_NODE);
- if (!blkdev->disk) {
- ret = -ENOMEM;
- goto err;
- }
-
buse_set_tag_set(buse);
ret = blk_mq_alloc_tag_set(tag_set);
if (ret)
goto err_disk;
- blkdev->request_queue = blk_mq_init_queue_data(tag_set, buse);
+ blkdev->disk = blk_mq_alloc_disk(&buse->blkdev.tag_set, buse);
+ if (!blkdev->disk) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ //blkdev->request_queue = blk_mq_init_queue(tag_set);
+ //blkdev->request_queue->queuedata = buse;
+ blkdev->request_queue = blkdev->disk->queue;
+
if (IS_ERR(blkdev->request_queue)) {
ret = PTR_ERR(blkdev->request_queue);
goto err_tag;
}
+
blk_queue_write_cache(blkdev->request_queue, true, false);
max_hw_sectors = (buse->write_chunk_size - writelist_size) / SECTOR_SIZE;
@@ -159,23 +164,23 @@ int buse_blkdev_init(struct buse *buse)
blk_queue_max_segments(blkdev->request_queue, USHRT_MAX);
blk_queue_max_segment_size(blkdev->request_queue, UINT_MAX);
- if (buse->can_write_same)
- blk_queue_max_write_same_sectors(blkdev->request_queue, UINT_MAX);
+ //if (buse->can_write_same)
+ // blk_queue_max_write_same_sectors(blkdev->request_queue, UINT_MAX);
if (buse->can_write_zeroes)
blk_queue_max_write_zeroes_sectors(blkdev->request_queue, UINT_MAX);
- if (buse->can_discard) {
+ /*if (buse->can_discard) {
blk_queue_flag_set(QUEUE_FLAG_DISCARD, blkdev->request_queue);
blkdev->request_queue->limits.discard_granularity = buse->block_size;
blkdev->request_queue->limits.discard_alignment = buse->block_size;
blk_queue_max_discard_sectors(blkdev->request_queue, UINT_MAX);
blk_queue_max_discard_segments(blkdev->request_queue, USHRT_MAX);
- }
+ }*/
- if (buse->can_secure_erase)
- blk_queue_flag_set(QUEUE_FLAG_SECERASE, blkdev->request_queue);
+ //if (buse->can_secure_erase)
+ // blk_queue_flag_set(QUEUE_FLAG_SECERASE, blkdev->request_queue);
return 0;
@@ -197,7 +202,7 @@ void buse_blkdev_exit(struct buse *buse)
put_disk(buse->blkdev.disk);
}
- blk_cleanup_queue(buse->blkdev.request_queue);
+ blk_mq_destroy_queue(buse->blkdev.request_queue);
blk_mq_free_tag_set(&buse->blkdev.tag_set);
buse->blkdev.created = false;
}
@@ -212,7 +217,6 @@ void buse_gendisk_register(struct buse *buse)
disk->major = buse_blkdev_major;
disk->minors = buse_blkdev_max_minors;
disk->first_minor = buse->index * disk->minors;
- disk->flags |= GENHD_FL_EXT_DEVT;
disk->fops = &buse_blkdev_ops;
disk->private_data = buse;
disk->queue = buse->blkdev.request_queue;
diff --git a/kernel/buse-chrdev.c b/kernel/buse-chrdev.c
index 0082242..19771e9 100644
--- a/kernel/buse-chrdev.c
+++ b/kernel/buse-chrdev.c
@@ -113,6 +113,7 @@ static int chrdev_release_rqueue(struct inode *inode, struct file *file)
*/
static int chrdev_open_wqueue(struct inode *inode, struct file *file)
{
+ printk("open_wqueue....");
struct buse_wqueue *wq = inode_get_wqueue(inode);
if (!wq || atomic_read(&wq->bound) == 1)
return -EFAULT;
@@ -128,6 +129,7 @@ static int chrdev_open_wqueue(struct inode *inode, struct file *file)
*/
static int chrdev_open_rqueue(struct inode *inode, struct file *file)
{
+ printk("open_rqueue....");
struct buse_rqueue *rq = inode_get_rqueue(inode);
if (!rq || atomic_read(&rq->bound) == 1)
return -EFAULT;
@@ -156,7 +158,7 @@ static struct vm_operations_struct vm_ops_rqueue = {
static int chrdev_mmap_wqueue(struct file *file, struct vm_area_struct *vma)
{
vma->vm_ops = &vm_ops_wqueue;
- vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+ vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP);
vma->vm_private_data = file->private_data;
vm_open(vma);
@@ -169,7 +171,7 @@ static int chrdev_mmap_wqueue(struct file *file, struct vm_area_struct *vma)
static int chrdev_mmap_rqueue(struct file *file, struct vm_area_struct *vma)
{
vma->vm_ops = &vm_ops_rqueue;
- vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+ vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP);
vma->vm_private_data = file->private_data;
vm_open(vma);
@@ -262,7 +264,8 @@ ssize_t chrdev_read_wqueue(struct file *file, char __user *buf, size_t len, loff
chunk = pop_write_request_wait(wq);
if (IS_ERR(chunk)) {
- return PTR_ERR(chunk);
+ //return PTR_ERR(chunk);
+ return -EAGAIN;
}
ret = copy_to_user(buf, &chunk->shmem_offset, sizeof(chunk->shmem_offset));
@@ -271,12 +274,14 @@ ssize_t chrdev_read_wqueue(struct file *file, char __user *buf, size_t len, loff
pr_alert("copy_to_user failed\n");
return -EFAULT;
}
+ printk("chunk->shmem_offset: %ld", chunk->shmem_offset);
ret = copy_to_user(buf, &chunk->num_writes, sizeof(chunk->num_writes));
if (ret) {
pr_alert("copy_to_user failed\n");
return -EFAULT;
}
+ printk("chunk->num_writes: %ld", chunk->num_writes);
if (is_wqueue_term(chunk))
kfree(chunk);
@@ -306,7 +311,8 @@ ssize_t chrdev_read_rqueue(struct file *file, char __user *buf, size_t len, loff
chunk = pop_read_request_wait(rq);
if (IS_ERR(chunk)) {
- return PTR_ERR(chunk);
+ //return PTR_ERR(chunk);
+ return -EAGAIN;
}
ret = copy_to_user(buf, &chunk->sector, sizeof(chunk->sector));
diff --git a/kernel/buse-rqueue.c b/kernel/buse-rqueue.c
index 8907f8c..f6e41e9 100644
--- a/kernel/buse-rqueue.c
+++ b/kernel/buse-rqueue.c
@@ -100,9 +100,9 @@ struct read_chunk *pop_read_request_wait(struct buse_rqueue *rq)
int ret;
struct read_chunk *ch = NULL;
- ret = wait_event_interruptible(rq->busy_chunks_avail, !list_empty(&rq->busy_chunks));
- if (ret < 0)
- return ERR_PTR(-EAGAIN);
+ ret = wait_event_interruptible_timeout(rq->busy_chunks_avail, !list_empty(&rq->busy_chunks), 5);
+ if (ret <= 0)
+ return ERR_PTR(-EAGAIN);
mutex_lock(&rq->lock);
diff --git a/kernel/buse-wqueue.c b/kernel/buse-wqueue.c
index 2b4f12f..c1bff81 100644
--- a/kernel/buse-wqueue.c
+++ b/kernel/buse-wqueue.c
@@ -8,6 +8,7 @@
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/module.h>
+#include <linux/sched/task.h>
#include "buse-blkdev.h"
#include "buse-chrdev.h"
@@ -68,6 +69,7 @@ static void init_write_chunk(struct buse_wqueue *wq, struct write_chunk *ch)
ch->writelist_frontier = wq->shmem + ch->shmem_offset;
ch->data_frontier = ch->writelist_frontier + max_writes;
ch->num_writes = 0;
+ printk("init_write_chunk: ch=%p, shmem_offset: %d, max_writes: %d\n", ch, ch->shmem_offset, max_writes);
INIT_LIST_HEAD(&ch->dependent_reads);
@@ -156,8 +158,8 @@ struct write_chunk *pop_write_request_wait(struct buse_wqueue *wq)
struct write_chunk *ch = NULL;
int ret;
- ret = wait_event_interruptible(wq->busy_chunks_avail, !list_empty(&wq->busy_chunks));
- if (ret < 0)
+ ret = wait_event_interruptible_timeout(wq->busy_chunks_avail, !list_empty(&wq->busy_chunks), 5);
+ if (ret <= 0)
return ERR_PTR(-EAGAIN);
mutex_lock(&wq->lock);
@@ -167,6 +169,7 @@ struct write_chunk *pop_write_request_wait(struct buse_wqueue *wq)
ch = list_first_entry(&wq->busy_chunks, struct write_chunk, list);
list_del_init(&ch->list);
+ printk("pop_wqueue: ch: %p, shmem_offset: %d\n", ch ,ch->shmem_offset);
if (!is_wqueue_term(ch))
list_add_tail(&ch->list, &wq->fetched_chunks);
@@ -189,6 +192,7 @@ int close_chunk(struct buse_wqueue *wq)
list_add_tail(&ch->list, &wq->busy_chunks);
wq->active_chunk = NULL;
+ printk("close_chunk: ch->shmem_offset: %ld, ch->num_writes: %ld", ch->shmem_offset, ch->num_writes);
wake_up(&wq->busy_chunks_avail);
end:
@@ -284,6 +288,8 @@ static void copy_to_chunk(struct buse_cmd *cmd, struct write_chunk *ch)
len = bvec.bv_len;
src = kmap_atomic(bvec.bv_page);
memcpy(ch->data_frontier, src + bvec.bv_offset, len);
+
+ printk("Write: ch->data_frontier = %p, data: %s, len=%d", ch->data_frontier, src + bvec.bv_offset, len);
kunmap_atomic(src);
ch->data_frontier += len;
}
@@ -421,6 +427,8 @@ static int send_flush(struct buse_wqueue* wq, struct buse_cmd *cmd)
list_add_tail(&fake_chunk->list, &wq->busy_chunks);
wake_up(&wq->busy_chunks_avail);
+ printk("send_flush: fake_chunk: %p, shmem_offset: %d\n", fake_chunk ,fake_chunk->shmem_offset);
+
return 0;
}
@@ -446,7 +454,8 @@ static int wqueue_flush(void *data)
mutex_unlock(&wq->lock);
kfree(data);
- do_exit(0);
+ return 0;
+ //do_exit(0); // TODO: Fix this
}
/*
@@ -614,6 +623,7 @@ static int wqueue_init(struct buse_wqueue *wq)
for (i = 0; i < w_chunks; i++)
init_write_chunk(wq, &wq->chunks[i]);
+ printk("wqueue_init:%p\n", wq);
open_chunk(wq);
return 0;
@@ -629,6 +639,7 @@ err:
*/
int buse_wqueues_init(struct buse *buse)
{
+ printk("buse_wqueues_init\n");
int ret, i;
struct buse_queue *q;
size_t collisions_areas = buse->size / buse->collision_area_size;
@@ -688,6 +699,7 @@ static void rerun_write_chunks(struct buse_wqueue *wq)
ch = list_last_entry(&wq->fetched_chunks, struct write_chunk, list);
list_del_init(&ch->list);
list_add(&ch->list, &wq->busy_chunks);
+ printk("rerun_write_chunks: ch: %p, shmem_offset: %d\n", ch ,ch->shmem_offset);//DEBUG
}
wake_up(&wq->busy_chunks_avail);
mutex_unlock(&wq->lock);
diff --git a/kernel/main.c b/kernel/main.c
index c78ea6d..b2021e8 100644
--- a/kernel/main.c
+++ b/kernel/main.c
@@ -61,6 +61,7 @@ void buse_blkdev_init_cond(struct buse *buse)
{
int ret;
+ printk("wq_bound=%d, rq_bound=%d, created=%d\n", buse_wqueues_bound(buse), buse_rqueues_bound(buse), buse->blkdev.created);
if (!buse_wqueues_bound(buse) ||
!buse_rqueues_bound(buse) ||
buse->blkdev.created)
@@ -194,7 +195,7 @@ static int __init buse_init(void)
goto err;
}
- buse_chrdev_class = class_create(THIS_MODULE, buse_blkdev_name);
+ buse_chrdev_class = class_create(buse_blkdev_name);
if (IS_ERR(buse_chrdev_class)) {
ret = PTR_ERR(buse_chrdev_class);
goto err_blk;
ポーリング向け改変
diff --git a/kernel/buse-rqueue.c b/kernel/buse-rqueue.c
index f6e41e9..c89c5c0 100644
--- a/kernel/buse-rqueue.c
+++ b/kernel/buse-rqueue.c
@@ -99,12 +99,23 @@ struct read_chunk *pop_read_request_wait(struct buse_rqueue *rq)
{
int ret;
struct read_chunk *ch = NULL;
-
+#ifdef FBD
+
+ if (list_empty(&rq->busy_chunks))
+ return ERR_PTR(-EAGAIN);
+ mutex_lock(&rq->lock);
+
+ if (list_empty(&rq->busy_chunks)){
+ mutex_unlock(&rq->lock);
+ return ERR_PTR(-EAGAIN);
+ }
+#else
ret = wait_event_interruptible_timeout(rq->busy_chunks_avail, !list_empty(&rq->busy_chunks), 5);
if (ret <= 0)
return ERR_PTR(-EAGAIN);
-
mutex_lock(&rq->lock);
+#endif
+
BUG_ON(list_empty(&rq->busy_chunks));
@@ -199,7 +210,9 @@ again:
list_add_tail(&ch->list, &rq->busy_chunks);
blk_mq_start_request(cmd->rq);
+#ifndef FBD
wake_up(&rq->busy_chunks_avail);
+#endif
mutex_unlock(&rq->lock);
return BLK_STS_OK;
@@ -237,7 +250,9 @@ again:
ch->shmem_offset = (u64)-1;
list_add_tail(&ch->list, &rq->busy_chunks);
rq->terminated = true;
+#ifndef FBD
wake_up(&rq->busy_chunks_avail);
+#endif
mutex_unlock(&rq->lock);
}
@@ -444,8 +459,9 @@ static int rqueue_init(struct buse_rqueue *rq)
struct buse *buse = rq->buse;
uint r_chunks = buse->read_shm_size / buse->block_size;
int numa_node = buse_get_numa_node_for_queue_id(rq->buse, rq->q->id);
-
+#ifndef FBD
init_waitqueue_head(&rq->busy_chunks_avail);
+#endif
init_waitqueue_head(&rq->free_chunks_avail);
INIT_LIST_HEAD(&rq->busy_chunks);
INIT_LIST_HEAD(&rq->fetched_chunks);
@@ -537,7 +553,9 @@ static void rerun_read_chunks(struct buse_rqueue *rq)
list_del_init(&ch->list);
list_add(&ch->list, &rq->busy_chunks);
}
+#ifndef FBD
wake_up(&rq->busy_chunks_avail);
+#endif
mutex_unlock(&rq->lock);
}
diff --git a/kernel/buse-wqueue.c b/kernel/buse-wqueue.c
index c1bff81..a1ec80b 100644
--- a/kernel/buse-wqueue.c
+++ b/kernel/buse-wqueue.c
@@ -158,11 +158,21 @@ struct write_chunk *pop_write_request_wait(struct buse_wqueue *wq)
struct write_chunk *ch = NULL;
int ret;
+#ifdef FBD
+ if (list_empty(&wq->busy_chunks))
+ return ERR_PTR(-EAGAIN);
+
+ mutex_lock(&wq->lock);
+ if (list_empty(&wq->busy_chunks)){
+ mutex_unlock(&wq->lock);
+ return ERR_PTR(-EAGAIN);
+ }
+#else
ret = wait_event_interruptible_timeout(wq->busy_chunks_avail, !list_empty(&wq->busy_chunks), 5);
if (ret <= 0)
return ERR_PTR(-EAGAIN);
-
mutex_lock(&wq->lock);
+#endif
BUG_ON(list_empty(&wq->busy_chunks));
@@ -193,7 +203,9 @@ int close_chunk(struct buse_wqueue *wq)
wq->active_chunk = NULL;
printk("close_chunk: ch->shmem_offset: %ld, ch->num_writes: %ld", ch->shmem_offset, ch->num_writes);
+#ifndef FBD
wake_up(&wq->busy_chunks_avail);
+#endif
end:
return 0;
@@ -359,8 +371,9 @@ again:
close_chunk(wq);
list_add_tail(&fake_chunk->list, &wq->busy_chunks);
wq->terminated = true;
+#ifndef FBD
wake_up(&wq->busy_chunks_avail);
-
+#endif
mutex_unlock(&wq->lock);
}
@@ -425,7 +438,9 @@ static int send_flush(struct buse_wqueue* wq, struct buse_cmd *cmd)
fake_chunk->cmd = cmd;
list_add_tail(&fake_chunk->list, &wq->busy_chunks);
+#ifndef FBD
wake_up(&wq->busy_chunks_avail);
+#endif
printk("send_flush: fake_chunk: %p, shmem_offset: %d\n", fake_chunk ,fake_chunk->shmem_offset);
@@ -598,7 +613,9 @@ static int wqueue_init(struct buse_wqueue *wq)
uint w_chunks = buse->write_shm_size / buse->write_chunk_size;
int numa_node = buse_get_numa_node_for_queue_id(wq->buse, wq->q->id);
+#ifndef FBD
init_waitqueue_head(&wq->busy_chunks_avail);
+#endif
init_waitqueue_head(&wq->free_chunks_avail);
INIT_LIST_HEAD(&wq->free_chunks);
INIT_LIST_HEAD(&wq->busy_chunks);
@@ -701,7 +718,9 @@ static void rerun_write_chunks(struct buse_wqueue *wq)
list_add(&ch->list, &wq->busy_chunks);
printk("rerun_write_chunks: ch: %p, shmem_offset: %d\n", ch ,ch->shmem_offset);//DEBUG
}
+#ifndef FBD
wake_up(&wq->busy_chunks_avail);
+#endif
mutex_unlock(&wq->lock);
}
diff --git a/kernel/main.h b/kernel/main.h
index 4751bd5..a76c175 100644
--- a/kernel/main.h
+++ b/kernel/main.h
@@ -9,6 +9,8 @@
#define BUSE_MAGIC 0xB3
+#define FBD
+
extern const char *buse_blkdev_name;
extern const int buse_blkdev_max_minors;
extern int buse_blkdev_major;
@@ -157,7 +159,9 @@ struct buse_rqueue
/* Waitqueue on the event when no busy chunk is available, i.e. there
* is nothing to send to the userspace.
*/
+#ifndef FBD
wait_queue_head_t busy_chunks_avail;
+#endif
/* Waitqueue on the event when no free chunk is available, i.e. there
* is no space to process additional reads.
@@ -285,7 +289,9 @@ struct buse_wqueue
/* Waitqueue on the event when no busy chunk is available, i.e. there
* is nothing to send to the userspace.
*/
+#ifndef FBD
wait_queue_head_t busy_chunks_avail;
+#endif
/* Waitqueue on the event when no free chunk is available, i.e. there
* is no space to process additional reads.
modprobe
takayuki@takayuki-VirtualBox:~/repos/buse/kernel$ sudo make install
takayuki@takayuki-VirtualBox:~/repos/buse/kernel$ sudo modprobe buse
BUSE構成
BUSEをmodprobeしただけではBUSEデバイスは作成されない。BUSEデバイスを使用するにはconfigfs経由でBUSEデバイスを宣言する必要がある。BUSEデバイスの作成は、/sys/kernel/config/buse以下にファイルを作成する。
root@takayuki-VirtualBox:/home/takayuki/repos/buse/kernel# mkdir /sys/kernel/config/buse/0
root@takayuki-VirtualBox:/home/takayuki/repos/buse/kernel# ls /sys/kernel/config/buse/0/
blocksize can_discard can_secure_erase can_write_same can_write_zeroes collision_area_size hw_queues io_min io_opt no_scheduler power queue_depth read_shm_size size write_chunk_size write_shm_size
PowerOn
root@takayuki-VirtualBox:/home/takayuki/repos/buse/kernel# echo 1 > /sys/kernel/config/buse/0/power
buse app
commit 1ea686f5cca683a45c47a1588d9857a6fe28fdfe (HEAD)
Author: takayuki <t-fukatani@gmail.com>
Date: Sun Nov 24 22:28:46 2024 +0900
Succeeded mkfs.
diff --git a/test/app/Makefile b/test/app/Makefile
index d1bf18bb7..3546a1777 100644
--- a/test/app/Makefile
+++ b/test/app/Makefile
@@ -6,7 +6,7 @@
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
-DIRS-y += bdev_svc fuzz histogram_perf jsoncat stub
+DIRS-y += bdev_svc fuzz histogram_perf jsoncat stub buse
.PHONY: all clean $(DIRS-y)
diff --git a/test/app/buse/.gitignore b/test/app/buse/.gitignore
new file mode 100644
index 000000000..39802f642
--- /dev/null
+++ b/test/app/buse/.gitignore
@@ -0,0 +1 @@
+stub
diff --git a/test/app/buse/Makefile b/test/app/buse/Makefile
new file mode 100644
index 000000000..3b357b030
--- /dev/null
+++ b/test/app/buse/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (C) 2017 Intel Corporation.
+# All rights reserved.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+include $(SPDK_ROOT_DIR)/mk/spdk.modules.mk
+
+APP = buse
+
+C_SRCS := buse.c
+
+SPDK_LIB_LIST = $(SOCK_MODULES_LIST) event nvme
+
+include $(SPDK_ROOT_DIR)/mk/spdk.app.mk
diff --git a/test/app/buse/buse b/test/app/buse/buse
new file mode 100755
index 000000000..e45ac0366
Binary files /dev/null and b/test/app/buse/buse differ
diff --git a/test/app/buse/buse.c b/test/app/buse/buse.c
new file mode 100644
index 000000000..e3b0dd2c3
--- /dev/null
+++ b/test/app/buse/buse.c
@@ -0,0 +1,445 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) 2017 Intel Corporation.
+ * All rights reserved.
+ */
+
+#include <stdint.h>
+
+#include "buse.h"
+#include "spdk/stdinc.h"
+
+#include "spdk/event.h"
+#include "spdk/nvme.h"
+#include "spdk/string.h"
+#include "spdk/thread.h"
+
+#define NUM_QUEUES 1
+#define TARGET_DEVICE "/dev/loop14"
+
+static void *rmm[NUM_QUEUES]; // BUSE read shared mem pointer
+static void *wmm[NUM_QUEUES]; // BUSE write shared mem pointer
+static int rfd[NUM_QUEUES]; // BUSE read file discriptor
+static int wfd[NUM_QUEUES]; // BUSE read file discriptor
+static int tfd; // Target device file discriptor
+
+static struct spdk_poller *g_poller;
+/* default sleep time in ms */
+static uint32_t g_sleep_time = 1000;
+static uint32_t g_io_queue_size;
+
+struct ctrlr_entry {
+ struct spdk_nvme_ctrlr *ctrlr;
+ TAILQ_ENTRY(ctrlr_entry) link;
+};
+
+static TAILQ_HEAD(, ctrlr_entry) g_controllers = TAILQ_HEAD_INITIALIZER(g_controllers);
+
+static void
+cleanup(void)
+{
+ struct ctrlr_entry *ctrlr_entry, *tmp;
+ struct spdk_nvme_detach_ctx *detach_ctx = NULL;
+
+ TAILQ_FOREACH_SAFE(ctrlr_entry, &g_controllers, link, tmp) {
+ TAILQ_REMOVE(&g_controllers, ctrlr_entry, link);
+ spdk_nvme_cuse_unregister(ctrlr_entry->ctrlr);
+ spdk_nvme_detach_async(ctrlr_entry->ctrlr, &detach_ctx);
+ free(ctrlr_entry);
+ }
+
+ if (detach_ctx) {
+ spdk_nvme_detach_poll(detach_ctx);
+ }
+}
+
+typedef struct {
+ int (*read)(int);
+ int (*write)(int);
+
+} buse_ops;
+
+static void
+usage(char *executable_name)
+{
+ printf("%s [options]\n", executable_name);
+ printf("options:\n");
+ printf(" -i shared memory ID [required]\n");
+ printf(" -m mask core mask for DPDK\n");
+ printf(" -n channel number of memory channels used for DPDK\n");
+ printf(" -L flag enable log flag\n");
+ printf(" -p core main (primary) core for DPDK\n");
+ printf(" -s size memory size in MB for DPDK\n");
+ printf(" -t msec sleep time (ms) between checking for admin completions\n");
+ printf(" -q size override default io_queue_size when attaching controllers\n");
+ printf(" -H show this usage\n");
+}
+
+static bool
+probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
+ struct spdk_nvme_ctrlr_opts *opts)
+{
+ if (g_io_queue_size > 0) {
+ opts->io_queue_size = g_io_queue_size;
+ }
+ return true;
+}
+
+static void
+attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
+ struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
+{
+ struct ctrlr_entry *entry;
+
+ entry = malloc(sizeof(struct ctrlr_entry));
+ if (entry == NULL) {
+ fprintf(stderr, "Malloc error\n");
+ exit(1);
+ }
+
+ entry->ctrlr = ctrlr;
+ TAILQ_INSERT_TAIL(&g_controllers, entry, link);
+ if (spdk_nvme_cuse_register(ctrlr) != 0) {
+ fprintf(stderr, "could not register ctrlr with cuse\n");
+ }
+}
+
+#define STORE /* デバイスに対応するファイルディスクリプタまたはストレージの定義を行う */
+
+static
+size_t buse_proto_read(int len, int offset, uint8_t* p)
+{
+ size_t size = 0;
+ // Check if the parameters are valid
+
+ if (len <= 0 || offset < 0 || p == NULL) {
+ fprintf(stderr, "Invalid parameters for buse_proto_read\n");
+ return -1;
+ }
+
+ ssize_t bytes_read = pread(tfd, p, len, offset);
+
+ if (bytes_read < 0) {
+ perror("Read operation failed");
+ return -1;
+ }
+printf("Read %d bytes from offset %d, data:%.10s\n", len, offset, p);
+
+ // Perform the read operation
+ // This is a simplified example; you may need to adjust this based on your specific storage implementation
+
+ //DEBUG_PRINT("Read %d bytes from offset %d\n", len, offset);
+
+ return size;
+}
+
+static
+size_t buse_proto_write(int len, int offset, uint8_t* p)
+{
+ size_t size = 0;
+
+ printf("Write %d bytes from offset %d, data: %.10s\n", len, offset, p);
+
+ // Check if the parameters are valid
+ if (len <= 0 || offset < 0 || p == NULL) {
+ fprintf(stderr, "Invalid parameters for buse_proto_write\n");
+ return -1;
+ }
+
+ ssize_t bytes_written = pwrite(tfd, p, len, offset);
+
+ if (bytes_written < 0) {
+ perror("Write operation failed");
+ return -1;
+ }
+
+ //DEBUG_PRINT("Write %d bytes from offset %d\n", len, offset);
+
+
+ // Perform the write operation
+ // This is a simplified example; you may need to adjust this based on your specific storage implementation
+ /*ssize_t bytes_written = pwrite(wfd[i], p, len, offset);
+
+ if (bytes_written < 0) {
+ perror("Write operation failed");
+ return -1;
+ }
+ size += bytes_written;
+
+ // For debugging purposes
+ printf("Wrote %zd bytes to offset %d\n", bytes_written, offset);
+ }*/
+
+ return size;
+}
+
+
+const struct buse_ops buse_ops_ins = {
+ .read = buse_proto_read,
+ .write = buse_proto_write
+};
+
+/*
+ * RETURN:
+ * Success: read length
+ * Fail: -1
+ */
+static int buse_process_read_queue(void)
+{
+ uint8_t request[24];
+
+ DEBUG_PRINT("buse_process_read_queue\n");
+
+ for (int i = 0; i < NUM_QUEUES; i++)
+ {
+ int bytes_read = read(rfd[i], request, sizeof(request));
+ if (bytes_read != sizeof(request))
+ {
+ if (errno == EAGAIN)
+ {
+ continue; // EOF
+ }
+ DEBUG_PRINT("read failed: bytes_read=%d", bytes_read);
+ return -1; // Error
+ }
+ // セクター、長さ、オフセットを取得
+ uint64_t sector = *(uint64_t *)&request[0];
+ uint64_t len = *(uint64_t *)&request[8];
+ uint64_t offset = *(uint64_t *)&request[16];
+
+ buse_ops_ins.read(len * 512, sector * 512, (void *)(rmm[i]) + offset);
+
+ write(rfd[i], &offset, sizeof(offset));
+ }
+ return 1;
+}
+
+#define SHM_SIZE 33554432
+#define WCHUNK_SIZE 2097152
+#define BLOCK_SIZE 512
+
+static int buse_process_write_queue(void)
+{
+ uint8_t request[16];
+
+ DEBUG_PRINT("buse_process_write_queue\n");
+
+ for (int i = 0; i < NUM_QUEUES; i++)
+ {
+ size_t bytes_read = read(wfd[i], request, sizeof(request));
+ if (bytes_read != sizeof(request))
+ {
+ if (errno == EAGAIN)
+ {
+ continue; // EOF
+ }
+ DEBUG_PRINT("read failed: bytes_read=%zu\n", bytes_read);
+ return -1; // Error
+ }
+ // オフセットと書き込み数を取得
+ uint64_t offset = *(uint64_t *)&request[0];
+ uint64_t writes = *(uint64_t *)&request[8];
+
+ if (offset == writes && writes > WCHUNK_SIZE / BLOCK_SIZE)
+ {
+ printf("Received flsh cmd. Ignoring offset: %ld, writes %ld\n", offset, writes);
+ write(wfd[i], &offset, sizeof(offset));
+ return 1;
+ }
+
+ uint64_t tmp = offset;
+ //uint64_t frontier = tmp + METADATA_SIZE;
+ uint64_t data_frontier = tmp + (WCHUNK_SIZE / BLOCK_SIZE) * 32;
+
+ printf("Write offset %ld, writes %ld, data_frontier %p\n", offset, writes, (void *)data_frontier);
+
+ for (uint64_t j = 0; j < writes; j++) {
+ // 書き込み情報を取得
+ uint8_t write_io[32];
+ memcpy(write_io, (void *)(wmm[i]) + tmp, sizeof(write_io));
+ tmp += 32;
+
+ uint64_t sector = *(uint64_t *)&write_io[0];
+ uint64_t len = *(uint64_t *)&write_io[8];
+ //uint64_t seq = *(uint64_t *)&write_io[16];
+ //uint64_t rfu = *(uint64_t *)&write_io[24];
+
+ // デバイス特有の書き込み/フラッシュ操作
+ int bytes_pwrite = buse_ops_ins.write(len * 512, sector * 512, (void *)(wmm[i]) + data_frontier);
+ if (bytes_pwrite < 0) {
+ perror("pwrite failed");
+ return -1;
+ }
+ data_frontier += len * 512;
+ }
+ write(wfd[i], &offset, sizeof(offset));
+ // オフセットをデバイスに書き込む
+ }
+ return 1;
+}
+
+static int
+buse_poll(void *arg)
+{
+ DEBUG_PRINT("Polling...\n");
+ buse_process_write_queue();
+ buse_process_read_queue();
+
+ return 0;
+}
+
+#define MAJOR 0
+
+static void
+buse_start(void *arg1)
+{
+ //int shm_id = (intptr_t)arg1;
+
+ int major = MAJOR;
+
+ printf("BUSE app is starting....\n");
+
+ /*
+ * Initialize BUSE device files.
+ */
+ char device_path[256];
+ int fd;
+ // r: read, w: write
+ const char modes[] = {'r', 'w'};
+ int fds[2];
+
+ for (int j = 0; j < NUM_QUEUES; j++)
+ {
+ for (int i = 0; i < 2; i++) {
+ snprintf(device_path, sizeof(device_path), "/dev/buse%d-%c%d", major, modes[i], j);
+
+ printf("Queue initializing. Opening %s.\n", device_path);
+
+ fd = open(device_path, O_RDWR|O_NONBLOCK);
+ if (fd < 0) {
+ perror("Failed to open device");
+ return;
+ }
+ fds[i] = fd;
+ }
+
+ rfd[j] = fds[0];
+ wfd[j] = fds[1];
+
+ /*
+ * Initialize shared mem.
+ */
+ void *mm_ptrs[2];
+
+ for (int i = 0; i < 2; i++)
+ {
+ mm_ptrs[i] = mmap(NULL, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fds[i], 0);
+ printf("mmap: %p\n", mm_ptrs[i]);
+
+ if (mm_ptrs[i] == MAP_FAILED) {
+ perror("mmap failed");
+ close(fds[i]);
+ return;
+ }
+ }
+
+ rmm[j] = mm_ptrs[0];
+ wmm[j] = mm_ptrs[1];
+ }
+
+ printf("Opening target device: %s.\n", TARGET_DEVICE);
+
+ tfd = open(TARGET_DEVICE, O_RDWR|O_NONBLOCK);
+
+ spdk_unaffinitize_thread();
+
+ if (spdk_nvme_probe(NULL, NULL, probe_cb, attach_cb, NULL) != 0) {
+ fprintf(stderr, "spdk_nvme_probe() failed\n");
+ exit(1);
+ }
+
+ g_poller = SPDK_POLLER_REGISTER(buse_poll, NULL, 0);
+}
+
+static void
+buse_shutdown(void)
+{
+ spdk_poller_unregister(&g_poller);
+ spdk_app_stop(0);
+}
+
+int
+main(int argc, char **argv)
+{
+ int ch;
+ struct spdk_app_opts opts = {};
+ long int val;
+
+ /* default value in opts structure */
+ spdk_app_opts_init(&opts, sizeof(opts));
+
+ opts.name = "buse";
+ opts.rpc_addr = NULL;
+ opts.env_context = "--proc-type=primary";
+
+ while ((ch = getopt(argc, argv, "i:m:n:p:q:s:t:HL:")) != -1) {
+ if (ch == 'm') {
+ opts.reactor_mask = optarg;
+ } else if (ch == 'L') {
+ if (spdk_log_set_flag(optarg) != 0) {
+ SPDK_ERRLOG("unknown flag: %s\n", optarg);
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+#ifdef DEBUG
+ opts.print_level = SPDK_LOG_DEBUG;
+#endif
+ } else if (ch == '?' || ch == 'H') {
+ usage(argv[0]);
+ exit(EXIT_SUCCESS);
+ } else {
+ val = spdk_strtol(optarg, 10);
+ if (val < 0) {
+ fprintf(stderr, "Converting a string to integer failed\n");
+ exit(1);
+ }
+ switch (ch) {
+ case 'i':
+ opts.shm_id = val;
+ break;
+ case 'n':
+ opts.mem_channel = val;
+ break;
+ case 'p':
+ opts.main_core = val;
+ break;
+ case 's':
+ opts.mem_size = val;
+ break;
+ case 't':
+ g_sleep_time = val;
+ break;
+ case 'q':
+ g_io_queue_size = val;
+ break;
+ default:
+ usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+ }
+ }
+
+ if (opts.shm_id < 0) {
+ fprintf(stderr, "%s: -i shared memory ID must be specified\n", argv[0]);
+ usage(argv[0]);
+ exit(1);
+ }
+
+ opts.shutdown_cb = buse_shutdown;
+
+ ch = spdk_app_start(&opts, buse_start, (void *)(intptr_t)opts.shm_id);
+
+ cleanup();
+ spdk_app_fini();
+
+ return ch;
+}
diff --git a/test/app/buse/buse.h b/test/app/buse/buse.h
new file mode 100644
index 000000000..163d298b9
--- /dev/null
+++ b/test/app/buse/buse.h
@@ -0,0 +1,39 @@
+#ifndef BUSE_H_
+#define BUSE_H_
+
+#include <stdint.h>
+#include <stdio.h>
+
+//#define DEBUG 1
+#undef DEBUG
+#ifdef DEBUG
+#define DEBUG_PRINT(fmt, ...) do { printf(fmt,##__VA_ARGS__); } while (0)
+#else
+#define DEBUG_PRINT(fmt, ...) do { } while (0)
+#endif
+
+struct buse_ops {
+ /**
+ * Read data from the device.
+ *
+ * @param len The number of bytes to read.
+ * @param offset The offset in the device from where to start reading.
+ * @param p Pointer to the buffer where the read data will be stored.
+ *
+ * @return The number of bytes actually read, or a negative value on error.
+ */
+ size_t (*read)(int len, int offset, uint8_t* p);
+
+ /**
+ * Write data to the device.
+ *
+ * @param len The number of bytes to write.
+ * @param offset The offset in the device where to start writing.
+ * @param p Pointer to the buffer containing the data to be written.
+ *
+ * @return The number of bytes actually written, or a negative value on error.
+ */
+ size_t (*write)(int len, int offset, uint8_t* p);
+};
+
+#endif // BUSE_H_
buse app実行
takayuki@takayuki-VirtualBox:~/repos/spdk$ sudo ./scripts/setup.sh
takayuki@takayuki-VirtualBox:~/repos/spdk$ sudo ./test/app/buse/buse -i 1