diff --git a/doc/tgtadm.8.xml b/doc/tgtadm.8.xml
index e945cd3b..15eb716f 100644
--- a/doc/tgtadm.8.xml
+++ b/doc/tgtadm.8.xml
@@ -106,12 +106,13 @@ Possible device-types are:
Possible backend types are:
- rdwr : Use normal file I/O. This is the default for disk devices
- aio : Use Asynchronous I/O
- rbd : Use Ceph's distributed-storage RADOS Block Device
+ rdwr : Use normal file I/O. This is the default for disk devices
+ aio : Use Asynchronous I/O
+ io_uring : Use io_uring I/O
+ rbd : Use Ceph's distributed-storage RADOS Block Device
- sg : Special backend type for passthrough devices
- ssc : Special backend type for tape emulation
+ sg : Special backend type for passthrough devices
+ ssc : Special backend type for tape emulation
diff --git a/scripts/tgt.bashcomp.sh b/scripts/tgt.bashcomp.sh
index 2a8dfb81..9f6d024f 100644
--- a/scripts/tgt.bashcomp.sh
+++ b/scripts/tgt.bashcomp.sh
@@ -162,7 +162,7 @@ _tgtadm() {
portal pt session sess connection conn account lld" -- "${cur}") )
return 0;;
--bstype|-E)
- COMPREPLY=( $(compgen -W "rdwr aio rbd sg ssc" -- "${cur}") )
+ COMPREPLY=( $(compgen -W "rdwr aio rbd sg ssc io_uring" -- "${cur}") )
return 0;;
--bsoflags|-f)
COMPREPLY=( $(compgen -W "direct sync" -- "${cur}") )
diff --git a/scripts/tgtd.spec b/scripts/tgtd.spec
index e0e38db6..77bd7374 100644
--- a/scripts/tgtd.spec
+++ b/scripts/tgtd.spec
@@ -8,7 +8,7 @@ License: GPLv2
URL: http://stgt.sourceforge.net/
Source0: %{name}-%{version}-%{release}.tgz
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
-BuildRequires: pkgconfig libibverbs-devel librdmacm-devel libxslt libaio-devel
+BuildRequires: pkgconfig libibverbs-devel librdmacm-devel libxslt libaio-devel liburing
%if %{defined suse_version}
BuildRequires: docbook-xsl-stylesheets
Requires: aaa_base
diff --git a/usr/Makefile b/usr/Makefile
index 394625b3..2fd38d9c 100644
--- a/usr/Makefile
+++ b/usr/Makefile
@@ -31,6 +31,11 @@ TGTD_OBJS += bs_aio.o
LIBS += -laio
endif
+ifneq ($(shell test -e /usr/include/sys/eventfd.h && test -e /usr/include/liburing.h && echo 1),)
+TGTD_OBJS += bs_io_uring.o
+LIBS += -luring
+endif
+
ifneq ($(ISCSI_RDMA),)
TGTD_OBJS += iscsi/iser.o iscsi/iser_text.o
LIBS += -libverbs -lrdmacm
diff --git a/usr/bs_io_uring.c b/usr/bs_io_uring.c
new file mode 100644
index 00000000..10aefcab
--- /dev/null
+++ b/usr/bs_io_uring.c
@@ -0,0 +1,496 @@
+/*
+ * io_uring backing store
+ *
+ * Copyright (C) 2024 Jonathan Frederick
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "list.h"
+#include "util.h"
+#include "tgtd.h"
+#include "target.h"
+#include "scsi.h"
+#include
+
+#define IO_URING_MAX_IODEPTH (1024 * 16)
+
+enum unmap_mode {
+ UNMAP_MODE_BLKDISCARD,
+ UNMAP_MODE_FALLOCATE,
+ UNMAP_MODE_NONE,
+};
+
+struct bs_io_uring_info {
+ struct io_uring ring;
+ struct scsi_lu *lu;
+ int evt_fd;
+ unsigned int npending;
+ unsigned int iodepth;
+ enum unmap_mode unmap_mode;
+};
+
+static inline struct bs_io_uring_info *BS_IO_URING_I(struct scsi_lu *lu)
+{
+ return (struct bs_io_uring_info *)((char *)lu + sizeof(*lu));
+}
+
+static void cmd_error_sense(struct scsi_cmd *cmd, uint8_t key, uint16_t asc)
+{
+ scsi_set_result(cmd, SAM_STAT_CHECK_CONDITION);
+ sense_data_build(cmd, key, asc);
+}
+
+static void bs_io_uring_get_completions_helper(struct bs_io_uring_info *info)
+{
+ struct io_uring_cqe *cqe;
+ unsigned head;
+ unsigned i = 0;
+ /* read from eventfd returns 8-byte int, fails with the error EINVAL
+ if the size of the supplied buffer is less than 8 bytes */
+ uint64_t evts_complete;
+
+ while (1) {
+ int ret = read(info->evt_fd, &evts_complete, sizeof(evts_complete));
+ if (ret < 0) {
+ switch (errno) {
+ case EINTR:
+ continue;
+ case EAGAIN:
+ // EAGAIN in non-blocking evt_fd means nothing is available
+ return;
+ default:
+ eprintf("failed to read IO_URING completions, %m\n");
+ return;
+ }
+ }
+ break;
+ }
+
+ io_uring_for_each_cqe(&info->ring, head, cqe)
+ {
+ struct scsi_cmd *cmd = (struct scsi_cmd *)io_uring_cqe_get_data(cqe);
+ if (cmd != NULL) {
+ int result = SAM_STAT_GOOD;
+ if (unlikely(cqe->res < 0)) {
+ eprintf("error in async operation: %s\n", strerror(-cqe->res));
+ sense_data_build(cmd, MEDIUM_ERROR, 0);
+ result = SAM_STAT_CHECK_CONDITION;
+ }
+
+ target_cmd_io_done(cmd, result);
+ }
+
+ info->npending--;
+ i++;
+ }
+
+ io_uring_cq_advance(&info->ring, i);
+}
+
+static int queue_read(struct bs_io_uring_info *info, struct scsi_cmd *cmd)
+{
+ struct io_uring_sqe *sqe;
+ sqe = io_uring_get_sqe(&info->ring);
+ if (!sqe) {
+ return -1;
+ }
+
+ io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
+ io_uring_sqe_set_data(sqe, cmd);
+ io_uring_prep_read(sqe, 0, scsi_get_in_buffer(cmd), scsi_get_in_length(cmd), cmd->offset);
+ set_cmd_async(cmd);
+
+ info->npending++;
+ io_uring_submit(&info->ring);
+ return 0;
+}
+
+static int queue_write(struct bs_io_uring_info *info, struct scsi_cmd *cmd)
+{
+ struct io_uring_sqe *sqe;
+ sqe = io_uring_get_sqe(&info->ring);
+ if (!sqe) {
+ return -1;
+ }
+
+ io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
+ io_uring_sqe_set_data(sqe, cmd);
+ io_uring_prep_write(sqe, 0, scsi_get_out_buffer(cmd), scsi_get_out_length(cmd), cmd->offset);
+ set_cmd_async(cmd);
+
+ info->npending++;
+ io_uring_submit(&info->ring);
+ return 0;
+}
+
+static int queue_sync(struct bs_io_uring_info *info, struct scsi_cmd *cmd)
+{
+ struct io_uring_sqe *sqe;
+ sqe = io_uring_get_sqe(&info->ring);
+ if (!sqe) {
+ return -1;
+ }
+
+ if (cmd->scb[0] == SYNCHRONIZE_CACHE_16) {
+ sqe->off = cmd->offset;
+ sqe->len = scsi_get_in_length(cmd);
+ }
+
+ io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
+ io_uring_sqe_set_data(sqe, cmd);
+ io_uring_prep_fsync(sqe, 0, IORING_FSYNC_DATASYNC);
+ set_cmd_async(cmd);
+
+ info->npending++;
+ io_uring_submit(&info->ring);
+ return 0;
+}
+
+static int queue_unmap(struct bs_io_uring_info *info, struct scsi_cmd *cmd)
+{
+ uint32_t length = scsi_get_out_length(cmd);
+ char *tmpbuf = scsi_get_out_buffer(cmd);
+
+ if (length < 8)
+ return 0;
+
+ length -= 8;
+ tmpbuf += 8;
+
+ int num_discards = length / 16;
+ while (num_discards > 0) {
+ uint64_t offset = get_unaligned_be64(&tmpbuf[0]);
+ offset = offset << cmd->dev->blk_shift;
+
+ uint32_t tl = get_unaligned_be32(&tmpbuf[8]);
+ tl = tl << cmd->dev->blk_shift;
+
+ if (offset + tl > cmd->dev->size) {
+ eprintf("UNMAP beyond EOF\n");
+ cmd_error_sense(cmd, ILLEGAL_REQUEST, ASC_LBA_OUT_OF_RANGE);
+ return 0;
+ }
+
+ if (tl > 0) {
+ dprintf("unmap offset %lu length %u\n", offset, tl);
+
+ switch (info->unmap_mode) {
+ case UNMAP_MODE_FALLOCATE:
+#ifdef FALLOC_FL_PUNCH_HOLE
+ while (info->npending >= info->iodepth) {
+ bs_io_uring_get_completions_helper(info);
+ }
+ struct io_uring_sqe *sqe;
+ sqe = io_uring_get_sqe(&info->ring);
+ if (!sqe) {
+ return -1;
+ }
+ io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
+ if (num_discards == 1) {
+ io_uring_sqe_set_data(sqe, cmd);
+ } else {
+ io_uring_sqe_set_data(sqe, NULL);
+ sqe->flags |= IOSQE_IO_LINK;
+ }
+ dprintf("sending fallocate o: %lu l %u\n", offset, tl);
+ io_uring_prep_fallocate(sqe, 0, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, tl);
+ io_uring_submit(&info->ring);
+ info->npending++;
+ set_cmd_async(cmd);
+#endif
+ break;
+ case UNMAP_MODE_BLKDISCARD:
+#ifdef BLKDISCARD
+ // We have to send a sync request here to use ioctl
+ uint64_t range[] = { offset, tl };
+ dprintf("sending BLKDISCARD o: %lu l: %lu\n", range[0], range[1]);
+ int ret = ioctl(cmd->dev->fd, BLKDISCARD, &range);
+ if (ret) {
+ eprintf("BLKDISCARD got code %d %s\n", ret, strerror(-ret));
+ cmd_error_sense(cmd, HARDWARE_ERROR, ASC_INTERNAL_TGT_FAILURE);
+ return ret;
+ }
+#endif
+ break;
+ default:
+ eprintf("Ignoring UNMAP request\n");
+ break;
+ }
+ }
+
+ length -= 16;
+ tmpbuf += 16;
+ num_discards -= 1;
+ }
+
+ return 0;
+}
+
+static int bs_io_uring_cmd_submit(struct scsi_cmd *cmd)
+{
+ struct scsi_lu *lu = cmd->dev;
+ struct bs_io_uring_info *info = BS_IO_URING_I(lu);
+ unsigned int scsi_op = (unsigned int)cmd->scb[0];
+ int ret;
+
+ while (info->npending >= info->iodepth) {
+ bs_io_uring_get_completions_helper(info);
+ }
+
+ switch (scsi_op) {
+ case WRITE_6:
+ case WRITE_10:
+ case WRITE_12:
+ case WRITE_16:
+ ret = queue_write(info, cmd);
+
+ // dprintf("write offset: %lx\n", cmd->offset);
+ break;
+
+ case READ_6:
+ case READ_10:
+ case READ_12:
+ case READ_16:
+ ret = queue_read(info, cmd);
+
+ // dprintf("read offset: %lx\n", cmd->offset);
+ break;
+ case SYNCHRONIZE_CACHE:
+ case SYNCHRONIZE_CACHE_16:
+ if (cmd->scb[1] & 0x2) {
+ cmd_error_sense(cmd, ILLEGAL_REQUEST, ASC_INVALID_FIELD_IN_CDB);
+ ret = -1;
+ } else {
+ ret = queue_sync(info, cmd);
+ }
+ break;
+ case UNMAP:
+ if (!cmd->dev->attrs.thinprovisioning) {
+ cmd_error_sense(cmd, ILLEGAL_REQUEST, ASC_INVALID_FIELD_IN_CDB);
+ ret = -1;
+ } else {
+ ret = queue_unmap(info, cmd);
+ }
+ break;
+ case WRITE_SAME:
+ case WRITE_SAME_16:
+ dprintf("WRITE_SAME not yet supported for IO_URING backend.\n");
+ ret = -1;
+ break;
+ default:
+ dprintf("skipped cmd:%p op:%x\n", cmd, scsi_op);
+ ret = 0;
+ }
+
+ if (scsi_get_result(cmd) != SAM_STAT_GOOD) {
+ eprintf("io error %p %x %d, %m\n", cmd, cmd->scb[0], ret);
+ }
+
+ return 0;
+}
+
+static void bs_io_uring_get_completions(int fd, int events, void *data)
+{
+ struct bs_io_uring_info *info = data;
+ bs_io_uring_get_completions_helper(info);
+}
+
+static int bs_io_uring_open(struct scsi_lu *lu, char *path, int *fd, uint64_t *size)
+{
+ struct bs_io_uring_info *info = BS_IO_URING_I(lu);
+ struct io_uring_params params;
+ int ret;
+ uint32_t blksize = 0;
+
+ memset(¶ms, 0, sizeof(params));
+ params.flags |= IORING_SETUP_SQPOLL;
+ params.sq_thread_idle = 1000;
+
+ eprintf("create io_uring context for tgt:%d lun:%" PRId64 ", max iodepth:%d\n", info->lu->tgt->tid,
+ info->lu->lun, info->iodepth);
+
+ ret = io_uring_queue_init_params(info->iodepth, &info->ring, ¶ms);
+ if (ret) {
+ eprintf("failed to init io_uring queue params, %m\n");
+ return ret;
+ }
+
+ int afd = eventfd(0, O_NONBLOCK);
+ if (afd < 0) {
+ eprintf("failed to create eventfd for tgt:%d lun:%" PRId64 ", %m\n", info->lu->tgt->tid, info->lu->lun);
+ ret = afd;
+ goto close_ctx;
+ }
+ dprintf("eventfd:%d for tgt:%d lun:%" PRId64 "\n", afd, info->lu->tgt->tid, info->lu->lun);
+
+ ret = tgt_event_add(afd, EPOLLIN, bs_io_uring_get_completions, info);
+ if (ret)
+ goto close_eventfd;
+ info->evt_fd = afd;
+
+ eprintf("open %s, RW for tgt:%d lun:%" PRId64 "\n", path, info->lu->tgt->tid, info->lu->lun);
+ *fd = backed_file_open(path, O_RDWR, size, &blksize);
+ /* If we get access denied, try opening the file in readonly mode */
+ if (*fd == -1 && (errno == EACCES || errno == EROFS)) {
+ eprintf("open %s, READONLY for tgt:%d lun:%" PRId64 "\n", path, info->lu->tgt->tid, info->lu->lun);
+ *fd = backed_file_open(path, O_RDONLY, size, &blksize);
+ lu->attrs.readonly = 1;
+ }
+ if (*fd < 0) {
+ eprintf("failed to open %s, for tgt:%d lun:%" PRId64 ", %m\n", path, info->lu->tgt->tid, info->lu->lun);
+ ret = *fd;
+ goto remove_tgt_evt;
+ }
+
+ eprintf("%s opened successfully for tgt:%d lun:%" PRId64 "\n", path, info->lu->tgt->tid, info->lu->lun);
+
+ struct stat st;
+ if (fstat(*fd, &st) < 0) {
+ printf("fstat fail\n");
+ return -1;
+ }
+
+ if (S_ISREG(st.st_mode)) {
+ info->unmap_mode = UNMAP_MODE_FALLOCATE;
+ } else if (S_ISBLK(st.st_mode)) {
+ info->unmap_mode = UNMAP_MODE_BLKDISCARD;
+ } else {
+ info->unmap_mode = UNMAP_MODE_NONE;
+ }
+
+ ret = io_uring_register_files(&info->ring, fd, 1);
+ if (ret) {
+ eprintf("failed to register buffers: %s\n", strerror(-ret));
+ goto remove_tgt_evt;
+ }
+ ret = io_uring_register_eventfd(&info->ring, info->evt_fd);
+ if (ret) {
+ eprintf("failed to register eventfd: %s\n", strerror(-ret));
+ goto remove_tgt_evt;
+ }
+
+ if (!lu->attrs.no_auto_lbppbe)
+ update_lbppbe(lu, blksize);
+
+ return 0;
+
+remove_tgt_evt:
+ tgt_event_del(afd);
+close_eventfd:
+ close(afd);
+close_ctx:
+ io_uring_queue_exit(&info->ring);
+ return ret;
+}
+
+static void bs_io_uring_close(struct scsi_lu *lu)
+{
+ close(lu->fd);
+}
+
+static tgtadm_err bs_io_uring_init(struct scsi_lu *lu, char *bsopts)
+{
+ struct bs_io_uring_info *info = BS_IO_URING_I(lu);
+
+ memset(info, 0, sizeof(*info));
+ info->lu = lu;
+ info->iodepth = IO_URING_MAX_IODEPTH;
+
+ return TGTADM_SUCCESS;
+}
+
+static void bs_io_uring_exit(struct scsi_lu *lu)
+{
+ struct bs_io_uring_info *info = BS_IO_URING_I(lu);
+ tgt_event_del(info->evt_fd);
+ close(info->evt_fd);
+ io_uring_queue_exit(&info->ring);
+}
+
+static struct backingstore_template io_uring_bst = {
+ .bs_name = "io_uring",
+ .bs_datasize = sizeof(struct bs_io_uring_info),
+ .bs_init = bs_io_uring_init,
+ .bs_exit = bs_io_uring_exit,
+ .bs_open = bs_io_uring_open,
+ .bs_close = bs_io_uring_close,
+ .bs_cmd_submit = bs_io_uring_cmd_submit,
+};
+
+__attribute__((constructor)) static void register_bs_module(void)
+{
+ unsigned char opcodes[] = { ALLOW_MEDIUM_REMOVAL,
+ COMPARE_AND_WRITE,
+ FORMAT_UNIT,
+ INQUIRY,
+ MAINT_PROTOCOL_IN,
+ MODE_SELECT,
+ MODE_SELECT_10,
+ MODE_SENSE,
+ MODE_SENSE_10,
+ ORWRITE_16,
+ PERSISTENT_RESERVE_IN,
+ PERSISTENT_RESERVE_OUT,
+ PRE_FETCH_10,
+ PRE_FETCH_16,
+ READ_10,
+ READ_12,
+ READ_16,
+ READ_6,
+ READ_CAPACITY,
+ RELEASE,
+ REPORT_LUNS,
+ REQUEST_SENSE,
+ RESERVE,
+ SEND_DIAGNOSTIC,
+ SERVICE_ACTION_IN,
+ START_STOP,
+ SYNCHRONIZE_CACHE,
+ SYNCHRONIZE_CACHE_16,
+ TEST_UNIT_READY,
+ UNMAP,
+ VERIFY_10,
+ VERIFY_12,
+ VERIFY_16,
+ WRITE_10,
+ WRITE_12,
+ WRITE_16,
+ WRITE_6,
+ WRITE_VERIFY,
+ WRITE_VERIFY_12,
+ WRITE_VERIFY_16 };
+ bs_create_opcode_map(&io_uring_bst, opcodes, ARRAY_SIZE(opcodes));
+ register_backingstore_template(&io_uring_bst);
+}