From: Andrey Smirnov <andrew.smirnov@gmail.com>
To: barebox@lists.infradead.org
Cc: Andrey Smirnov <andrew.smirnov@gmail.com>
Subject: [PATCH 8/8] drivers: Import a very basic NVME implementation from Linux
Date: Mon, 11 Feb 2019 22:40:22 -0800 [thread overview]
Message-ID: <20190212064022.26154-9-andrew.smirnov@gmail.com> (raw)
In-Reply-To: <20190212064022.26154-1-andrew.smirnov@gmail.com>
Import a very abridged NVME implementation from Linux kernel in order
to be able to access NVME storage attached via PCIe.
Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
---
drivers/Kconfig | 1 +
drivers/Makefile | 1 +
drivers/nvme/Kconfig | 5 +
drivers/nvme/Makefile | 1 +
drivers/nvme/host/Kconfig | 11 +
drivers/nvme/host/Makefile | 9 +
drivers/nvme/host/core.c | 614 +++++++++++++++++
drivers/nvme/host/nvme.h | 148 +++++
drivers/nvme/host/pci.c | 697 ++++++++++++++++++++
include/linux/nvme.h | 1271 ++++++++++++++++++++++++++++++++++++
10 files changed, 2758 insertions(+)
create mode 100644 drivers/nvme/Kconfig
create mode 100644 drivers/nvme/Makefile
create mode 100644 drivers/nvme/host/Kconfig
create mode 100644 drivers/nvme/host/Makefile
create mode 100644 drivers/nvme/host/core.c
create mode 100644 drivers/nvme/host/nvme.h
create mode 100644 drivers/nvme/host/pci.c
create mode 100644 include/linux/nvme.h
diff --git a/drivers/Kconfig b/drivers/Kconfig
index c6c2eb14d..d6fbcbfe1 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -39,5 +39,6 @@ source "drivers/phy/Kconfig"
source "drivers/crypto/Kconfig"
source "drivers/memory/Kconfig"
source "drivers/soc/imx/Kconfig"
+source "drivers/nvme/Kconfig"
endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 752fd6624..65fd488ce 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -39,3 +39,4 @@ obj-$(CONFIG_CRYPTO_HW) += crypto/
obj-$(CONFIG_AIODEV) += aiodev/
obj-y += memory/
obj-y += soc/imx/
+obj-y += nvme/
diff --git a/drivers/nvme/Kconfig b/drivers/nvme/Kconfig
new file mode 100644
index 000000000..27ac9654a
--- /dev/null
+++ b/drivers/nvme/Kconfig
@@ -0,0 +1,5 @@
+menu "NVME Support"
+
+source "drivers/nvme/host/Kconfig"
+
+endmenu
diff --git a/drivers/nvme/Makefile b/drivers/nvme/Makefile
new file mode 100644
index 000000000..6d7d51c80
--- /dev/null
+++ b/drivers/nvme/Makefile
@@ -0,0 +1 @@
+obj-y += host/
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
new file mode 100644
index 000000000..8888c8900
--- /dev/null
+++ b/drivers/nvme/host/Kconfig
@@ -0,0 +1,11 @@
+config NVME_CORE
+ bool
+
+config BLK_DEV_NVME
+ bool "NVM Express block device"
+ depends on PCI && BLOCK
+ select NVME_CORE
+ ---help---
+ The NVM Express driver is for solid state drives directly
+ connected to the PCI or PCI Express bus. If you know you
+ don't have one of these, it is safe to answer N.
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
new file mode 100644
index 000000000..9afbc0d2e
--- /dev/null
+++ b/drivers/nvme/host/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+
+ccflags-y += -I$(src)
+
+obj-$(CONFIG_NVME_CORE) += nvme-core.o
+obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
+
+nvme-core-y := core.o
+nvme-y += pci.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
new file mode 100644
index 000000000..e0984708b
--- /dev/null
+++ b/drivers/nvme/host/core.c
@@ -0,0 +1,614 @@
+#include <common.h>
+
+#include "nvme.h"
+
+int __nvme_submit_sync_cmd(struct nvme_ctrl *ctrl,
+ struct nvme_command *cmd,
+ union nvme_result *result,
+ void *buffer, unsigned bufflen,
+ unsigned timeout, int qid)
+{
+ return ctrl->ops->submit_sync_cmd(ctrl, cmd, result, buffer, bufflen,
+ timeout, qid);
+}
+EXPORT_SYMBOL_GPL(__nvme_submit_sync_cmd);
+
+int nvme_submit_sync_cmd(struct nvme_ctrl *ctrl,
+ struct nvme_command *cmd,
+ void *buffer, unsigned bufflen)
+{
+ return __nvme_submit_sync_cmd(ctrl, cmd, NULL, buffer, bufflen, 0,
+ NVME_QID_ADMIN);
+}
+EXPORT_SYMBOL_GPL(nvme_sec_submit);
+
+static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
+{
+ struct nvme_command c = { };
+ int error;
+
+ /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
+ c.identify.opcode = nvme_admin_identify;
+ c.identify.cns = NVME_ID_CNS_CTRL;
+
+ *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
+ if (!*id)
+ return -ENOMEM;
+
+ error = nvme_submit_sync_cmd(dev, &c, *id,
+ sizeof(struct nvme_id_ctrl));
+ if (error)
+ kfree(*id);
+
+ return error;
+}
+
+static int
+nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11,
+ void *buffer, size_t buflen, u32 *result)
+{
+ struct nvme_command c;
+ union nvme_result res;
+ int ret;
+
+ memset(&c, 0, sizeof(c));
+ c.features.opcode = nvme_admin_set_features;
+ c.features.fid = cpu_to_le32(fid);
+ c.features.dword11 = cpu_to_le32(dword11);
+
+ ret = __nvme_submit_sync_cmd(dev, &c, &res, buffer, buflen, 0,
+ NVME_QID_ADMIN);
+ if (ret >= 0 && result)
+ *result = le32_to_cpu(res.u32);
+ return ret;
+}
+
+int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
+{
+ u32 q_count = (*count - 1) | ((*count - 1) << 16);
+ u32 result;
+ int status, nr_io_queues;
+
+ status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, NULL, 0,
+ &result);
+ if (status < 0)
+ return status;
+
+ /*
+ * Degraded controllers might return an error when setting the queue
+ * count. We still want to be able to bring them online and offer
+ * access to the admin queue, as that might be only way to fix them up.
+ */
+ if (status > 0) {
+ dev_err(ctrl->dev, "Could not set queue count (%d)\n", status);
+ *count = 0;
+ } else {
+ nr_io_queues = min(result & 0xffff, result >> 16) + 1;
+ *count = min(*count, nr_io_queues);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nvme_set_queue_count);
+
+static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
+{
+ uint64_t start = get_time_ns();
+ unsigned long timeout =
+ ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2);
+ u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
+ int ret;
+
+ while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
+ if (csts == ~0)
+ return -ENODEV;
+ if ((csts & NVME_CSTS_RDY) == bit)
+ break;
+
+ mdelay(100);
+
+ if (is_timeout(start, timeout)) {
+ dev_err(ctrl->dev,
+ "Device not ready; aborting %s\n", enabled ?
+ "initialisation" : "reset");
+ return -ENODEV;
+ }
+ }
+
+ return ret;
+}
+
+static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list)
+{
+ struct nvme_command c = { };
+
+ c.identify.opcode = nvme_admin_identify;
+ c.identify.cns = NVME_ID_CNS_NS_ACTIVE_LIST;
+ c.identify.nsid = cpu_to_le32(nsid);
+ return nvme_submit_sync_cmd(dev, &c, ns_list, NVME_IDENTIFY_DATA_SIZE);
+}
+
+static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl,
+ unsigned nsid)
+{
+ struct nvme_id_ns *id;
+ struct nvme_command c = { };
+ int error;
+
+ /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
+ c.identify.opcode = nvme_admin_identify;
+ c.identify.nsid = cpu_to_le32(nsid);
+ c.identify.cns = NVME_ID_CNS_NS;
+
+ id = kmalloc(sizeof(*id), GFP_KERNEL);
+ if (!id)
+ return NULL;
+
+ error = nvme_submit_sync_cmd(ctrl, &c, id, sizeof(*id));
+ if (error) {
+ dev_warn(ctrl->dev, "Identify namespace failed\n");
+ kfree(id);
+ return NULL;
+ }
+
+ return id;
+}
+
+static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
+ unsigned nsid, struct nvme_id_ns *id)
+{
+ static int instance = 1;
+ struct nvme_ns_head *head;
+ int ret = -ENOMEM;
+
+ head = kzalloc(sizeof(*head), GFP_KERNEL);
+ if (!head)
+ goto out;
+
+ head->instance = instance++;
+ head->ns_id = nsid;
+
+ return head;
+out:
+ return ERR_PTR(ret);
+}
+
+static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
+ struct nvme_id_ns *id)
+{
+ struct nvme_ctrl *ctrl = ns->ctrl;
+ const bool is_shared = id->nmic & (1 << 0);
+ struct nvme_ns_head *head = NULL;
+
+ if (is_shared) {
+ dev_info(ctrl->dev, "Skipping shared namespace %u\n", nsid);
+ return -ENOTSUPP;
+ }
+
+ head = nvme_alloc_ns_head(ctrl, nsid, id);
+ if (IS_ERR(head))
+ return PTR_ERR(head);
+
+ ns->head = head;
+
+ return 0;
+}
+
+#define DISK_NAME_LEN 32
+
+static void nvme_update_disk_info(struct block_device *blk, struct nvme_ns *ns,
+ struct nvme_id_ns *id)
+{
+ blk->blockbits = ns->lba_shift;
+ blk->num_blocks = le64_to_cpup(&id->nsze);
+
+ ns->readonly = id->nsattr & (1 << 0);
+}
+
+static void __nvme_revalidate_disk(struct block_device *blk,
+ struct nvme_id_ns *id)
+{
+ struct nvme_ns *ns = to_nvme_ns(blk);
+
+ /*
+ * If identify namespace failed, use default 512 byte block size so
+ * block layer can use before failing read/write for 0 capacity.
+ */
+ ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds;
+ if (ns->lba_shift == 0)
+ ns->lba_shift = 9;
+
+ nvme_update_disk_info(blk, ns, id);
+}
+
+static void nvme_setup_rw(struct nvme_ns *ns, struct nvme_command *cmnd,
+ int block, int num_block)
+{
+ cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id);
+ cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, block));
+ cmnd->rw.length = cpu_to_le16(num_block - 1);
+ cmnd->rw.control = 0;
+ cmnd->rw.dsmgmt = 0;
+}
+
+static void nvme_setup_flush(struct nvme_ns *ns, struct nvme_command *cmnd)
+{
+ memset(cmnd, 0, sizeof(*cmnd));
+ cmnd->common.opcode = nvme_cmd_flush;
+ cmnd->common.nsid = cpu_to_le32(ns->head->ns_id);
+}
+
+static int nvme_submit_sync_rw(struct nvme_ns *ns, struct nvme_command *cmnd,
+ void *buffer, int block, int num_blocks)
+{
+ /*
+ * ns->ctrl->max_hw_sectors is in units of 512 bytes, so we
+ * need to make sure we adjust it to discovered lba_shift
+ */
+ const u32 max_hw_sectors =
+ ns->ctrl->max_hw_sectors >> (ns->lba_shift - 9);
+ int ret;
+
+ if (num_blocks > max_hw_sectors) {
+ while (num_blocks) {
+ const int chunk = min_t(int, num_blocks,
+ max_hw_sectors);
+
+ ret = nvme_submit_sync_rw(ns, cmnd, buffer, block,
+ chunk);
+ if (ret)
+ break;
+
+ num_blocks -= chunk;
+ buffer += chunk;
+ block += chunk;
+ }
+
+ return ret;
+ }
+
+ nvme_setup_rw(ns, cmnd, block, num_blocks);
+
+ ret = __nvme_submit_sync_cmd(ns->ctrl, cmnd, NULL, buffer,
+ num_blocks << ns->lba_shift,
+ 0, NVME_QID_IO);
+
+ if (ret) {
+ dev_err(ns->ctrl->dev,
+ "I/O failed: block: %d, num blocks: %d, status code type: %xh, status code %02xh\n",
+ block, num_blocks, (ret >> 8) & 0xf,
+ ret & 0xff);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+
+static int nvme_block_device_read(struct block_device *blk, void *buffer,
+ int block, int num_blocks)
+{
+ struct nvme_ns *ns = to_nvme_ns(blk);
+ struct nvme_command cmnd = { };
+
+ cmnd.rw.opcode = nvme_cmd_read;
+
+ return nvme_submit_sync_rw(ns, &cmnd, buffer, block, num_blocks);
+}
+
+static int __maybe_unused
+nvme_block_device_write(struct block_device *blk, const void *buffer,
+ int block, int num_blocks)
+{
+ struct nvme_ns *ns = to_nvme_ns(blk);
+ struct nvme_command cmnd = { };
+
+ if (ns->readonly)
+ return -EINVAL;
+
+ cmnd.rw.opcode = nvme_cmd_write;
+
+ return nvme_submit_sync_rw(ns, &cmnd, (void *)buffer, block,
+ num_blocks);
+}
+
+static int __maybe_unused nvme_block_device_flush(struct block_device *blk)
+{
+ struct nvme_ns *ns = to_nvme_ns(blk);
+ struct nvme_command cmnd = { };
+
+ nvme_setup_flush(ns, &cmnd);
+
+ return __nvme_submit_sync_cmd(ns->ctrl, &cmnd, NULL, NULL,
+ 0, 0, NVME_QID_IO);
+}
+
+static struct block_device_ops nvme_block_device_ops = {
+ .read = nvme_block_device_read,
+#ifdef CONFIG_BLOCK_WRITE
+ .write = nvme_block_device_write,
+ .flush = nvme_block_device_flush,
+#endif
+};
+
+static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
+{
+ struct nvme_ns *ns;
+ struct nvme_id_ns *id;
+ char disk_name[DISK_NAME_LEN];
+ int ret, flags;
+
+ ns = kzalloc(sizeof(*ns), GFP_KERNEL);
+ if (!ns)
+ return;
+
+ ns->ctrl = ctrl;
+ ns->lba_shift = 9; /* set to a default value for 512 until
+ * disk is validated */
+
+ id = nvme_identify_ns(ctrl, nsid);
+ if (!id)
+ goto out_free_ns;
+
+ if (id->ncap == 0)
+ goto out_free_id;
+
+ if (nvme_init_ns_head(ns, nsid, id))
+ goto out_free_id;
+
+ nvme_set_disk_name(disk_name, ns, ctrl, &flags);
+
+ ns->blk.dev = ctrl->dev;
+ ns->blk.ops = &nvme_block_device_ops;
+ ns->blk.cdev.name = strdup(disk_name);
+
+ __nvme_revalidate_disk(&ns->blk, id);
+ kfree(id);
+
+ ret = blockdevice_register(&ns->blk);
+ if (ret) {
+ dev_err(ctrl->dev, "Cannot register block device (%d)\n", ret);
+ goto out_free_id;
+ }
+
+ return;
+out_free_id:
+ kfree(id);
+out_free_ns:
+ kfree(ns);
+}
+
+static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
+{
+ __le32 *ns_list;
+ unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024);
+ int ret = 0;
+
+ ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
+ if (!ns_list)
+ return -ENOMEM;
+
+ for (i = 0; i < num_lists; i++) {
+ ret = nvme_identify_ns_list(ctrl, prev, ns_list);
+ if (ret)
+ goto out;
+
+ for (j = 0; j < min(nn, 1024U); j++) {
+ nsid = le32_to_cpu(ns_list[j]);
+ if (!nsid)
+ goto out;
+
+ nvme_alloc_ns(ctrl, nsid);
+ }
+ nn -= j;
+ }
+ out:
+ kfree(ns_list);
+ return ret;
+}
+
+static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn)
+{
+ unsigned i;
+
+ for (i = 1; i <= nn; i++)
+ nvme_alloc_ns(ctrl, i);
+}
+
+static void nvme_scan_work(struct nvme_ctrl *ctrl)
+{
+ struct nvme_id_ctrl *id;
+ unsigned nn;
+
+ if (nvme_identify_ctrl(ctrl, &id))
+ return;
+
+ nn = le32_to_cpu(id->nn);
+ if (ctrl->vs >= NVME_VS(1, 1, 0)) {
+ if (!nvme_scan_ns_list(ctrl, nn))
+ goto out_free_id;
+ }
+ nvme_scan_ns_sequential(ctrl, nn);
+out_free_id:
+ kfree(id);
+}
+
+void nvme_start_ctrl(struct nvme_ctrl *ctrl)
+{
+ if (ctrl->queue_count > 1)
+ nvme_scan_work(ctrl);
+}
+EXPORT_SYMBOL_GPL(nvme_start_ctrl);
+
+/*
+ * If the device has been passed off to us in an enabled state, just clear
+ * the enabled bit. The spec says we should set the 'shutdown notification
+ * bits', but doing so may cause the device to complete commands to the
+ * admin queue ... and we don't know what memory that might be pointing at!
+ */
+int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
+{
+ int ret;
+
+ ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
+ ctrl->ctrl_config &= ~NVME_CC_ENABLE;
+
+ ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
+ if (ret)
+ return ret;
+
+ return nvme_wait_ready(ctrl, cap, false);
+}
+EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
+
+int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
+{
+ /*
+ * Default to a 4K page size, with the intention to update this
+ * path in the future to accomodate architectures with differing
+ * kernel and IO page sizes.
+ */
+ unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12, page_shift = 12;
+ int ret;
+
+ if (page_shift < dev_page_min) {
+ dev_err(ctrl->dev,
+ "Minimum device page size %u too large for host (%u)\n",
+ 1 << dev_page_min, 1 << page_shift);
+ return -ENODEV;
+ }
+
+ ctrl->page_size = 1 << page_shift;
+
+ ctrl->ctrl_config = NVME_CC_CSS_NVM;
+ ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
+ ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
+ ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
+ ctrl->ctrl_config |= NVME_CC_ENABLE;
+
+ ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
+ if (ret)
+ return ret;
+ return nvme_wait_ready(ctrl, cap, true);
+}
+EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
+
+int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
+{
+ uint64_t start = get_time_ns();
+ unsigned long timeout = SHUTDOWN_TIMEOUT;
+ u32 csts;
+ int ret;
+
+ ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
+ ctrl->ctrl_config |= NVME_CC_SHN_NORMAL;
+
+ ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
+ if (ret)
+ return ret;
+
+ while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) {
+ if ((csts & NVME_CSTS_SHST_MASK) == NVME_CSTS_SHST_CMPLT)
+ break;
+
+ mdelay(100);
+
+ if (is_timeout(start, timeout)) {
+ dev_err(ctrl->dev,
+ "Device shutdown incomplete; abort shutdown\n");
+ return -ENODEV;
+ }
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nvme_shutdown_ctrl);
+
+#define NVME_ID_MAX_LEN 41
+
+static void nvme_print(struct nvme_ctrl *ctrl, const char *prefix,
+ const char *_string, size_t _length)
+{
+ char string[NVME_ID_MAX_LEN];
+ const size_t length = min(_length, sizeof(string) - 1);
+
+ memcpy(string, _string, length);
+ string[length - 1] = '\0';
+
+ dev_info(ctrl->dev, "%s: %s\n", prefix, string);
+}
+
+static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
+{
+ nvme_print(ctrl, "serial", id->sn, sizeof(id->sn));
+ nvme_print(ctrl, "model", id->mn, sizeof(id->mn));
+ nvme_print(ctrl, "firmware", id->fr, sizeof(id->fr));
+
+ return 0;
+}
+
+/*
+ * Initialize the cached copies of the Identify data and various controller
+ * register in our nvme_ctrl structure. This should be called as soon as
+ * the admin queue is fully up and running.
+ */
+int nvme_init_identify(struct nvme_ctrl *ctrl)
+{
+ struct nvme_id_ctrl *id;
+ u64 cap;
+ int ret, page_shift;
+ u32 max_hw_sectors;
+
+ ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
+ if (ret) {
+ dev_err(ctrl->dev, "Reading VS failed (%d)\n", ret);
+ return ret;
+ }
+
+ ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &cap);
+ if (ret) {
+ dev_err(ctrl->dev, "Reading CAP failed (%d)\n", ret);
+ return ret;
+ }
+ page_shift = NVME_CAP_MPSMIN(cap) + 12;
+
+ ret = nvme_identify_ctrl(ctrl, &id);
+ if (ret) {
+ dev_err(ctrl->dev, "Identify Controller failed (%d)\n", ret);
+ return -EIO;
+ }
+
+ ret = nvme_init_subsystem(ctrl, id);
+ if (ret)
+ return ret;
+
+ if (id->mdts)
+ max_hw_sectors = 1 << (id->mdts + page_shift - 9);
+ else
+ max_hw_sectors = UINT_MAX;
+ ctrl->max_hw_sectors =
+ min_not_zero(ctrl->max_hw_sectors, max_hw_sectors);
+
+ kfree(id);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nvme_init_identify);
+
+
+/*
+ * Initialize a NVMe controller structures. This needs to be called during
+ * earliest initialization so that we have the initialized structured around
+ * during probing.
+ */
+int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device_d *dev,
+ const struct nvme_ctrl_ops *ops)
+{
+ static int instance = 0;
+
+ ctrl->dev = dev;
+ ctrl->ops = ops;
+ ctrl->instance = instance++;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nvme_init_ctrl);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
new file mode 100644
index 000000000..4ec4aef97
--- /dev/null
+++ b/drivers/nvme/host/nvme.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2011-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _NVME_H
+#define _NVME_H
+
+#include <linux/nvme.h>
+#include <dma.h>
+#include <block.h>
+
+#define ADMIN_TIMEOUT (60 * HZ)
+#define SHUTDOWN_TIMEOUT ( 5 * HZ)
+
+/*
+ * Common request structure for NVMe passthrough. All drivers must have
+ * this structure as the first member of their request-private data.
+ */
+struct nvme_request {
+ struct nvme_command *cmd;
+ union nvme_result result;
+ u16 status;
+
+ void *buffer;
+ unsigned int buffer_len;
+ dma_addr_t buffer_dma_addr;
+ enum dma_data_direction dma_dir;
+};
+
+struct nvme_ctrl {
+ const struct nvme_ctrl_ops *ops;
+ struct device_d *dev;
+ int instance;
+
+ u32 ctrl_config;
+ u32 queue_count;
+ u64 cap;
+ u32 page_size;
+ u32 max_hw_sectors;
+ u32 vs;
+};
+
+/*
+ * Anchor structure for namespaces. There is one for each namespace in a
+ * NVMe subsystem that any of our controllers can see, and the namespace
+ * structure for each controller is chained of it. For private namespaces
+ * there is a 1:1 relation to our namespace structures, that is ->list
+ * only ever has a single entry for private namespaces.
+ */
+struct nvme_ns_head {
+ unsigned ns_id;
+ int instance;
+};
+
+struct nvme_ns {
+ struct nvme_ctrl *ctrl;
+ struct nvme_ns_head *head;
+ struct block_device blk;
+
+ int lba_shift;
+ bool readonly;
+};
+
+static inline struct nvme_ns *to_nvme_ns(struct block_device *blk)
+{
+ return container_of(blk, struct nvme_ns, blk);
+}
+
+struct nvme_ctrl_ops {
+ int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
+ int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
+ int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
+
+ int (*submit_sync_cmd)(struct nvme_ctrl *ctrl,
+ struct nvme_command *cmd,
+ union nvme_result *result,
+ void *buffer,
+ unsigned bufflen,
+ unsigned timeout, int qid);
+};
+
+static inline bool nvme_ctrl_ready(struct nvme_ctrl *ctrl)
+{
+ u32 val = 0;
+
+ if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &val))
+ return false;
+ return val & NVME_CSTS_RDY;
+}
+
+static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
+{
+ return (sector >> (ns->lba_shift - 9));
+}
+
+static inline void nvme_end_request(struct nvme_request *rq, __le16 status,
+ union nvme_result result)
+{
+ rq->status = le16_to_cpu(status) >> 1;
+ rq->result = result;
+}
+
+int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap);
+int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap);
+int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
+int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device_d *dev,
+ const struct nvme_ctrl_ops *ops);
+void nvme_start_ctrl(struct nvme_ctrl *ctrl);
+int nvme_init_identify(struct nvme_ctrl *ctrl);
+
+enum nvme_queue_id {
+ NVME_QID_ADMIN,
+ NVME_QID_IO,
+ NVME_QID_NUM,
+ NVME_QID_ANY = -1,
+};
+
+int __nvme_submit_sync_cmd(struct nvme_ctrl *ctrl,
+ struct nvme_command *cmd,
+ union nvme_result *result,
+ void *buffer, unsigned bufflen,
+ unsigned timeout, int qid);
+int nvme_submit_sync_cmd(struct nvme_ctrl *ctrl,
+ struct nvme_command *cmd,
+ void *buffer, unsigned bufflen);
+
+
+int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
+/*
+ * Without the multipath code enabled, multiple controller per subsystems are
+ * visible as devices and thus we cannot use the subsystem instance.
+ */
+static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
+ struct nvme_ctrl *ctrl, int *flags)
+{
+ sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
+}
+
+#endif /* _NVME_H */
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
new file mode 100644
index 000000000..387bc45a7
--- /dev/null
+++ b/drivers/nvme/host/pci.c
@@ -0,0 +1,697 @@
+
+#include <common.h>
+#include <init.h>
+#include <io.h>
+#include <io-64-nonatomic-lo-hi.h>
+#include <linux/pci.h>
+
+#include <dma.h>
+
+#include "nvme.h"
+
+#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command))
+#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion))
+
+#define NVME_MAX_KB_SZ 4096
+
+static int io_queue_depth = 2;
+
+struct nvme_dev;
+
+/*
+ * An NVM Express queue. Each device has at least two (one for admin
+ * commands and one for I/O commands).
+ */
+struct nvme_queue {
+ struct nvme_dev *dev;
+ struct nvme_request *req;
+ struct nvme_command *sq_cmds;
+ volatile struct nvme_completion *cqes;
+ dma_addr_t sq_dma_addr;
+ dma_addr_t cq_dma_addr;
+ u32 __iomem *q_db;
+ u16 q_depth;
+ u16 sq_tail;
+ u16 cq_head;
+ u16 qid;
+ u8 cq_phase;
+
+ u16 counter;
+};
+
+/*
+ * Represents an NVM Express device. Each nvme_dev is a PCI function.
+ */
+struct nvme_dev {
+ struct nvme_queue queues[NVME_QID_NUM];
+ u32 __iomem *dbs;
+ struct device_d *dev;
+ unsigned online_queues;
+ unsigned max_qid;
+ int q_depth;
+ u32 db_stride;
+ void __iomem *bar;
+ bool subsystem;
+ struct nvme_ctrl ctrl;
+ __le64 *prp_pool;
+ unsigned int prp_pool_size;
+ dma_addr_t prp_dma;
+};
+
+static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
+{
+ return container_of(ctrl, struct nvme_dev, ctrl);
+}
+
+static int nvme_pci_setup_prps(struct nvme_dev *dev,
+ const struct nvme_request *req,
+ struct nvme_rw_command *cmnd)
+{
+ int length = req->buffer_len;
+ const int page_size = dev->ctrl.page_size;
+ dma_addr_t dma_addr = req->buffer_dma_addr;
+ u32 offset = dma_addr & (page_size - 1);
+ u64 prp1 = dma_addr;
+ __le64 *prp_list;
+ int i, nprps;
+ dma_addr_t prp_dma;
+
+
+ length -= (page_size - offset);
+ if (length <= 0) {
+ prp_dma = 0;
+ goto done;
+ }
+
+ dma_addr += (page_size - offset);
+
+ if (length <= page_size) {
+ prp_dma = dma_addr;
+ goto done;
+ }
+
+ nprps = DIV_ROUND_UP(length, page_size);
+ if (nprps > dev->prp_pool_size) {
+ dma_free_coherent(dev->prp_pool, dev->prp_dma,
+ dev->prp_pool_size * sizeof(u64));
+ dev->prp_pool_size = nprps;
+ dev->prp_pool = dma_alloc_coherent(nprps * sizeof(u64),
+ &dev->prp_dma);
+ }
+
+ prp_list = dev->prp_pool;
+ prp_dma = dev->prp_dma;
+
+ i = 0;
+ for (;;) {
+ if (i == page_size >> 3) {
+ __le64 *old_prp_list = prp_list;
+ prp_list = &prp_list[i];
+ prp_dma += page_size;
+ prp_list[0] = old_prp_list[i - 1];
+ old_prp_list[i - 1] = cpu_to_le64(prp_dma);
+ i = 1;
+ }
+
+ prp_list[i++] = cpu_to_le64(dma_addr);
+ dma_addr += page_size;
+ length -= page_size;
+ if (length <= 0)
+ break;
+ }
+
+done:
+ cmnd->dptr.prp1 = cpu_to_le64(prp1);
+ cmnd->dptr.prp2 = cpu_to_le64(prp_dma);
+
+ return 0;
+}
+
+static int nvme_map_data(struct nvme_dev *dev, struct nvme_request *req)
+{
+ if (!req->buffer || !req->buffer_len)
+ return 0;
+
+ req->buffer_dma_addr = dma_map_single(dev->dev, req->buffer,
+ req->buffer_len, req->dma_dir);
+ if (dma_mapping_error(dev->dev, req->buffer_dma_addr))
+ return -EFAULT;
+
+ return nvme_pci_setup_prps(dev, req, &req->cmd->rw);
+}
+
+static void nvme_unmap_data(struct nvme_dev *dev, struct nvme_request *req)
+{
+ if (!req->buffer || !req->buffer_len)
+ return;
+
+ dma_unmap_single(dev->dev, req->buffer_dma_addr, req->buffer_len,
+ req->dma_dir);
+}
+
+static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
+{
+ struct nvme_queue *nvmeq = &dev->queues[qid];
+
+ if (dev->ctrl.queue_count > qid)
+ return 0;
+
+ nvmeq->cqes = dma_alloc_coherent(CQ_SIZE(depth),
+ &nvmeq->cq_dma_addr);
+ if (!nvmeq->cqes)
+ goto free_nvmeq;
+
+ nvmeq->sq_cmds = dma_alloc_coherent(SQ_SIZE(depth),
+ &nvmeq->sq_dma_addr);
+ if (!nvmeq->sq_cmds)
+ goto free_cqdma;
+
+ nvmeq->dev = dev;
+ nvmeq->cq_head = 0;
+ nvmeq->cq_phase = 1;
+ nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
+ nvmeq->q_depth = depth;
+ nvmeq->qid = qid;
+ dev->ctrl.queue_count++;
+
+ return 0;
+
+ free_cqdma:
+ dma_free_coherent((void *)nvmeq->cqes, nvmeq->cq_dma_addr,
+ CQ_SIZE(depth));
+ free_nvmeq:
+ return -ENOMEM;
+}
+
+static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
+{
+ struct nvme_command c;
+
+ memset(&c, 0, sizeof(c));
+ c.delete_queue.opcode = opcode;
+ c.delete_queue.qid = cpu_to_le16(id);
+
+ return nvme_submit_sync_cmd(&dev->ctrl, &c, NULL, 0);
+}
+
+static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
+ struct nvme_queue *nvmeq, s16 vector)
+{
+ struct nvme_command c;
+ int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED;
+
+ /*
+ * Note: we (ab)use the fact that the prp fields survive if no data
+ * is attached to the request.
+ */
+ memset(&c, 0, sizeof(c));
+ c.create_cq.opcode = nvme_admin_create_cq;
+ c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr);
+ c.create_cq.cqid = cpu_to_le16(qid);
+ c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
+ c.create_cq.cq_flags = cpu_to_le16(flags);
+ c.create_cq.irq_vector = cpu_to_le16(vector);
+
+ return nvme_submit_sync_cmd(&dev->ctrl, &c, NULL, 0);
+}
+
+static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
+ struct nvme_queue *nvmeq)
+{
+ struct nvme_command c;
+ int flags = NVME_QUEUE_PHYS_CONTIG;
+
+ /*
+ * Note: we (ab)use the fact that the prp fields survive if no data
+ * is attached to the request.
+ */
+ memset(&c, 0, sizeof(c));
+ c.create_sq.opcode = nvme_admin_create_sq;
+ c.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr);
+ c.create_sq.sqid = cpu_to_le16(qid);
+ c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
+ c.create_sq.sq_flags = cpu_to_le16(flags);
+ c.create_sq.cqid = cpu_to_le16(qid);
+
+ return nvme_submit_sync_cmd(&dev->ctrl, &c, NULL, 0);
+}
+
+static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid)
+{
+ return adapter_delete_queue(dev, nvme_admin_delete_cq, cqid);
+}
+
+static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
+{
+ struct nvme_dev *dev = nvmeq->dev;
+
+ nvmeq->sq_tail = 0;
+ nvmeq->cq_head = 0;
+ nvmeq->cq_phase = 1;
+ nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
+ dev->online_queues++;
+}
+
+static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
+{
+ struct nvme_dev *dev = nvmeq->dev;
+ int result;
+ s16 vector;
+
+ vector = 0;
+ result = adapter_alloc_cq(dev, qid, nvmeq, vector);
+ if (result)
+ return result;
+
+ result = adapter_alloc_sq(dev, qid, nvmeq);
+ if (result < 0)
+ return result;
+ else if (result)
+ goto release_cq;
+
+ nvme_init_queue(nvmeq, qid);
+
+ return result;
+
+release_cq:
+ adapter_delete_cq(dev, qid);
+ return result;
+}
+
+/**
+ * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
+ * @nvmeq: The queue to use
+ * @cmd: The command to send
+ */
+static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
+{
+ memcpy(&nvmeq->sq_cmds[nvmeq->sq_tail], cmd, sizeof(*cmd));
+
+ if (++nvmeq->sq_tail == nvmeq->q_depth)
+ nvmeq->sq_tail = 0;
+ writel(nvmeq->sq_tail, nvmeq->q_db);
+}
+
+/* We read the CQE phase first to check if the rest of the entry is valid */
+static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq)
+{
+ return (le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) ==
+ nvmeq->cq_phase;
+}
+
+static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
+{
+ u16 head = nvmeq->cq_head;
+
+ writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
+}
+
+static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
+{
+ volatile struct nvme_completion *cqe = &nvmeq->cqes[idx];
+ struct nvme_request *req = nvmeq->req;
+
+ if (unlikely(cqe->command_id >= nvmeq->q_depth)) {
+ dev_warn(nvmeq->dev->ctrl.dev,
+ "invalid id %d completed on queue %d\n",
+ cqe->command_id, le16_to_cpu(cqe->sq_id));
+ return;
+ }
+
+ if (WARN_ON(cqe->command_id != req->cmd->common.command_id))
+ return;
+
+ nvme_end_request(req, cqe->status, cqe->result);
+}
+
+static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end)
+{
+ while (start != end) {
+ nvme_handle_cqe(nvmeq, start);
+ if (++start == nvmeq->q_depth)
+ start = 0;
+ }
+}
+
+static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
+{
+ if (++nvmeq->cq_head == nvmeq->q_depth) {
+ nvmeq->cq_head = 0;
+ nvmeq->cq_phase = !nvmeq->cq_phase;
+ }
+}
+
+static inline bool nvme_process_cq(struct nvme_queue *nvmeq, u16 *start,
+ u16 *end, int tag)
+{
+ bool found = false;
+
+ *start = nvmeq->cq_head;
+ while (!found && nvme_cqe_pending(nvmeq)) {
+ if (nvmeq->cqes[nvmeq->cq_head].command_id == tag)
+ found = true;
+ nvme_update_cq_head(nvmeq);
+ }
+ *end = nvmeq->cq_head;
+
+ if (*start != *end)
+ nvme_ring_cq_doorbell(nvmeq);
+ return found;
+}
+
+static bool nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
+{
+ u16 start, end;
+ bool found;
+
+ if (!nvme_cqe_pending(nvmeq))
+ return false;
+
+ found = nvme_process_cq(nvmeq, &start, &end, tag);
+
+ nvme_complete_cqes(nvmeq, start, end);
+ return found;
+}
+
+static int nvme_pci_submit_sync_cmd(struct nvme_ctrl *ctrl,
+ struct nvme_command *cmd,
+ union nvme_result *result,
+ void *buffer,
+ unsigned int buffer_len,
+ unsigned timeout, int qid)
+{
+ struct nvme_dev *dev = to_nvme_dev(ctrl);
+ struct nvme_queue *nvmeq = &dev->queues[qid];
+ struct nvme_request req = { };
+ const u16 tag = nvmeq->counter++ & (nvmeq->q_depth - 1);
+ enum dma_data_direction dma_dir;
+ int ret;
+
+ switch (qid) {
+ case NVME_QID_ADMIN:
+ switch (cmd->common.opcode) {
+ case nvme_admin_create_sq:
+ case nvme_admin_create_cq:
+ case nvme_admin_delete_sq:
+ case nvme_admin_delete_cq:
+ case nvme_admin_set_features:
+ dma_dir = DMA_TO_DEVICE;
+ break;
+ case nvme_admin_identify:
+ dma_dir = DMA_FROM_DEVICE;
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ case NVME_QID_IO:
+ switch (cmd->rw.opcode) {
+ case nvme_cmd_write:
+ dma_dir = DMA_TO_DEVICE;
+ break;
+ case nvme_cmd_read:
+ dma_dir = DMA_FROM_DEVICE;
+ break;
+ default:
+ return -EINVAL;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ cmd->common.command_id = tag;
+
+ timeout = timeout ?: ADMIN_TIMEOUT;
+
+ req.cmd = cmd;
+ req.buffer = buffer;
+ req.buffer_len = buffer_len;
+ req.dma_dir = dma_dir;
+
+ ret = nvme_map_data(dev, &req);
+ if (ret) {
+ dev_err(dev->dev, "Failed to map request data\n");
+ return ret;
+ }
+
+ nvme_submit_cmd(nvmeq, cmd);
+
+ nvmeq->req = &req;
+ ret = wait_on_timeout(timeout, nvme_poll(nvmeq, tag));
+ nvmeq->req = NULL;
+
+ nvme_unmap_data(dev, &req);
+
+ if (result)
+ *result = req.result;
+
+ return ret ?: req.status;
+}
+
+static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
+{
+ int result;
+ u32 aqa;
+ struct nvme_queue *nvmeq;
+
+ dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
+ NVME_CAP_NSSRC(dev->ctrl.cap) : 0;
+
+ if (dev->subsystem &&
+ (readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO))
+ writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS);
+
+ result = nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
+ if (result < 0)
+ return result;
+
+ result = nvme_alloc_queue(dev, NVME_QID_ADMIN, NVME_AQ_DEPTH);
+ if (result)
+ return result;
+
+ nvmeq = &dev->queues[NVME_QID_ADMIN];
+ aqa = nvmeq->q_depth - 1;
+ aqa |= aqa << 16;
+
+ writel(aqa, dev->bar + NVME_REG_AQA);
+ writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ);
+ writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ);
+
+ result = nvme_enable_ctrl(&dev->ctrl, dev->ctrl.cap);
+ if (result)
+ return result;
+
+ nvme_init_queue(nvmeq, NVME_QID_ADMIN);
+
+ return result;
+}
+
+static int nvme_create_io_queues(struct nvme_dev *dev)
+{
+ unsigned i, max;
+ int ret = 0;
+
+ for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) {
+ if (nvme_alloc_queue(dev, i, dev->q_depth)) {
+ ret = -ENOMEM;
+ break;
+ }
+ }
+
+ max = min(dev->max_qid, dev->ctrl.queue_count - 1);
+ for (i = dev->online_queues; i <= max; i++) {
+ ret = nvme_create_queue(&dev->queues[i], i);
+ if (ret)
+ break;
+ }
+
+ /*
+ * Ignore failing Create SQ/CQ commands, we can continue with less
+ * than the desired amount of queues, and even a controller without
+ * I/O queues can still be used to issue admin commands. This might
+ * be useful to upgrade a buggy firmware for example.
+ */
+ return ret >= 0 ? 0 : ret;
+}
+
+static int nvme_setup_io_queues(struct nvme_dev *dev)
+{
+ int result, nr_io_queues;
+
+ nr_io_queues = NVME_QID_NUM - 1;
+ result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
+ if (result < 0)
+ return result;
+
+ dev->max_qid = nr_io_queues;
+
+ return nvme_create_io_queues(dev);
+}
+
+static int nvme_pci_enable(struct nvme_dev *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+ if (pci_enable_device(pdev))
+ return -ENOMEM;
+
+ pci_set_master(pdev);
+
+ if (readl(dev->bar + NVME_REG_CSTS) == -1)
+ return -ENODEV;
+
+ dev->ctrl.cap = readq(dev->bar + NVME_REG_CAP);
+
+ dev->q_depth = min_t(int, NVME_CAP_MQES(dev->ctrl.cap) + 1,
+ io_queue_depth);
+ dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap);
+ dev->dbs = dev->bar + 4096;
+
+ return 0;
+}
+
+static void nvme_reset_work(struct nvme_dev *dev)
+{
+ int result = -ENODEV;
+
+ result = nvme_pci_enable(dev);
+ if (result)
+ goto out;
+
+ result = nvme_pci_configure_admin_queue(dev);
+ if (result)
+ goto out;
+
+ /*
+ * Limit the max command size to prevent iod->sg allocations going
+ * over a single page.
+ */
+ dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1;
+
+ result = nvme_init_identify(&dev->ctrl);
+ if (result)
+ goto out;
+
+ result = nvme_setup_io_queues(dev);
+ if (result) {
+ dev_err(dev->ctrl.dev, "IO queues not created\n");
+ goto out;
+ }
+
+ nvme_start_ctrl(&dev->ctrl);
+out:
+ return;
+}
+
+static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
+{
+ *val = readl(to_nvme_dev(ctrl)->bar + off);
+ return 0;
+}
+
+static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
+{
+ writel(val, to_nvme_dev(ctrl)->bar + off);
+ return 0;
+}
+
+static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
+{
+ *val = readq(to_nvme_dev(ctrl)->bar + off);
+ return 0;
+}
+
+static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
+ .reg_read32 = nvme_pci_reg_read32,
+ .reg_write32 = nvme_pci_reg_write32,
+ .reg_read64 = nvme_pci_reg_read64,
+ .submit_sync_cmd = nvme_pci_submit_sync_cmd,
+};
+
+static void nvme_dev_map(struct nvme_dev *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+ dev->bar = pci_iomap(pdev, 0);
+}
+
+static void nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
+{
+ int ret;
+ ret = adapter_delete_queue(nvmeq->dev, opcode, nvmeq->qid);
+ if (ret < 0)
+ dev_err(nvmeq->dev->dev, "%s: %s\n", __func__,
+ strerror(-ret));
+ else if (ret)
+ dev_err(nvmeq->dev->dev,
+ "%s: status code type: %xh, status code %02xh\n",
+ __func__, (ret >> 8) & 0xf, ret & 0xff);
+}
+
+static void nvme_disable_io_queues(struct nvme_dev *dev)
+{
+ int i, queues = dev->online_queues - 1;
+
+ for (i = queues; i > 0; i--) {
+ nvme_delete_queue(&dev->queues[i], nvme_admin_delete_sq);
+ nvme_delete_queue(&dev->queues[i], nvme_admin_delete_cq);
+ }
+}
+
+static void nvme_disable_admin_queue(struct nvme_dev *dev)
+{
+ struct nvme_queue *nvmeq = &dev->queues[0];
+ u16 start, end;
+
+ nvme_shutdown_ctrl(&dev->ctrl);
+ nvme_process_cq(nvmeq, &start, &end, -1);
+ nvme_complete_cqes(nvmeq, start, end);
+}
+
+static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct nvme_dev *dev;
+ int result;
+
+ dev = xzalloc(sizeof(*dev));
+ dev->dev = &pdev->dev;
+ pdev->dev.priv = dev;
+
+ nvme_dev_map(dev);
+ result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops);
+ if (result)
+ return result;
+
+ nvme_reset_work(dev);
+
+ return 0;
+}
+
+static void nvme_remove(struct pci_dev *pdev)
+{
+ struct nvme_dev *dev = pdev->dev.priv;
+ bool dead = true;
+
+ u32 csts = readl(dev->bar + NVME_REG_CSTS);
+
+ dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY));
+
+ if (!dead && dev->ctrl.queue_count > 0) {
+ nvme_disable_io_queues(dev);
+ nvme_disable_admin_queue(dev);
+ }
+}
+
+static const struct pci_device_id nvme_id_table[] = {
+ { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, PCI_ANY_ID) },
+ { 0, },
+};
+
+static struct pci_driver nvme_driver = {
+ .name = "nvme",
+ .id_table = nvme_id_table,
+ .probe = nvme_probe,
+ .remove = nvme_remove,
+};
+device_pci_driver(nvme_driver);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
new file mode 100644
index 000000000..818dbe933
--- /dev/null
+++ b/include/linux/nvme.h
@@ -0,0 +1,1271 @@
+/*
+ * Definitions for the NVM Express interface
+ * Copyright (c) 2011-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _LINUX_NVME_H
+#define _LINUX_NVME_H
+
+#include <linux/types.h>
+#include <linux/uuid.h>
+
+/* NQN names in commands fields specified one size */
+#define NVMF_NQN_FIELD_LEN 256
+
+/* However the max length of a qualified name is another size */
+#define NVMF_NQN_SIZE 223
+
+#define NVMF_TRSVCID_SIZE 32
+#define NVMF_TRADDR_SIZE 256
+#define NVMF_TSAS_SIZE 256
+
+#define NVME_DISC_SUBSYS_NAME "nqn.2014-08.org.nvmexpress.discovery"
+
+#define NVME_RDMA_IP_PORT 4420
+
+#define NVME_NSID_ALL 0xffffffff
+
+enum nvme_subsys_type {
+ NVME_NQN_DISC = 1, /* Discovery type target subsystem */
+ NVME_NQN_NVME = 2, /* NVME type target subsystem */
+};
+
+/* Address Family codes for Discovery Log Page entry ADRFAM field */
+enum {
+ NVMF_ADDR_FAMILY_PCI = 0, /* PCIe */
+ NVMF_ADDR_FAMILY_IP4 = 1, /* IP4 */
+ NVMF_ADDR_FAMILY_IP6 = 2, /* IP6 */
+ NVMF_ADDR_FAMILY_IB = 3, /* InfiniBand */
+ NVMF_ADDR_FAMILY_FC = 4, /* Fibre Channel */
+};
+
+/* Transport Type codes for Discovery Log Page entry TRTYPE field */
+enum {
+ NVMF_TRTYPE_RDMA = 1, /* RDMA */
+ NVMF_TRTYPE_FC = 2, /* Fibre Channel */
+ NVMF_TRTYPE_LOOP = 254, /* Reserved for host usage */
+ NVMF_TRTYPE_MAX,
+};
+
+/* Transport Requirements codes for Discovery Log Page entry TREQ field */
+enum {
+ NVMF_TREQ_NOT_SPECIFIED = 0, /* Not specified */
+ NVMF_TREQ_REQUIRED = 1, /* Required */
+ NVMF_TREQ_NOT_REQUIRED = 2, /* Not Required */
+};
+
+/* RDMA QP Service Type codes for Discovery Log Page entry TSAS
+ * RDMA_QPTYPE field
+ */
+enum {
+ NVMF_RDMA_QPTYPE_CONNECTED = 1, /* Reliable Connected */
+ NVMF_RDMA_QPTYPE_DATAGRAM = 2, /* Reliable Datagram */
+};
+
+/* RDMA QP Service Type codes for Discovery Log Page entry TSAS
+ * RDMA_QPTYPE field
+ */
+enum {
+ NVMF_RDMA_PRTYPE_NOT_SPECIFIED = 1, /* No Provider Specified */
+ NVMF_RDMA_PRTYPE_IB = 2, /* InfiniBand */
+ NVMF_RDMA_PRTYPE_ROCE = 3, /* InfiniBand RoCE */
+ NVMF_RDMA_PRTYPE_ROCEV2 = 4, /* InfiniBand RoCEV2 */
+ NVMF_RDMA_PRTYPE_IWARP = 5, /* IWARP */
+};
+
+/* RDMA Connection Management Service Type codes for Discovery Log Page
+ * entry TSAS RDMA_CMS field
+ */
+enum {
+ NVMF_RDMA_CMS_RDMA_CM = 1, /* Sockets based endpoint addressing */
+};
+
+#define NVME_AQ_DEPTH 32
+#define NVME_NR_AEN_COMMANDS 1
+#define NVME_AQ_BLK_MQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS)
+
+/*
+ * Subtract one to leave an empty queue entry for 'Full Queue' condition. See
+ * NVM-Express 1.2 specification, section 4.1.2.
+ */
+#define NVME_AQ_MQ_TAG_DEPTH (NVME_AQ_BLK_MQ_DEPTH - 1)
+
+enum {
+ NVME_REG_CAP = 0x0000, /* Controller Capabilities */
+ NVME_REG_VS = 0x0008, /* Version */
+ NVME_REG_INTMS = 0x000c, /* Interrupt Mask Set */
+ NVME_REG_INTMC = 0x0010, /* Interrupt Mask Clear */
+ NVME_REG_CC = 0x0014, /* Controller Configuration */
+ NVME_REG_CSTS = 0x001c, /* Controller Status */
+ NVME_REG_NSSR = 0x0020, /* NVM Subsystem Reset */
+ NVME_REG_AQA = 0x0024, /* Admin Queue Attributes */
+ NVME_REG_ASQ = 0x0028, /* Admin SQ Base Address */
+ NVME_REG_ACQ = 0x0030, /* Admin CQ Base Address */
+ NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */
+ NVME_REG_CMBSZ = 0x003c, /* Controller Memory Buffer Size */
+ NVME_REG_DBS = 0x1000, /* SQ 0 Tail Doorbell */
+};
+
+#define NVME_CAP_MQES(cap) ((cap) & 0xffff)
+#define NVME_CAP_TIMEOUT(cap) (((cap) >> 24) & 0xff)
+#define NVME_CAP_STRIDE(cap) (((cap) >> 32) & 0xf)
+#define NVME_CAP_NSSRC(cap) (((cap) >> 36) & 0x1)
+#define NVME_CAP_MPSMIN(cap) (((cap) >> 48) & 0xf)
+#define NVME_CAP_MPSMAX(cap) (((cap) >> 52) & 0xf)
+
+#define NVME_CMB_BIR(cmbloc) ((cmbloc) & 0x7)
+#define NVME_CMB_OFST(cmbloc) (((cmbloc) >> 12) & 0xfffff)
+
+enum {
+ NVME_CMBSZ_SQS = 1 << 0,
+ NVME_CMBSZ_CQS = 1 << 1,
+ NVME_CMBSZ_LISTS = 1 << 2,
+ NVME_CMBSZ_RDS = 1 << 3,
+ NVME_CMBSZ_WDS = 1 << 4,
+
+ NVME_CMBSZ_SZ_SHIFT = 12,
+ NVME_CMBSZ_SZ_MASK = 0xfffff,
+
+ NVME_CMBSZ_SZU_SHIFT = 8,
+ NVME_CMBSZ_SZU_MASK = 0xf,
+};
+
+/*
+ * Submission and Completion Queue Entry Sizes for the NVM command set.
+ * (In bytes and specified as a power of two (2^n)).
+ */
+#define NVME_NVM_IOSQES 6
+#define NVME_NVM_IOCQES 4
+
+enum {
+ NVME_CC_ENABLE = 1 << 0,
+ NVME_CC_CSS_NVM = 0 << 4,
+ NVME_CC_EN_SHIFT = 0,
+ NVME_CC_CSS_SHIFT = 4,
+ NVME_CC_MPS_SHIFT = 7,
+ NVME_CC_AMS_SHIFT = 11,
+ NVME_CC_SHN_SHIFT = 14,
+ NVME_CC_IOSQES_SHIFT = 16,
+ NVME_CC_IOCQES_SHIFT = 20,
+ NVME_CC_AMS_RR = 0 << NVME_CC_AMS_SHIFT,
+ NVME_CC_AMS_WRRU = 1 << NVME_CC_AMS_SHIFT,
+ NVME_CC_AMS_VS = 7 << NVME_CC_AMS_SHIFT,
+ NVME_CC_SHN_NONE = 0 << NVME_CC_SHN_SHIFT,
+ NVME_CC_SHN_NORMAL = 1 << NVME_CC_SHN_SHIFT,
+ NVME_CC_SHN_ABRUPT = 2 << NVME_CC_SHN_SHIFT,
+ NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT,
+ NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT,
+ NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT,
+ NVME_CSTS_RDY = 1 << 0,
+ NVME_CSTS_CFS = 1 << 1,
+ NVME_CSTS_NSSRO = 1 << 4,
+ NVME_CSTS_PP = 1 << 5,
+ NVME_CSTS_SHST_NORMAL = 0 << 2,
+ NVME_CSTS_SHST_OCCUR = 1 << 2,
+ NVME_CSTS_SHST_CMPLT = 2 << 2,
+ NVME_CSTS_SHST_MASK = 3 << 2,
+};
+
+struct nvme_id_power_state {
+ __le16 max_power; /* centiwatts */
+ __u8 rsvd2;
+ __u8 flags;
+ __le32 entry_lat; /* microseconds */
+ __le32 exit_lat; /* microseconds */
+ __u8 read_tput;
+ __u8 read_lat;
+ __u8 write_tput;
+ __u8 write_lat;
+ __le16 idle_power;
+ __u8 idle_scale;
+ __u8 rsvd19;
+ __le16 active_power;
+ __u8 active_work_scale;
+ __u8 rsvd23[9];
+};
+
+enum {
+ NVME_PS_FLAGS_MAX_POWER_SCALE = 1 << 0,
+ NVME_PS_FLAGS_NON_OP_STATE = 1 << 1,
+};
+
+struct nvme_id_ctrl {
+ __le16 vid;
+ __le16 ssvid;
+ char sn[20];
+ char mn[40];
+ char fr[8];
+ __u8 rab;
+ __u8 ieee[3];
+ __u8 cmic;
+ __u8 mdts;
+ __le16 cntlid;
+ __le32 ver;
+ __le32 rtd3r;
+ __le32 rtd3e;
+ __le32 oaes;
+ __le32 ctratt;
+ __u8 rsvd100[156];
+ __le16 oacs;
+ __u8 acl;
+ __u8 aerl;
+ __u8 frmw;
+ __u8 lpa;
+ __u8 elpe;
+ __u8 npss;
+ __u8 avscc;
+ __u8 apsta;
+ __le16 wctemp;
+ __le16 cctemp;
+ __le16 mtfa;
+ __le32 hmpre;
+ __le32 hmmin;
+ __u8 tnvmcap[16];
+ __u8 unvmcap[16];
+ __le32 rpmbs;
+ __le16 edstt;
+ __u8 dsto;
+ __u8 fwug;
+ __le16 kas;
+ __le16 hctma;
+ __le16 mntmt;
+ __le16 mxtmt;
+ __le32 sanicap;
+ __le32 hmminds;
+ __le16 hmmaxd;
+ __u8 rsvd338[4];
+ __u8 anatt;
+ __u8 anacap;
+ __le32 anagrpmax;
+ __le32 nanagrpid;
+ __u8 rsvd352[160];
+ __u8 sqes;
+ __u8 cqes;
+ __le16 maxcmd;
+ __le32 nn;
+ __le16 oncs;
+ __le16 fuses;
+ __u8 fna;
+ __u8 vwc;
+ __le16 awun;
+ __le16 awupf;
+ __u8 nvscc;
+ __u8 nwpc;
+ __le16 acwu;
+ __u8 rsvd534[2];
+ __le32 sgls;
+ __le32 mnan;
+ __u8 rsvd544[224];
+ char subnqn[256];
+ __u8 rsvd1024[768];
+ __le32 ioccsz;
+ __le32 iorcsz;
+ __le16 icdoff;
+ __u8 ctrattr;
+ __u8 msdbd;
+ __u8 rsvd1804[244];
+ struct nvme_id_power_state psd[32];
+ __u8 vs[1024];
+};
+
+enum {
+ NVME_CTRL_ONCS_COMPARE = 1 << 0,
+ NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1,
+ NVME_CTRL_ONCS_DSM = 1 << 2,
+ NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3,
+ NVME_CTRL_ONCS_TIMESTAMP = 1 << 6,
+ NVME_CTRL_VWC_PRESENT = 1 << 0,
+ NVME_CTRL_OACS_SEC_SUPP = 1 << 0,
+ NVME_CTRL_OACS_DIRECTIVES = 1 << 5,
+ NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8,
+ NVME_CTRL_LPA_CMD_EFFECTS_LOG = 1 << 1,
+};
+
+struct nvme_lbaf {
+ __le16 ms;
+ __u8 ds;
+ __u8 rp;
+};
+
+struct nvme_id_ns {
+ __le64 nsze;
+ __le64 ncap;
+ __le64 nuse;
+ __u8 nsfeat;
+ __u8 nlbaf;
+ __u8 flbas;
+ __u8 mc;
+ __u8 dpc;
+ __u8 dps;
+ __u8 nmic;
+ __u8 rescap;
+ __u8 fpi;
+ __u8 rsvd33;
+ __le16 nawun;
+ __le16 nawupf;
+ __le16 nacwu;
+ __le16 nabsn;
+ __le16 nabo;
+ __le16 nabspf;
+ __le16 noiob;
+ __u8 nvmcap[16];
+ __u8 rsvd64[28];
+ __le32 anagrpid;
+ __u8 rsvd96[3];
+ __u8 nsattr;
+ __u8 rsvd100[4];
+ __u8 nguid[16];
+ __u8 eui64[8];
+ struct nvme_lbaf lbaf[16];
+ __u8 rsvd192[192];
+ __u8 vs[3712];
+};
+
+enum {
+ NVME_ID_CNS_NS = 0x00,
+ NVME_ID_CNS_CTRL = 0x01,
+ NVME_ID_CNS_NS_ACTIVE_LIST = 0x02,
+ NVME_ID_CNS_NS_DESC_LIST = 0x03,
+ NVME_ID_CNS_NS_PRESENT_LIST = 0x10,
+ NVME_ID_CNS_NS_PRESENT = 0x11,
+ NVME_ID_CNS_CTRL_NS_LIST = 0x12,
+ NVME_ID_CNS_CTRL_LIST = 0x13,
+};
+
+enum {
+ NVME_DIR_IDENTIFY = 0x00,
+ NVME_DIR_STREAMS = 0x01,
+ NVME_DIR_SND_ID_OP_ENABLE = 0x01,
+ NVME_DIR_SND_ST_OP_REL_ID = 0x01,
+ NVME_DIR_SND_ST_OP_REL_RSC = 0x02,
+ NVME_DIR_RCV_ID_OP_PARAM = 0x01,
+ NVME_DIR_RCV_ST_OP_PARAM = 0x01,
+ NVME_DIR_RCV_ST_OP_STATUS = 0x02,
+ NVME_DIR_RCV_ST_OP_RESOURCE = 0x03,
+ NVME_DIR_ENDIR = 0x01,
+};
+
+enum {
+ NVME_NS_FEAT_THIN = 1 << 0,
+ NVME_NS_FLBAS_LBA_MASK = 0xf,
+ NVME_NS_FLBAS_META_EXT = 0x10,
+ NVME_LBAF_RP_BEST = 0,
+ NVME_LBAF_RP_BETTER = 1,
+ NVME_LBAF_RP_GOOD = 2,
+ NVME_LBAF_RP_DEGRADED = 3,
+ NVME_NS_DPC_PI_LAST = 1 << 4,
+ NVME_NS_DPC_PI_FIRST = 1 << 3,
+ NVME_NS_DPC_PI_TYPE3 = 1 << 2,
+ NVME_NS_DPC_PI_TYPE2 = 1 << 1,
+ NVME_NS_DPC_PI_TYPE1 = 1 << 0,
+ NVME_NS_DPS_PI_FIRST = 1 << 3,
+ NVME_NS_DPS_PI_MASK = 0x7,
+ NVME_NS_DPS_PI_TYPE1 = 1,
+ NVME_NS_DPS_PI_TYPE2 = 2,
+ NVME_NS_DPS_PI_TYPE3 = 3,
+};
+
+struct nvme_ns_id_desc {
+ __u8 nidt;
+ __u8 nidl;
+ __le16 reserved;
+};
+
+#define NVME_NIDT_EUI64_LEN 8
+#define NVME_NIDT_NGUID_LEN 16
+#define NVME_NIDT_UUID_LEN 16
+
+enum {
+ NVME_NIDT_EUI64 = 0x01,
+ NVME_NIDT_NGUID = 0x02,
+ NVME_NIDT_UUID = 0x03,
+};
+
+struct nvme_smart_log {
+ __u8 critical_warning;
+ __u8 temperature[2];
+ __u8 avail_spare;
+ __u8 spare_thresh;
+ __u8 percent_used;
+ __u8 rsvd6[26];
+ __u8 data_units_read[16];
+ __u8 data_units_written[16];
+ __u8 host_reads[16];
+ __u8 host_writes[16];
+ __u8 ctrl_busy_time[16];
+ __u8 power_cycles[16];
+ __u8 power_on_hours[16];
+ __u8 unsafe_shutdowns[16];
+ __u8 media_errors[16];
+ __u8 num_err_log_entries[16];
+ __le32 warning_temp_time;
+ __le32 critical_comp_time;
+ __le16 temp_sensor[8];
+ __u8 rsvd216[296];
+};
+
+struct nvme_fw_slot_info_log {
+ __u8 afi;
+ __u8 rsvd1[7];
+ __le64 frs[7];
+ __u8 rsvd64[448];
+};
+
+enum {
+ NVME_CMD_EFFECTS_CSUPP = 1 << 0,
+ NVME_CMD_EFFECTS_LBCC = 1 << 1,
+ NVME_CMD_EFFECTS_NCC = 1 << 2,
+ NVME_CMD_EFFECTS_NIC = 1 << 3,
+ NVME_CMD_EFFECTS_CCC = 1 << 4,
+ NVME_CMD_EFFECTS_CSE_MASK = 3 << 16,
+};
+
+struct nvme_effects_log {
+ __le32 acs[256];
+ __le32 iocs[256];
+ __u8 resv[2048];
+};
+
+enum nvme_ana_state {
+ NVME_ANA_OPTIMIZED = 0x01,
+ NVME_ANA_NONOPTIMIZED = 0x02,
+ NVME_ANA_INACCESSIBLE = 0x03,
+ NVME_ANA_PERSISTENT_LOSS = 0x04,
+ NVME_ANA_CHANGE = 0x0f,
+};
+
+struct nvme_ana_group_desc {
+ __le32 grpid;
+ __le32 nnsids;
+ __le64 chgcnt;
+ __u8 state;
+ __u8 rsvd17[15];
+ __le32 nsids[];
+};
+
+/* flag for the log specific field of the ANA log */
+#define NVME_ANA_LOG_RGO (1 << 0)
+
+struct nvme_ana_rsp_hdr {
+ __le64 chgcnt;
+ __le16 ngrps;
+ __le16 rsvd10[3];
+};
+
+enum {
+ NVME_SMART_CRIT_SPARE = 1 << 0,
+ NVME_SMART_CRIT_TEMPERATURE = 1 << 1,
+ NVME_SMART_CRIT_RELIABILITY = 1 << 2,
+ NVME_SMART_CRIT_MEDIA = 1 << 3,
+ NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4,
+};
+
+enum {
+ NVME_AER_ERROR = 0,
+ NVME_AER_SMART = 1,
+ NVME_AER_NOTICE = 2,
+ NVME_AER_CSS = 6,
+ NVME_AER_VS = 7,
+};
+
+enum {
+ NVME_AER_NOTICE_NS_CHANGED = 0x00,
+ NVME_AER_NOTICE_FW_ACT_STARTING = 0x01,
+ NVME_AER_NOTICE_ANA = 0x03,
+};
+
+enum {
+ NVME_AEN_CFG_NS_ATTR = 1 << 8,
+ NVME_AEN_CFG_FW_ACT = 1 << 9,
+ NVME_AEN_CFG_ANA_CHANGE = 1 << 11,
+};
+
+struct nvme_lba_range_type {
+ __u8 type;
+ __u8 attributes;
+ __u8 rsvd2[14];
+ __u64 slba;
+ __u64 nlb;
+ __u8 guid[16];
+ __u8 rsvd48[16];
+};
+
+enum {
+ NVME_LBART_TYPE_FS = 0x01,
+ NVME_LBART_TYPE_RAID = 0x02,
+ NVME_LBART_TYPE_CACHE = 0x03,
+ NVME_LBART_TYPE_SWAP = 0x04,
+
+ NVME_LBART_ATTRIB_TEMP = 1 << 0,
+ NVME_LBART_ATTRIB_HIDE = 1 << 1,
+};
+
+struct nvme_reservation_status {
+ __le32 gen;
+ __u8 rtype;
+ __u8 regctl[2];
+ __u8 resv5[2];
+ __u8 ptpls;
+ __u8 resv10[13];
+ struct {
+ __le16 cntlid;
+ __u8 rcsts;
+ __u8 resv3[5];
+ __le64 hostid;
+ __le64 rkey;
+ } regctl_ds[];
+};
+
+enum nvme_async_event_type {
+ NVME_AER_TYPE_ERROR = 0,
+ NVME_AER_TYPE_SMART = 1,
+ NVME_AER_TYPE_NOTICE = 2,
+};
+
+/* I/O commands */
+
+enum nvme_opcode {
+ nvme_cmd_flush = 0x00,
+ nvme_cmd_write = 0x01,
+ nvme_cmd_read = 0x02,
+ nvme_cmd_write_uncor = 0x04,
+ nvme_cmd_compare = 0x05,
+ nvme_cmd_write_zeroes = 0x08,
+ nvme_cmd_dsm = 0x09,
+ nvme_cmd_resv_register = 0x0d,
+ nvme_cmd_resv_report = 0x0e,
+ nvme_cmd_resv_acquire = 0x11,
+ nvme_cmd_resv_release = 0x15,
+};
+
+/*
+ * Descriptor subtype - lower 4 bits of nvme_(keyed_)sgl_desc identifier
+ *
+ * @NVME_SGL_FMT_ADDRESS: absolute address of the data block
+ * @NVME_SGL_FMT_OFFSET: relative offset of the in-capsule data block
+ * @NVME_SGL_FMT_TRANSPORT_A: transport defined format, value 0xA
+ * @NVME_SGL_FMT_INVALIDATE: RDMA transport specific remote invalidation
+ * request subtype
+ */
+enum {
+ NVME_SGL_FMT_ADDRESS = 0x00,
+ NVME_SGL_FMT_OFFSET = 0x01,
+ NVME_SGL_FMT_TRANSPORT_A = 0x0A,
+ NVME_SGL_FMT_INVALIDATE = 0x0f,
+};
+
+/*
+ * Descriptor type - upper 4 bits of nvme_(keyed_)sgl_desc identifier
+ *
+ * For struct nvme_sgl_desc:
+ * @NVME_SGL_FMT_DATA_DESC: data block descriptor
+ * @NVME_SGL_FMT_SEG_DESC: sgl segment descriptor
+ * @NVME_SGL_FMT_LAST_SEG_DESC: last sgl segment descriptor
+ *
+ * For struct nvme_keyed_sgl_desc:
+ * @NVME_KEY_SGL_FMT_DATA_DESC: keyed data block descriptor
+ *
+ * Transport-specific SGL types:
+ * @NVME_TRANSPORT_SGL_DATA_DESC: Transport SGL data dlock descriptor
+ */
+enum {
+ NVME_SGL_FMT_DATA_DESC = 0x00,
+ NVME_SGL_FMT_SEG_DESC = 0x02,
+ NVME_SGL_FMT_LAST_SEG_DESC = 0x03,
+ NVME_KEY_SGL_FMT_DATA_DESC = 0x04,
+ NVME_TRANSPORT_SGL_DATA_DESC = 0x05,
+};
+
+struct nvme_sgl_desc {
+ __le64 addr;
+ __le32 length;
+ __u8 rsvd[3];
+ __u8 type;
+};
+
+struct nvme_keyed_sgl_desc {
+ __le64 addr;
+ __u8 length[3];
+ __u8 key[4];
+ __u8 type;
+};
+
+union nvme_data_ptr {
+ struct {
+ __le64 prp1;
+ __le64 prp2;
+ };
+ struct nvme_sgl_desc sgl;
+ struct nvme_keyed_sgl_desc ksgl;
+};
+
+/*
+ * Lowest two bits of our flags field (FUSE field in the spec):
+ *
+ * @NVME_CMD_FUSE_FIRST: Fused Operation, first command
+ * @NVME_CMD_FUSE_SECOND: Fused Operation, second command
+ *
+ * Highest two bits in our flags field (PSDT field in the spec):
+ *
+ * @NVME_CMD_PSDT_SGL_METABUF: Use SGLS for this transfer,
+ * If used, MPTR contains addr of single physical buffer (byte aligned).
+ * @NVME_CMD_PSDT_SGL_METASEG: Use SGLS for this transfer,
+ * If used, MPTR contains an address of an SGL segment containing
+ * exactly 1 SGL descriptor (qword aligned).
+ */
+enum {
+ NVME_CMD_FUSE_FIRST = (1 << 0),
+ NVME_CMD_FUSE_SECOND = (1 << 1),
+
+ NVME_CMD_SGL_METABUF = (1 << 6),
+ NVME_CMD_SGL_METASEG = (1 << 7),
+ NVME_CMD_SGL_ALL = NVME_CMD_SGL_METABUF | NVME_CMD_SGL_METASEG,
+};
+
+struct nvme_common_command {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __le32 cdw2[2];
+ __le64 metadata;
+ union nvme_data_ptr dptr;
+ __le32 cdw10[6];
+};
+
+struct nvme_rw_command {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2;
+ __le64 metadata;
+ union nvme_data_ptr dptr;
+ __le64 slba;
+ __le16 length;
+ __le16 control;
+ __le32 dsmgmt;
+ __le32 reftag;
+ __le16 apptag;
+ __le16 appmask;
+};
+
+enum {
+ NVME_RW_LR = 1 << 15,
+ NVME_RW_FUA = 1 << 14,
+ NVME_RW_DSM_FREQ_UNSPEC = 0,
+ NVME_RW_DSM_FREQ_TYPICAL = 1,
+ NVME_RW_DSM_FREQ_RARE = 2,
+ NVME_RW_DSM_FREQ_READS = 3,
+ NVME_RW_DSM_FREQ_WRITES = 4,
+ NVME_RW_DSM_FREQ_RW = 5,
+ NVME_RW_DSM_FREQ_ONCE = 6,
+ NVME_RW_DSM_FREQ_PREFETCH = 7,
+ NVME_RW_DSM_FREQ_TEMP = 8,
+ NVME_RW_DSM_LATENCY_NONE = 0 << 4,
+ NVME_RW_DSM_LATENCY_IDLE = 1 << 4,
+ NVME_RW_DSM_LATENCY_NORM = 2 << 4,
+ NVME_RW_DSM_LATENCY_LOW = 3 << 4,
+ NVME_RW_DSM_SEQ_REQ = 1 << 6,
+ NVME_RW_DSM_COMPRESSED = 1 << 7,
+ NVME_RW_PRINFO_PRCHK_REF = 1 << 10,
+ NVME_RW_PRINFO_PRCHK_APP = 1 << 11,
+ NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12,
+ NVME_RW_PRINFO_PRACT = 1 << 13,
+ NVME_RW_DTYPE_STREAMS = 1 << 4,
+};
+
+struct nvme_dsm_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2[2];
+ union nvme_data_ptr dptr;
+ __le32 nr;
+ __le32 attributes;
+ __u32 rsvd12[4];
+};
+
+enum {
+ NVME_DSMGMT_IDR = 1 << 0,
+ NVME_DSMGMT_IDW = 1 << 1,
+ NVME_DSMGMT_AD = 1 << 2,
+};
+
+#define NVME_DSM_MAX_RANGES 256
+
+struct nvme_dsm_range {
+ __le32 cattr;
+ __le32 nlb;
+ __le64 slba;
+};
+
+struct nvme_write_zeroes_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2;
+ __le64 metadata;
+ union nvme_data_ptr dptr;
+ __le64 slba;
+ __le16 length;
+ __le16 control;
+ __le32 dsmgmt;
+ __le32 reftag;
+ __le16 apptag;
+ __le16 appmask;
+};
+
+/* Features */
+
+struct nvme_feat_auto_pst {
+ __le64 entries[32];
+};
+
+enum {
+ NVME_HOST_MEM_ENABLE = (1 << 0),
+ NVME_HOST_MEM_RETURN = (1 << 1),
+};
+
+/* Admin commands */
+
+enum nvme_admin_opcode {
+ nvme_admin_delete_sq = 0x00,
+ nvme_admin_create_sq = 0x01,
+ nvme_admin_get_log_page = 0x02,
+ nvme_admin_delete_cq = 0x04,
+ nvme_admin_create_cq = 0x05,
+ nvme_admin_identify = 0x06,
+ nvme_admin_abort_cmd = 0x08,
+ nvme_admin_set_features = 0x09,
+ nvme_admin_get_features = 0x0a,
+ nvme_admin_async_event = 0x0c,
+ nvme_admin_ns_mgmt = 0x0d,
+ nvme_admin_activate_fw = 0x10,
+ nvme_admin_download_fw = 0x11,
+ nvme_admin_ns_attach = 0x15,
+ nvme_admin_keep_alive = 0x18,
+ nvme_admin_directive_send = 0x19,
+ nvme_admin_directive_recv = 0x1a,
+ nvme_admin_dbbuf = 0x7C,
+ nvme_admin_format_nvm = 0x80,
+ nvme_admin_security_send = 0x81,
+ nvme_admin_security_recv = 0x82,
+ nvme_admin_sanitize_nvm = 0x84,
+};
+
+enum {
+ NVME_QUEUE_PHYS_CONTIG = (1 << 0),
+ NVME_CQ_IRQ_ENABLED = (1 << 1),
+ NVME_SQ_PRIO_URGENT = (0 << 1),
+ NVME_SQ_PRIO_HIGH = (1 << 1),
+ NVME_SQ_PRIO_MEDIUM = (2 << 1),
+ NVME_SQ_PRIO_LOW = (3 << 1),
+ NVME_FEAT_ARBITRATION = 0x01,
+ NVME_FEAT_POWER_MGMT = 0x02,
+ NVME_FEAT_LBA_RANGE = 0x03,
+ NVME_FEAT_TEMP_THRESH = 0x04,
+ NVME_FEAT_ERR_RECOVERY = 0x05,
+ NVME_FEAT_VOLATILE_WC = 0x06,
+ NVME_FEAT_NUM_QUEUES = 0x07,
+ NVME_FEAT_IRQ_COALESCE = 0x08,
+ NVME_FEAT_IRQ_CONFIG = 0x09,
+ NVME_FEAT_WRITE_ATOMIC = 0x0a,
+ NVME_FEAT_ASYNC_EVENT = 0x0b,
+ NVME_FEAT_AUTO_PST = 0x0c,
+ NVME_FEAT_HOST_MEM_BUF = 0x0d,
+ NVME_FEAT_TIMESTAMP = 0x0e,
+ NVME_FEAT_KATO = 0x0f,
+ NVME_FEAT_HCTM = 0x10,
+ NVME_FEAT_NOPSC = 0x11,
+ NVME_FEAT_RRL = 0x12,
+ NVME_FEAT_PLM_CONFIG = 0x13,
+ NVME_FEAT_PLM_WINDOW = 0x14,
+ NVME_FEAT_SW_PROGRESS = 0x80,
+ NVME_FEAT_HOST_ID = 0x81,
+ NVME_FEAT_RESV_MASK = 0x82,
+ NVME_FEAT_RESV_PERSIST = 0x83,
+ NVME_FEAT_WRITE_PROTECT = 0x84,
+ NVME_LOG_ERROR = 0x01,
+ NVME_LOG_SMART = 0x02,
+ NVME_LOG_FW_SLOT = 0x03,
+ NVME_LOG_CHANGED_NS = 0x04,
+ NVME_LOG_CMD_EFFECTS = 0x05,
+ NVME_LOG_ANA = 0x0c,
+ NVME_LOG_DISC = 0x70,
+ NVME_LOG_RESERVATION = 0x80,
+ NVME_FWACT_REPL = (0 << 3),
+ NVME_FWACT_REPL_ACTV = (1 << 3),
+ NVME_FWACT_ACTV = (2 << 3),
+};
+
+/* NVMe Namespace Write Protect State */
+enum {
+ NVME_NS_NO_WRITE_PROTECT = 0,
+ NVME_NS_WRITE_PROTECT,
+ NVME_NS_WRITE_PROTECT_POWER_CYCLE,
+ NVME_NS_WRITE_PROTECT_PERMANENT,
+};
+
+#define NVME_MAX_CHANGED_NAMESPACES 1024
+
+struct nvme_identify {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2[2];
+ union nvme_data_ptr dptr;
+ __u8 cns;
+ __u8 rsvd3;
+ __le16 ctrlid;
+ __u32 rsvd11[5];
+};
+
+#define NVME_IDENTIFY_DATA_SIZE 4096
+
+struct nvme_features {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2[2];
+ union nvme_data_ptr dptr;
+ __le32 fid;
+ __le32 dword11;
+ __le32 dword12;
+ __le32 dword13;
+ __le32 dword14;
+ __le32 dword15;
+};
+
+struct nvme_host_mem_buf_desc {
+ __le64 addr;
+ __le32 size;
+ __u32 rsvd;
+};
+
+struct nvme_create_cq {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __u32 rsvd1[5];
+ __le64 prp1;
+ __u64 rsvd8;
+ __le16 cqid;
+ __le16 qsize;
+ __le16 cq_flags;
+ __le16 irq_vector;
+ __u32 rsvd12[4];
+};
+
+struct nvme_create_sq {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __u32 rsvd1[5];
+ __le64 prp1;
+ __u64 rsvd8;
+ __le16 sqid;
+ __le16 qsize;
+ __le16 sq_flags;
+ __le16 cqid;
+ __u32 rsvd12[4];
+};
+
+struct nvme_delete_queue {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __u32 rsvd1[9];
+ __le16 qid;
+ __u16 rsvd10;
+ __u32 rsvd11[5];
+};
+
+struct nvme_abort_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __u32 rsvd1[9];
+ __le16 sqid;
+ __u16 cid;
+ __u32 rsvd11[5];
+};
+
+struct nvme_download_firmware {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __u32 rsvd1[5];
+ union nvme_data_ptr dptr;
+ __le32 numd;
+ __le32 offset;
+ __u32 rsvd12[4];
+};
+
+struct nvme_format_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2[4];
+ __le32 cdw10;
+ __u32 rsvd11[5];
+};
+
+struct nvme_get_log_page_command {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2[2];
+ union nvme_data_ptr dptr;
+ __u8 lid;
+ __u8 lsp; /* upper 4 bits reserved */
+ __le16 numdl;
+ __le16 numdu;
+ __u16 rsvd11;
+ __le32 lpol;
+ __le32 lpou;
+ __u32 rsvd14[2];
+};
+
+struct nvme_directive_cmd {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __le32 nsid;
+ __u64 rsvd2[2];
+ union nvme_data_ptr dptr;
+ __le32 numd;
+ __u8 doper;
+ __u8 dtype;
+ __le16 dspec;
+ __u8 endir;
+ __u8 tdtype;
+ __u16 rsvd15;
+
+ __u32 rsvd16[3];
+};
+
+/*
+ * Fabrics subcommands.
+ */
+enum nvmf_fabrics_opcode {
+ nvme_fabrics_command = 0x7f,
+};
+
+enum nvmf_capsule_command {
+ nvme_fabrics_type_property_set = 0x00,
+ nvme_fabrics_type_connect = 0x01,
+ nvme_fabrics_type_property_get = 0x04,
+};
+
+struct nvmf_common_command {
+ __u8 opcode;
+ __u8 resv1;
+ __u16 command_id;
+ __u8 fctype;
+ __u8 resv2[35];
+ __u8 ts[24];
+};
+
+/*
+ * The legal cntlid range a NVMe Target will provide.
+ * Note that cntlid of value 0 is considered illegal in the fabrics world.
+ * Devices based on earlier specs did not have the subsystem concept;
+ * therefore, those devices had their cntlid value set to 0 as a result.
+ */
+#define NVME_CNTLID_MIN 1
+#define NVME_CNTLID_MAX 0xffef
+#define NVME_CNTLID_DYNAMIC 0xffff
+
+#define MAX_DISC_LOGS 255
+
+/* Discovery log page entry */
+struct nvmf_disc_rsp_page_entry {
+ __u8 trtype;
+ __u8 adrfam;
+ __u8 subtype;
+ __u8 treq;
+ __le16 portid;
+ __le16 cntlid;
+ __le16 asqsz;
+ __u8 resv8[22];
+ char trsvcid[NVMF_TRSVCID_SIZE];
+ __u8 resv64[192];
+ char subnqn[NVMF_NQN_FIELD_LEN];
+ char traddr[NVMF_TRADDR_SIZE];
+ union tsas {
+ char common[NVMF_TSAS_SIZE];
+ struct rdma {
+ __u8 qptype;
+ __u8 prtype;
+ __u8 cms;
+ __u8 resv3[5];
+ __u16 pkey;
+ __u8 resv10[246];
+ } rdma;
+ } tsas;
+};
+
+/* Discovery log page header */
+struct nvmf_disc_rsp_page_hdr {
+ __le64 genctr;
+ __le64 numrec;
+ __le16 recfmt;
+ __u8 resv14[1006];
+ struct nvmf_disc_rsp_page_entry entries[0];
+};
+
+struct nvmf_connect_command {
+ __u8 opcode;
+ __u8 resv1;
+ __u16 command_id;
+ __u8 fctype;
+ __u8 resv2[19];
+ union nvme_data_ptr dptr;
+ __le16 recfmt;
+ __le16 qid;
+ __le16 sqsize;
+ __u8 cattr;
+ __u8 resv3;
+ __le32 kato;
+ __u8 resv4[12];
+};
+
+struct nvmf_connect_data {
+ uuid_t hostid;
+ __le16 cntlid;
+ char resv4[238];
+ char subsysnqn[NVMF_NQN_FIELD_LEN];
+ char hostnqn[NVMF_NQN_FIELD_LEN];
+ char resv5[256];
+};
+
+struct nvmf_property_set_command {
+ __u8 opcode;
+ __u8 resv1;
+ __u16 command_id;
+ __u8 fctype;
+ __u8 resv2[35];
+ __u8 attrib;
+ __u8 resv3[3];
+ __le32 offset;
+ __le64 value;
+ __u8 resv4[8];
+};
+
+struct nvmf_property_get_command {
+ __u8 opcode;
+ __u8 resv1;
+ __u16 command_id;
+ __u8 fctype;
+ __u8 resv2[35];
+ __u8 attrib;
+ __u8 resv3[3];
+ __le32 offset;
+ __u8 resv4[16];
+};
+
+struct nvme_dbbuf {
+ __u8 opcode;
+ __u8 flags;
+ __u16 command_id;
+ __u32 rsvd1[5];
+ __le64 prp1;
+ __le64 prp2;
+ __u32 rsvd12[6];
+};
+
+struct streams_directive_params {
+ __le16 msl;
+ __le16 nssa;
+ __le16 nsso;
+ __u8 rsvd[10];
+ __le32 sws;
+ __le16 sgs;
+ __le16 nsa;
+ __le16 nso;
+ __u8 rsvd2[6];
+};
+
+struct nvme_command {
+ union {
+ struct nvme_common_command common;
+ struct nvme_rw_command rw;
+ struct nvme_identify identify;
+ struct nvme_features features;
+ struct nvme_create_cq create_cq;
+ struct nvme_create_sq create_sq;
+ struct nvme_delete_queue delete_queue;
+ struct nvme_download_firmware dlfw;
+ struct nvme_format_cmd format;
+ struct nvme_dsm_cmd dsm;
+ struct nvme_write_zeroes_cmd write_zeroes;
+ struct nvme_abort_cmd abort;
+ struct nvme_get_log_page_command get_log_page;
+ struct nvmf_common_command fabrics;
+ struct nvmf_connect_command connect;
+ struct nvmf_property_set_command prop_set;
+ struct nvmf_property_get_command prop_get;
+ struct nvme_dbbuf dbbuf;
+ struct nvme_directive_cmd directive;
+ };
+};
+
+static inline bool nvme_is_write(struct nvme_command *cmd)
+{
+ /*
+ * What a mess...
+ *
+ * Why can't we simply have a Fabrics In and Fabrics out command?
+ */
+ if (unlikely(cmd->common.opcode == nvme_fabrics_command))
+ return cmd->fabrics.fctype & 1;
+ return cmd->common.opcode & 1;
+}
+
+enum {
+ /*
+ * Generic Command Status:
+ */
+ NVME_SC_SUCCESS = 0x0,
+ NVME_SC_INVALID_OPCODE = 0x1,
+ NVME_SC_INVALID_FIELD = 0x2,
+ NVME_SC_CMDID_CONFLICT = 0x3,
+ NVME_SC_DATA_XFER_ERROR = 0x4,
+ NVME_SC_POWER_LOSS = 0x5,
+ NVME_SC_INTERNAL = 0x6,
+ NVME_SC_ABORT_REQ = 0x7,
+ NVME_SC_ABORT_QUEUE = 0x8,
+ NVME_SC_FUSED_FAIL = 0x9,
+ NVME_SC_FUSED_MISSING = 0xa,
+ NVME_SC_INVALID_NS = 0xb,
+ NVME_SC_CMD_SEQ_ERROR = 0xc,
+ NVME_SC_SGL_INVALID_LAST = 0xd,
+ NVME_SC_SGL_INVALID_COUNT = 0xe,
+ NVME_SC_SGL_INVALID_DATA = 0xf,
+ NVME_SC_SGL_INVALID_METADATA = 0x10,
+ NVME_SC_SGL_INVALID_TYPE = 0x11,
+
+ NVME_SC_SGL_INVALID_OFFSET = 0x16,
+ NVME_SC_SGL_INVALID_SUBTYPE = 0x17,
+
+ NVME_SC_NS_WRITE_PROTECTED = 0x20,
+
+ NVME_SC_LBA_RANGE = 0x80,
+ NVME_SC_CAP_EXCEEDED = 0x81,
+ NVME_SC_NS_NOT_READY = 0x82,
+ NVME_SC_RESERVATION_CONFLICT = 0x83,
+
+ /*
+ * Command Specific Status:
+ */
+ NVME_SC_CQ_INVALID = 0x100,
+ NVME_SC_QID_INVALID = 0x101,
+ NVME_SC_QUEUE_SIZE = 0x102,
+ NVME_SC_ABORT_LIMIT = 0x103,
+ NVME_SC_ABORT_MISSING = 0x104,
+ NVME_SC_ASYNC_LIMIT = 0x105,
+ NVME_SC_FIRMWARE_SLOT = 0x106,
+ NVME_SC_FIRMWARE_IMAGE = 0x107,
+ NVME_SC_INVALID_VECTOR = 0x108,
+ NVME_SC_INVALID_LOG_PAGE = 0x109,
+ NVME_SC_INVALID_FORMAT = 0x10a,
+ NVME_SC_FW_NEEDS_CONV_RESET = 0x10b,
+ NVME_SC_INVALID_QUEUE = 0x10c,
+ NVME_SC_FEATURE_NOT_SAVEABLE = 0x10d,
+ NVME_SC_FEATURE_NOT_CHANGEABLE = 0x10e,
+ NVME_SC_FEATURE_NOT_PER_NS = 0x10f,
+ NVME_SC_FW_NEEDS_SUBSYS_RESET = 0x110,
+ NVME_SC_FW_NEEDS_RESET = 0x111,
+ NVME_SC_FW_NEEDS_MAX_TIME = 0x112,
+ NVME_SC_FW_ACIVATE_PROHIBITED = 0x113,
+ NVME_SC_OVERLAPPING_RANGE = 0x114,
+ NVME_SC_NS_INSUFFICENT_CAP = 0x115,
+ NVME_SC_NS_ID_UNAVAILABLE = 0x116,
+ NVME_SC_NS_ALREADY_ATTACHED = 0x118,
+ NVME_SC_NS_IS_PRIVATE = 0x119,
+ NVME_SC_NS_NOT_ATTACHED = 0x11a,
+ NVME_SC_THIN_PROV_NOT_SUPP = 0x11b,
+ NVME_SC_CTRL_LIST_INVALID = 0x11c,
+
+ /*
+ * I/O Command Set Specific - NVM commands:
+ */
+ NVME_SC_BAD_ATTRIBUTES = 0x180,
+ NVME_SC_INVALID_PI = 0x181,
+ NVME_SC_READ_ONLY = 0x182,
+ NVME_SC_ONCS_NOT_SUPPORTED = 0x183,
+
+ /*
+ * I/O Command Set Specific - Fabrics commands:
+ */
+ NVME_SC_CONNECT_FORMAT = 0x180,
+ NVME_SC_CONNECT_CTRL_BUSY = 0x181,
+ NVME_SC_CONNECT_INVALID_PARAM = 0x182,
+ NVME_SC_CONNECT_RESTART_DISC = 0x183,
+ NVME_SC_CONNECT_INVALID_HOST = 0x184,
+
+ NVME_SC_DISCOVERY_RESTART = 0x190,
+ NVME_SC_AUTH_REQUIRED = 0x191,
+
+ /*
+ * Media and Data Integrity Errors:
+ */
+ NVME_SC_WRITE_FAULT = 0x280,
+ NVME_SC_READ_ERROR = 0x281,
+ NVME_SC_GUARD_CHECK = 0x282,
+ NVME_SC_APPTAG_CHECK = 0x283,
+ NVME_SC_REFTAG_CHECK = 0x284,
+ NVME_SC_COMPARE_FAILED = 0x285,
+ NVME_SC_ACCESS_DENIED = 0x286,
+ NVME_SC_UNWRITTEN_BLOCK = 0x287,
+
+ /*
+ * Path-related Errors:
+ */
+ NVME_SC_ANA_PERSISTENT_LOSS = 0x301,
+ NVME_SC_ANA_INACCESSIBLE = 0x302,
+ NVME_SC_ANA_TRANSITION = 0x303,
+ NVME_SC_HOST_PATH_ERROR = 0x370,
+
+ NVME_SC_DNR = 0x4000,
+};
+
+struct nvme_completion {
+ /*
+ * Used by Admin and Fabrics commands to return data:
+ */
+ union nvme_result {
+ __le16 u16;
+ __le32 u32;
+ __le64 u64;
+ } result;
+ __le16 sq_head; /* how much of this queue may be reclaimed */
+ __le16 sq_id; /* submission queue that generated this entry */
+ __u16 command_id; /* of the command which completed */
+ __le16 status; /* did the command fail, and if so, why? */
+};
+
+#define NVME_VS(major, minor, tertiary) \
+ (((major) << 16) | ((minor) << 8) | (tertiary))
+
+#define NVME_MAJOR(ver) ((ver) >> 16)
+#define NVME_MINOR(ver) (((ver) >> 8) & 0xff)
+#define NVME_TERTIARY(ver) ((ver) & 0xff)
+
+#endif /* _LINUX_NVME_H */
--
2.20.1
_______________________________________________
barebox mailing list
barebox@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/barebox
next prev parent reply other threads:[~2019-02-12 6:41 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-02-12 6:40 [PATCH 0/8] NVME support Andrey Smirnov
2019-02-12 6:40 ` [PATCH 1/8] PCI: Fix to_pci_dev() macro Andrey Smirnov
2019-02-12 6:40 ` [PATCH 2/8] PCI: Introduce device_pci_driver() Andrey Smirnov
2019-02-12 6:40 ` [PATCH 3/8] drivers: Make use of device_pci_driver() Andrey Smirnov
2019-02-12 6:40 ` [PATCH 4/8] include: Sync up pci_ids.h with 4.20-rc1 Andrey Smirnov
2019-02-12 6:40 ` [PATCH 5/8] include: Add definitnion for HZ Andrey Smirnov
2019-02-12 6:40 ` [PATCH 6/8] include: Import uuid.h for Linux Andrey Smirnov
2019-02-12 6:40 ` [PATCH 7/8] include: Import min_not_zero() macro from Linux Andrey Smirnov
2019-02-12 6:40 ` Andrey Smirnov [this message]
2019-02-18 8:24 ` [PATCH 0/8] NVME support Sascha Hauer
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190212064022.26154-9-andrew.smirnov@gmail.com \
--to=andrew.smirnov@gmail.com \
--cc=barebox@lists.infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox