From 7ed2b60be27f12f08ba794e5bbf6d58ec39c293d Mon Sep 17 00:00:00 2001 From: n00899518 Date: Mon, 11 Aug 2025 15:33:49 +0800 Subject: [PATCH] feat: add migration driver for 5.4 kernel. --- .../KAEKernelDriver-OLK-5.10/Makefile | 57 +- .../hisilicon/migration/acc_vf_migration.c | 321 ++- .../hisilicon/migration/acc_vf_migration.h | 40 +- .../{hisilicon => include_linux}/vfio.h | 8 +- .../include_uapi_linux/vfio.h | 1444 ++++++++++++++ .../KAEKernelDriver-OLK-5.4/Makefile | 51 +- .../hisilicon/Makefile | 1 + .../hisilicon/migration/Makefile | 2 + .../hisilicon/migration/acc_vf_migration.c | 1719 +++++++++++++++++ .../hisilicon/migration/acc_vf_migration.h | 242 +++ .../include_linux/vfio.h | 298 +++ .../include_uapi_linux/vfio.h | 1444 ++++++++++++++ .../KAEKernelDriver-OLK-6.6/Makefile | 43 +- build.sh | 38 +- 14 files changed, 5421 insertions(+), 287 deletions(-) rename KAEKernelDriver/KAEKernelDriver-OLK-5.10/{hisilicon => include_linux}/vfio.h (98%) create mode 100644 KAEKernelDriver/KAEKernelDriver-OLK-5.10/include_uapi_linux/vfio.h create mode 100644 KAEKernelDriver/KAEKernelDriver-OLK-5.4/hisilicon/migration/Makefile create mode 100644 KAEKernelDriver/KAEKernelDriver-OLK-5.4/hisilicon/migration/acc_vf_migration.c create mode 100644 KAEKernelDriver/KAEKernelDriver-OLK-5.4/hisilicon/migration/acc_vf_migration.h create mode 100644 KAEKernelDriver/KAEKernelDriver-OLK-5.4/include_linux/vfio.h create mode 100644 KAEKernelDriver/KAEKernelDriver-OLK-5.4/include_uapi_linux/vfio.h diff --git a/KAEKernelDriver/KAEKernelDriver-OLK-5.10/Makefile b/KAEKernelDriver/KAEKernelDriver-OLK-5.10/Makefile index 16da4c2..e11901d 100644 --- a/KAEKernelDriver/KAEKernelDriver-OLK-5.10/Makefile +++ b/KAEKernelDriver/KAEKernelDriver-OLK-5.10/Makefile @@ -1,10 +1,10 @@ KERNEL_VERSION_BY_BUILDENV :=`rpm -q --qf '%{VERSION}-%{RELEASE}.%{ARCH}\n' kernel-devel | head -n 1` KERNEL_PATH := /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/build -KSP := $(shell if test -d /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/source; then \ - echo /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/source; \ - else \ - echo /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/build; \ - fi) +# KSP := $(shell if test -d /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/source; then \ +# echo /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/source; \ +# else \ +# echo /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/build; \ +# fi) obj-m += uacce/ obj-m += hisilicon/ @@ -13,18 +13,25 @@ DIRS := $(shell find . 
-maxdepth 3 -type d) TARGET = $(foreach dir,$(DIRS),$(wildcard \ $(dir)/*.o) $(dir)/*.ko $(dir)/*.tmp_versions $(dir)/*.depend $(dir)/*.mod.c $(dir)/*.order $(dir)/*.symvers) +CONFIG_FLAGS = CONFIG_CC_STACKPROTECTOR_STRONG=y \ + CONFIG_UACCE=m \ + CONFIG_CRYPTO_QM_UACCE=m \ + CONFIG_CRYPTO_DEV_HISI_SGL=m \ + CONFIG_CRYPTO_DEV_HISI_QM=m \ + CONFIG_CRYPTO_DEV_HISI_ZIP=m \ + CONFIG_CRYPTO_DEV_HISI_HPRE=m \ + CONFIG_CRYPTO_DEV_HISI_SEC2=m \ + CONFIG_CRYPTO_DEV_HISI_TRNG=m + +ifeq ($(ENABLE_MIGRATION), y) +CONFIG_FLAGS += CONFIG_CRYPTO_DEV_HISI_MIGRATION=m +else +CONFIG_FLAGS += CONFIG_CRYPTO_DEV_HISI_MIGRATION=n +endif + default: - $(MAKE) -C $(KERNEL_PATH) M=$(shell pwd) modules \ - CONFIG_CC_STACKPROTECTOR_STRONG=y \ - CONFIG_UACCE=m \ - CONFIG_CRYPTO_QM_UACCE=m \ - CONFIG_CRYPTO_DEV_HISI_SGL=m \ - CONFIG_CRYPTO_DEV_HISI_QM=m \ - CONFIG_CRYPTO_DEV_HISI_ZIP=m \ - CONFIG_CRYPTO_DEV_HISI_HPRE=m \ - CONFIG_CRYPTO_DEV_HISI_SEC2=m \ - CONFIG_CRYPTO_DEV_HISI_TRNG=m \ - CONFIG_CRYPTO_DEV_HISI_MIGRATION=m + $(MAKE) -C $(KERNEL_PATH) M=$(shell pwd) modules $(CONFIG_FLAGS) + #copy: # cp -f $(shell pwd)/include_linux/uacce.h $(KSP)/include/linux # cp -f $(shell pwd)/include_uapi_linux/uacce.h $(KSP)/include/uapi/linux @@ -41,7 +48,9 @@ install: -modprobe hisi_sec2 uacce_mode=1 pf_q_num=256 -modprobe hisi_hpre uacce_mode=1 pf_q_num=256 -modprobe hisi_zip uacce_mode=1 pf_q_num=256 - -modprobe hisi_migration + $(shell if [ "$(ENABLE_MIGRATION)" = "y" ]; then \ + modprobe hisi_migration; \ + fi) -echo "options hisi_sec2 uacce_mode=1 pf_q_num=256" > /etc/modprobe.d/hisi_sec2.conf -echo "options hisi_hpre uacce_mode=1 pf_q_num=256" > /etc/modprobe.d/hisi_hpre.conf -echo "options hisi_zip uacce_mode=1 pf_q_num=256" > /etc/modprobe.d/hisi_zip.conf @@ -63,7 +72,9 @@ check: done uninstall: - modprobe -r hisi_migration + $(shell if [ "$(ENABLE_MIGRATION)" = "y" ]; then \ + modprobe -r hisi_migration; \ + fi) modprobe -r hisi_zip modprobe -r hisi_hpre modprobe -r hisi_sec2 @@ -72,13 +83,15 @@ uninstall: rm -rf /etc/modprobe.d/hisi_sec2.conf rm -rf /etc/modprobe.d/hisi_hpre.conf rm -rf /etc/modprobe.d/hisi_zip.conf - dracut -f rm -rf /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/extra/uacce.ko rm -rf /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/extra/hisi_qm.ko rm -rf /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/extra/hisi_sec2.ko rm -rf /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/extra/hisi_hpre.ko rm -rf /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/extra/hisi_zip.ko - rm -rf /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/extra/hisi_migration.ko + $(shell if [ "$(ENABLE_MIGRATION)" = "y" ]; then \ + rm -rf /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/extra/hisi_migration.ko; \ + fi) + depmod -a nosva: $(shell mkdir -p /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/extra) @@ -92,7 +105,9 @@ nosva: -modprobe hisi_sec2 uacce_mode=2 pf_q_num=256 -modprobe hisi_hpre uacce_mode=2 pf_q_num=256 -modprobe hisi_zip uacce_mode=2 pf_q_num=256 - -modprobe hisi_migration + $(shell if [ "$(ENABLE_MIGRATION)" = "y" ]; then \ + modprobe hisi_migration; \ + fi) -echo "options hisi_sec2 uacce_mode=2 pf_q_num=256" > /etc/modprobe.d/hisi_sec2.conf -echo "options hisi_hpre uacce_mode=2 pf_q_num=256" > /etc/modprobe.d/hisi_hpre.conf -echo "options hisi_zip uacce_mode=2 pf_q_num=256" > /etc/modprobe.d/hisi_zip.conf diff --git a/KAEKernelDriver/KAEKernelDriver-OLK-5.10/hisilicon/migration/acc_vf_migration.c b/KAEKernelDriver/KAEKernelDriver-OLK-5.10/hisilicon/migration/acc_vf_migration.c index 7547e6e..8a7196a 100644 --- 
a/KAEKernelDriver/KAEKernelDriver-OLK-5.10/hisilicon/migration/acc_vf_migration.c +++ b/KAEKernelDriver/KAEKernelDriver-OLK-5.10/hisilicon/migration/acc_vf_migration.c @@ -10,7 +10,7 @@ #include #include #include -#include "../vfio.h" +#include #include "acc_vf_migration.h" @@ -18,6 +18,16 @@ static struct dentry *mig_debugfs_root; static int mig_root_ref; +/* return 0 mailbox ready, -ETIMEDOUT hardware timeout */ +static int qm_wait_mb_ready(struct hisi_qm *qm) +{ + u32 val; + + return readl_relaxed_poll_timeout(qm->io_base + QM_MB_CMD_SEND_BASE, + val, !((val >> QM_MB_BUSY_SHIFT) & + 0x1), POLL_PERIOD, POLL_TIMEOUT); +} + /* return 0 VM acc device ready, -ETIMEDOUT hardware timeout */ static int qm_wait_dev_ready(struct hisi_qm *qm) { @@ -27,6 +37,7 @@ static int qm_wait_dev_ready(struct hisi_qm *qm) val, !(val & 0x1), POLL_PERIOD, POLL_TIMEOUT); } + /* 128 bit should be written to hardware at one time to trigger a mailbox */ static void qm_mb_write(struct hisi_qm *qm, const void *src) { @@ -50,147 +61,57 @@ static void qm_mb_write(struct hisi_qm *qm, const void *src) : "memory"); } -/* 128 bit should be read from hardware at one time */ -static void qm_mb_read(struct hisi_qm *qm, void *dst) -{ - const void __iomem *fun_base = qm->io_base + QM_MB_CMD_SEND_BASE; - unsigned long tmp0 = 0, tmp1 = 0; - - if (!IS_ENABLED(CONFIG_ARM64)) { - memcpy_fromio(dst, fun_base, 16); - dma_wmb(); - return; - } - - asm volatile("ldp %0, %1, %3\n" - "stp %0, %1, %2\n" - "dmb oshst\n" - : "=&r" (tmp0), - "=&r" (tmp1), - "+Q" (*((char *)dst)) - : "Q" (*((char __iomem *)fun_base)) - : "memory"); -} - static void qm_mb_pre_init(struct qm_mailbox *mailbox, u8 cmd, - u64 base, u16 queue, bool op) + u16 queue, bool op) { - mailbox->w0 = cpu_to_le16((cmd) | - ((op) ? 0x1 << QM_MB_OP_SHIFT : 0) | - (0x1 << QM_MB_BUSY_SHIFT)); + mailbox->w0 = cpu_to_le16(cmd | + (op ? 
0x1 << QM_MB_OP_SHIFT : 0) | + (0x1 << QM_MB_BUSY_SHIFT)); mailbox->queue_num = cpu_to_le16(queue); - mailbox->base_l = cpu_to_le32(lower_32_bits(base)); - mailbox->base_h = cpu_to_le32(upper_32_bits(base)); mailbox->rsvd = 0; } -static int qm_wait_mb_ready(struct hisi_qm *qm) +static int qm_mb_nolock(struct hisi_qm *qm, struct qm_mailbox *mailbox) { - struct qm_mailbox mailbox; - int i = 0; - - while (i++ < QM_MB_WAIT_READY_CNT) { - qm_mb_read(qm, &mailbox); - if (!((le16_to_cpu(mailbox.w0) >> QM_MB_BUSY_SHIFT) & 0x1)) - return 0; + int cnt = 0; - usleep_range(WAIT_PERIOD_US_MIN, WAIT_PERIOD_US_MAX); + if (unlikely(qm_wait_mb_ready(qm))) { + dev_err(&qm->pdev->dev, "QM mailbox is busy to start!\n"); + return -EBUSY; } - return -EBUSY; -} - -static int qm_wait_mb_finish(struct hisi_qm *qm, struct qm_mailbox *mailbox) -{ - int i = 0; - - while (++i) { - qm_mb_read(qm, mailbox); - if (!((le16_to_cpu(mailbox->w0) >> QM_MB_BUSY_SHIFT) & 0x1)) + qm_mb_write(qm, mailbox); + while (true) { + if (!qm_wait_mb_ready(qm)) break; - - if (i == QM_MB_MAX_WAIT_CNT) { + if (++cnt > QM_MB_MAX_WAIT_CNT) { dev_err(&qm->pdev->dev, "QM mailbox operation timeout!\n"); - return -ETIMEDOUT; + return -EBUSY; } - - usleep_range(WAIT_PERIOD_US_MIN, WAIT_PERIOD_US_MAX); - } - - if (le16_to_cpu(mailbox->w0) & QM_MB_STATUS_MASK) { - dev_err(&qm->pdev->dev, "QM mailbox operation failed!\n"); - return -EIO; } - return 0; } -static int qm_mb(struct hisi_qm *qm, struct qm_mailbox *mailbox) +static int qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue, + bool op) { + struct qm_mailbox mailbox; int ret; - mutex_lock(&qm->mailbox_lock); - ret = qm_wait_mb_ready(qm); - if (ret) - goto unlock; + dev_dbg(&qm->pdev->dev, "QM mailbox request to q%u: %u-0x%llx\n", + queue, cmd, (unsigned long long)dma_addr); - qm_mb_write(qm, mailbox); - ret = qm_wait_mb_finish(qm, mailbox); + qm_mb_pre_init(&mailbox, cmd, queue, op); + mailbox.base_l = cpu_to_le32(lower_32_bits(dma_addr)); + mailbox.base_h = cpu_to_le32(upper_32_bits(dma_addr)); -unlock: + mutex_lock(&qm->mailbox_lock); + ret = qm_mb_nolock(qm, &mailbox); mutex_unlock(&qm->mailbox_lock); return ret; } -static int qm_config_set(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, - u16 queue, bool op) -{ - struct qm_mailbox mailbox; - - qm_mb_pre_init(&mailbox, cmd, dma_addr, queue, op); - - return qm_mb(qm, &mailbox); -} - -static int qm_config_get(struct hisi_qm *qm, u64 *base, u8 cmd, u16 queue) -{ - struct qm_mailbox mailbox; - int ret; - - qm_mb_pre_init(&mailbox, cmd, 0, queue, 1); - - ret = qm_mb(qm, &mailbox); - if (ret) - return ret; - - *base = le32_to_cpu(mailbox.base_l) | - ((u64)le32_to_cpu(mailbox.base_h) << 32); - - return 0; -} - -static void qm_db(struct hisi_qm *qm, u16 qn, u8 cmd, - u16 index, u8 priority) -{ - void __iomem *io_base = qm->io_base; - u16 randata = 0; - u64 doorbell; - - if (cmd == QM_DOORBELL_CMD_SQ || cmd == QM_DOORBELL_CMD_CQ) - io_base = qm->db_io_base + (u64)qn * qm->db_interval + - QM_DOORBELL_SQ_CQ_BASE_V2; - else - io_base += QM_DOORBELL_EQ_AEQ_BASE_V2; - - doorbell = qn | ((u64)cmd << QM_DB_CMD_SHIFT_V2) | - ((u64)randata << QM_DB_RAND_SHIFT_V2) | - ((u64)index << QM_DB_INDEX_SHIFT_V2) | - ((u64)priority << QM_DB_PRIORITY_SHIFT_V2); - - writeq(doorbell, io_base); -} - /* * Each state Reg is checked 100 times, * with a delay of 100 microseconds after each check @@ -309,10 +230,13 @@ static int qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number) u64 sqc_vft; int ret; - ret = qm_config_get(qm, &sqc_vft, QM_MB_CMD_SQC_VFT_V2, 0); 
+ ret = qm_mb(qm, QM_MB_CMD_SQC_VFT_V2, 0, 0, 1); if (ret) return ret; + sqc_vft = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) | + ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) << + QM_XQC_ADDR_OFFSET); *base = QM_SQC_VFT_BASE_MASK_V2 & (sqc_vft >> QM_SQC_VFT_BASE_SHIFT_V2); *number = (QM_SQC_VFT_NUM_MASK_V2 & (sqc_vft >> QM_SQC_VFT_NUM_SHIFT_V2)) + 1; @@ -320,6 +244,36 @@ static int qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number) return 0; } +static int qm_get_sqc(struct hisi_qm *qm, u64 *addr) +{ + int ret; + + ret = qm_mb(qm, QM_MB_CMD_SQC_BT, 0, 0, 1); + if (ret) + return ret; + + *addr = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) | + ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) << + QM_XQC_ADDR_OFFSET); + + return 0; +} + +static int qm_get_cqc(struct hisi_qm *qm, u64 *addr) +{ + int ret; + + ret = qm_mb(qm, QM_MB_CMD_CQC_BT, 0, 0, 1); + if (ret) + return ret; + + *addr = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) | + ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) << + QM_XQC_ADDR_OFFSET); + + return 0; +} + static int qm_rw_regs_read(struct hisi_qm *qm, struct acc_vf_data *vf_data) { struct device *dev = &qm->pdev->dev; @@ -427,6 +381,12 @@ static int qm_rw_regs_write(struct hisi_qm *qm, struct acc_vf_data *vf_data) return ret; } + ret = qm_write_reg(qm, QM_QUE_ISO_CFG_V, &vf_data->que_iso_cfg, 1); + if (ret) { + dev_err(dev, "failed to write QM_QUE_ISO_CFG_V!\n"); + return ret; + } + ret = qm_write_reg(qm, QM_PAGE_SIZE, &vf_data->page_size, 1); if (ret) { dev_err(dev, "failed to write QM_PAGE_SIZE!\n"); @@ -456,19 +416,6 @@ static int qm_rw_regs_write(struct hisi_qm *qm, struct acc_vf_data *vf_data) return 0; } -static void vf_qm_xeqc_save(struct hisi_qm *qm, - struct acc_vf_migration *acc_vf_dev) -{ - struct acc_vf_data *vf_data = acc_vf_dev->vf_data; - u16 eq_head, aeq_head; - - eq_head = vf_data->qm_eqc_dw[0] & 0xFFFF; - qm_db(qm, 0, QM_DOORBELL_CMD_EQ, eq_head, 0); - - aeq_head = vf_data->qm_aeqc_dw[0] & 0xFFFF; - qm_db(qm, 0, QM_DOORBELL_CMD_AEQ, aeq_head, 0); -} - /* * the vf QM have unbind from host, insmod in the VM * so, qm just have the addr from pci dev @@ -492,29 +439,26 @@ static int vf_migration_data_store(struct hisi_qm *qm, * every Reg is 32 bit, the dma address is 64 bit * so, the dma address is store in the Reg2 and Reg1 */ - vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH]; + vf_data->eqe_dma = vf_data->qm_eqc_dw[2]; vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET; - vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW]; - vf_data->aeqe_dma = vf_data->qm_aeqc_dw[QM_XQC_ADDR_HIGH]; + vf_data->eqe_dma |= vf_data->qm_eqc_dw[1]; + vf_data->aeqe_dma = vf_data->qm_aeqc_dw[2]; vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET; - vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[QM_XQC_ADDR_LOW]; + vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[1]; /* Through SQC_BT/CQC_BT to get sqc and cqc address */ - ret = qm_config_get(qm, &vf_data->sqc_dma, QM_MB_CMD_SQC_BT, 0); + ret = qm_get_sqc(qm, &vf_data->sqc_dma); if (ret) { dev_err(dev, "failed to read SQC addr!\n"); return -EINVAL; } - ret = qm_config_get(qm, &vf_data->cqc_dma, QM_MB_CMD_CQC_BT, 0); + ret = qm_get_cqc(qm, &vf_data->cqc_dma); if (ret) { dev_err(dev, "failed to read CQC addr!\n"); return -EINVAL; } - /* Save eqc and aeqc interrupt information */ - vf_qm_xeqc_save(qm, acc_vf_dev); - return 0; } @@ -527,6 +471,27 @@ static void qm_dev_cmd_init(struct hisi_qm *qm) writel(0x0, qm->io_base + QM_IFC_INT_MASK); } +static void qm_db(struct hisi_qm *qm, u16 qn, u8 cmd, + u16 index, u8 priority) +{ + void __iomem *io_base = 
qm->io_base; + u16 randata = 0; + u64 doorbell; + + if (cmd == QM_DOORBELL_CMD_SQ || cmd == QM_DOORBELL_CMD_CQ) + io_base = qm->db_io_base + (u64)qn * qm->db_interval + + QM_DOORBELL_SQ_CQ_BASE_V2; + else + io_base += QM_DOORBELL_EQ_AEQ_BASE_V2; + + doorbell = qn | ((u64)cmd << QM_DB_CMD_SHIFT_V2) | + ((u64)randata << QM_DB_RAND_SHIFT_V2) | + ((u64)index << QM_DB_INDEX_SHIFT_V2) | + ((u64)priority << QM_DB_PRIORITY_SHIFT_V2); + + writeq(doorbell, io_base); +} + static void vf_qm_fun_restart(struct hisi_qm *qm, struct acc_vf_migration *acc_vf_dev) { @@ -535,10 +500,9 @@ static void vf_qm_fun_restart(struct hisi_qm *qm, int i; /* - * When the Guest is rebooted or reseted, the SMMU page table - * will be destroyed, and the QP queue cannot be returned - * normally at this time. so if Guest acc driver have removed, - * don't need to restart QP. + * When the system is rebooted, the SMMU page table is destroyed, + * and the QP queue cannot be returned normally at this time. + * if vf_ready == 0x2, don't need to restart QP. */ if (vf_data->vf_state != QM_READY) { dev_err(dev, "failed to restart VF!\n"); @@ -553,7 +517,6 @@ static int vf_match_info_check(struct hisi_qm *qm, struct acc_vf_migration *acc_vf_dev) { struct acc_vf_data *vf_data = acc_vf_dev->vf_data; - struct hisi_qm *pf_qm = acc_vf_dev->pf_qm; struct device *dev = &qm->pdev->dev; u32 que_iso_state; int ret; @@ -577,7 +540,7 @@ static int vf_match_info_check(struct hisi_qm *qm, } /* vf isolation state check */ - ret = qm_read_reg(pf_qm, QM_QUE_ISO_CFG_V, &que_iso_state, 1); + ret = qm_read_reg(qm, QM_QUE_ISO_CFG_V, &que_iso_state, 1); if (ret) { dev_err(dev, "failed to read QM_QUE_ISO_CFG_V!\n"); return ret; @@ -610,13 +573,13 @@ static int vf_migration_data_recover(struct hisi_qm *qm, return ret; } - ret = qm_config_set(qm, QM_MB_CMD_SQC_BT, qm->sqc_dma, 0, 0); + ret = qm_mb(qm, QM_MB_CMD_SQC_BT, qm->sqc_dma, 0, 0); if (ret) { dev_err(dev, "Set sqc failed!\n"); return ret; } - ret = qm_config_set(qm, QM_MB_CMD_CQC_BT, qm->cqc_dma, 0, 0); + ret = qm_mb(qm, QM_MB_CMD_CQC_BT, qm->cqc_dma, 0, 0); if (ret) { dev_err(dev, "Set cqc failed!\n"); return ret; @@ -645,7 +608,7 @@ static int vf_qm_cache_wb(struct hisi_qm *qm) static int vf_qm_func_stop(struct hisi_qm *qm) { - return qm_config_set(qm, QM_MB_CMD_PAUSE_QM, 0, 0, 0); + return qm_mb(qm, QM_MB_CMD_PAUSE_QM, 0, 0, 0); } static int pf_qm_get_qp_num(struct hisi_qm *qm, int vf_id, @@ -694,17 +657,19 @@ static int pf_qm_state_pre_save(struct hisi_qm *qm, int vf_id = acc_vf_dev->vf_id; int ret; - /* Vf acc type save */ + /* vf acc type save */ vf_data->acc_type = acc_vf_dev->acc_type; - /* Vf qp num save from PF */ - ret = pf_qm_get_qp_num(qm, vf_id, &vf_data->qp_base, &vf_data->qp_num); - if (ret) { + /* vf qp num save from PF */ + ret = pf_qm_get_qp_num(qm, vf_id, &qm->qp_base, &qm->qp_num); + if (ret || qm->qp_num <= 1) { dev_err(dev, "failed to get vft qp nums!\n"); return -EINVAL; } + vf_data->qp_base = qm->qp_base; + vf_data->qp_num = qm->qp_num; - /* Vf isolation state save from PF */ + /* vf isolation state save from PF */ ret = qm_read_reg(qm, QM_QUE_ISO_CFG_V, &vf_data->que_iso_cfg, 1); if (ret) { dev_err(dev, "failed to read QM_QUE_ISO_CFG_V!\n"); @@ -842,7 +807,12 @@ static int acc_vf_set_device_state(struct acc_vf_migration *acc_vf_dev, break; case VFIO_DEVICE_STATE_STOP: + /* restart all VF's QP */ + vf_qm_fun_restart(qm, acc_vf_dev); + + break; case VFIO_DEVICE_STATE_RESUMING: + break; default: ret = -EFAULT; @@ -1242,30 +1212,12 @@ static void acc_vf_release(void *device_data) 
module_put(THIS_MODULE); } -static void acc_vf_reset(void *device_data) -{ - struct acc_vf_migration *acc_vf_dev = - vfio_pci_vendor_data(device_data); - struct hisi_qm *qm = acc_vf_dev->vf_qm; - struct device *dev = &qm->pdev->dev; - u32 vf_state = QM_NOT_READY; - int ret; - - dev_info(dev, "QEMU prepare to Reset Guest!\n"); - ret = qm_write_reg(qm, QM_VF_STATE, &vf_state, 1); - if (ret) - dev_err(dev, "failed to write QM_VF_STATE\n"); -} - static long acc_vf_ioctl(void *device_data, unsigned int cmd, unsigned long arg) { switch (cmd) { case VFIO_DEVICE_GET_REGION_INFO: return acc_vf_get_region_info(device_data, cmd, arg); - case VFIO_DEVICE_RESET: - acc_vf_reset(device_data); - return vfio_pci_ioctl(device_data, cmd, arg); default: return vfio_pci_ioctl(device_data, cmd, arg); } @@ -1651,19 +1603,6 @@ init_qm_error: return -ENOMEM; } -static int hisi_acc_get_vf_id(struct pci_dev *dev) -{ - struct pci_dev *pf; - - if (!dev->is_virtfn) - return -EINVAL; - - pf = pci_physfn(dev); - return (((dev->bus->number << 8) + dev->devfn) - - ((pf->bus->number << 8) + pf->devfn + pf->sriov->offset)) / - pf->sriov->stride; -} - static void *acc_vf_probe(struct pci_dev *pdev) { struct acc_vf_migration *acc_vf_dev; @@ -1689,7 +1628,7 @@ static void *acc_vf_probe(struct pci_dev *pdev) return ERR_PTR(-EINVAL); } - vf_id = hisi_acc_get_vf_id(vf_dev); + vf_id = PCI_FUNC(vf_dev->devfn); if (vf_id < 0) { dev_info(&pdev->dev, "vf device: %s, vf id: %d\n", pf_qm->dev_name, vf_id); @@ -1706,7 +1645,7 @@ static void *acc_vf_probe(struct pci_dev *pdev) return ERR_PTR(-ENOMEM); } - acc_vf_dev->vf_id = vf_id + 1; + acc_vf_dev->vf_id = vf_id; acc_vf_dev->vf_vendor = pdev->vendor; acc_vf_dev->vf_device = pdev->device; acc_vf_dev->pf_dev = pf_dev; @@ -1736,8 +1675,6 @@ static void acc_vf_remove(void *vendor_data) static struct vfio_pci_vendor_driver_ops sec_vf_mig_ops = { .owner = THIS_MODULE, .name = "hisi_sec2", - .vendor = PCI_VENDOR_ID_HUAWEI, - .device = PCI_DEVICE_ID_HUAWEI_SEC_VF, .probe = acc_vf_probe, .remove = acc_vf_remove, .device_ops = &acc_vf_device_ops_node, @@ -1746,8 +1683,6 @@ static struct vfio_pci_vendor_driver_ops sec_vf_mig_ops = { static struct vfio_pci_vendor_driver_ops hpre_vf_mig_ops = { .owner = THIS_MODULE, .name = "hisi_hpre", - .vendor = PCI_VENDOR_ID_HUAWEI, - .device = PCI_DEVICE_ID_HUAWEI_HPRE_VF, .probe = acc_vf_probe, .remove = acc_vf_remove, .device_ops = &acc_vf_device_ops_node, @@ -1756,8 +1691,6 @@ static struct vfio_pci_vendor_driver_ops hpre_vf_mig_ops = { static struct vfio_pci_vendor_driver_ops zip_vf_mig_ops = { .owner = THIS_MODULE, .name = "hisi_zip", - .vendor = PCI_VENDOR_ID_HUAWEI, - .device = PCI_DEVICE_ID_HUAWEI_ZIP_VF, .probe = acc_vf_probe, .remove = acc_vf_remove, .device_ops = &acc_vf_device_ops_node, @@ -1776,13 +1709,11 @@ static int __init acc_vf_module_init(void) static void __exit acc_vf_module_exit(void) { - vfio_pci_unregister_vendor_driver(&sec_vf_mig_ops); - vfio_pci_unregister_vendor_driver(&hpre_vf_mig_ops); - vfio_pci_unregister_vendor_driver(&zip_vf_mig_ops); + vfio_pci_unregister_vendor_driver(&acc_vf_device_ops_node); }; module_init(acc_vf_module_init); module_exit(acc_vf_module_exit); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Longfang Liu "); -MODULE_DESCRIPTION("HiSilicon Accelerator VF live migration driver"); +MODULE_DESCRIPTION("HiSilicon Accelerator VF live migration driver"); \ No newline at end of file diff --git a/KAEKernelDriver/KAEKernelDriver-OLK-5.10/hisilicon/migration/acc_vf_migration.h 
b/KAEKernelDriver/KAEKernelDriver-OLK-5.10/hisilicon/migration/acc_vf_migration.h index a2368ba..1fdcba0 100644 --- a/KAEKernelDriver/KAEKernelDriver-OLK-5.10/hisilicon/migration/acc_vf_migration.h +++ b/KAEKernelDriver/KAEKernelDriver-OLK-5.10/hisilicon/migration/acc_vf_migration.h @@ -6,7 +6,7 @@ #include #include -#include "../vfio.h" +#include "../../include_linux/vfio.h" #include "../hisi_acc_qm.h" @@ -58,11 +58,9 @@ #define QM_MB_CMD_SEND_BASE 0x300 #define QM_MB_BUSY_SHIFT 13 #define QM_MB_OP_SHIFT 14 -#define QM_MB_WAIT_READY_CNT 10 -#define QM_MB_MAX_WAIT_CNT 3000 -#define WAIT_PERIOD_US_MIN 100 -#define WAIT_PERIOD_US_MAX 200 -#define QM_MB_STATUS_MASK GENMASK(12, 9) +#define QM_MB_CMD_DATA_ADDR_L 0x304 +#define QM_MB_CMD_DATA_ADDR_H 0x308 +#define QM_MB_MAX_WAIT_CNT 6000 /* doorbell */ #define QM_DOORBELL_CMD_SQ 0 @@ -79,8 +77,6 @@ #define QM_REG_ADDR_OFFSET 0x0004 #define QM_XQC_ADDR_OFFSET 32U -#define QM_XQC_ADDR_LOW 0x1 -#define QM_XQC_ADDR_HIGH 0x2 #define QM_VF_AEQ_INT_MASK 0x0004 #define QM_VF_EQ_INT_MASK 0x000c #define QM_IFC_INT_SOURCE_V 0x0020 @@ -219,32 +215,6 @@ struct acc_vf_region_ops { struct vfio_info_cap *caps); }; -/* Single Root I/O Virtualization */ -struct pci_sriov { - int pos; /* Capability position */ - int nres; /* Number of resources */ - u32 cap; /* SR-IOV Capabilities */ - u16 ctrl; /* SR-IOV Control */ - u16 total_VFs; /* Total VFs associated with the PF */ - u16 initial_VFs; /* Initial VFs associated with the PF */ - u16 num_VFs; /* Number of VFs available */ - u16 offset; /* First VF Routing ID offset */ - u16 stride; /* Following VF stride */ - u16 vf_device; /* VF device ID */ - u32 pgsz; /* Page size for BAR alignment */ - u8 link; /* Function Dependency Link */ - u8 max_VF_buses; /* Max buses consumed by VFs */ - u16 driver_max_VFs; /* Max num VFs driver supports */ - struct pci_dev *dev; /* Lowest numbered PF */ - struct pci_dev *self; /* This PF */ - u32 class; /* VF device */ - u8 hdr_type; /* VF header type */ - u16 subsystem_vendor; /* VF subsystem vendor */ - u16 subsystem_device; /* VF subsystem device */ - resource_size_t barsz[PCI_SRIOV_NUM_BARS]; /* VF BAR size */ - bool drivers_autoprobe; /* Auto probing of VFs by driver */ -}; - struct acc_vf_region { u32 type; u32 subtype; @@ -269,4 +239,4 @@ struct acc_vf_irq { const struct acc_vf_irqops *ops; }; -#endif /* ACC_MIG_H */ +#endif /* ACC_MIG_H */ \ No newline at end of file diff --git a/KAEKernelDriver/KAEKernelDriver-OLK-5.10/hisilicon/vfio.h b/KAEKernelDriver/KAEKernelDriver-OLK-5.10/include_linux/vfio.h similarity index 98% rename from KAEKernelDriver/KAEKernelDriver-OLK-5.10/hisilicon/vfio.h rename to KAEKernelDriver/KAEKernelDriver-OLK-5.10/include_linux/vfio.h index 3489930..0b6cda3 100644 --- a/KAEKernelDriver/KAEKernelDriver-OLK-5.10/hisilicon/vfio.h +++ b/KAEKernelDriver/KAEKernelDriver-OLK-5.10/include_linux/vfio.h @@ -13,7 +13,11 @@ #include #include #include -#include +#include "../include_uapi_linux/vfio.h" + +#ifndef KABI_EXTEND +#define KABI_EXTEND(_new) _new; +#endif struct vfio_device { struct device *dev; @@ -291,4 +295,4 @@ static void __exit device_ops ## _module_exit(void) \ module_init(device_ops ## _module_init); \ module_exit(device_ops ## _module_exit) -#endif /* VFIO_H */ +#endif /* VFIO_H */ \ No newline at end of file diff --git a/KAEKernelDriver/KAEKernelDriver-OLK-5.10/include_uapi_linux/vfio.h b/KAEKernelDriver/KAEKernelDriver-OLK-5.10/include_uapi_linux/vfio.h new file mode 100644 index 0000000..52658db --- /dev/null +++ 
b/KAEKernelDriver/KAEKernelDriver-OLK-5.10/include_uapi_linux/vfio.h @@ -0,0 +1,1444 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * VFIO API definition + * + * Copyright (C) 2012 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _UAPIVFIO_H +#define _UAPIVFIO_H + +#include +#include + +#define VFIO_API_VERSION 0 + + +/* Kernel & User level defines for VFIO IOCTLs. */ + +/* Extensions */ + +#define VFIO_TYPE1_IOMMU 1 +#define VFIO_SPAPR_TCE_IOMMU 2 +#define VFIO_TYPE1v2_IOMMU 3 +/* + * IOMMU enforces DMA cache coherence (ex. PCIe NoSnoop stripping). This + * capability is subject to change as groups are added or removed. + */ +#define VFIO_DMA_CC_IOMMU 4 + +/* Check if EEH is supported */ +#define VFIO_EEH 5 + +/* Two-stage IOMMU */ +#define VFIO_TYPE1_NESTING_IOMMU 6 /* Implies v2 */ + +#define VFIO_SPAPR_TCE_v2_IOMMU 7 + +/* + * The No-IOMMU IOMMU offers no translation or isolation for devices and + * supports no ioctls outside of VFIO_CHECK_EXTENSION. Use of VFIO's No-IOMMU + * code will taint the host kernel and should be used with extreme caution. + */ +#define VFIO_NOIOMMU_IOMMU 8 + +/* + * The vfio_iommu driver may support user clears dirty log manually, which means + * dirty log can be requested to not cleared automatically after dirty log is + * copied to userspace, it's user's duty to clear dirty log. + * + * Note: please refer to VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR and + * VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP. + */ +#define VFIO_DIRTY_LOG_MANUAL_CLEAR 11 + +/* + * The IOCTL interface is designed for extensibility by embedding the + * structure length (argsz) and flags into structures passed between + * kernel and userspace. We therefore use the _IO() macro for these + * defines to avoid implicitly embedding a size into the ioctl request. + * As structure fields are added, argsz will increase to match and flag + * bits will be defined to indicate additional fields with valid data. + * It's *always* the caller's responsibility to indicate the size of + * the structure passed by setting argsz appropriately. + */ + +#define VFIO_TYPE (';') +#define VFIO_BASE 100 + +/* + * For extension of INFO ioctls, VFIO makes use of a capability chain + * designed after PCI/e capabilities. A flag bit indicates whether + * this capability chain is supported and a field defined in the fixed + * structure defines the offset of the first capability in the chain. + * This field is only valid when the corresponding bit in the flags + * bitmap is set. This offset field is relative to the start of the + * INFO buffer, as is the next field within each capability header. + * The id within the header is a shared address space per INFO ioctl, + * while the version field is specific to the capability id. The + * contents following the header are specific to the capability id. + */ +struct vfio_info_cap_header { + __u16 id; /* Identifies capability */ + __u16 version; /* Version specific to the capability ID */ + __u32 next; /* Offset of next capability */ +}; + +/* + * Callers of INFO ioctls passing insufficiently sized buffers will see + * the capability chain flag bit set, a zero value for the first capability + * offset (if available within the provided argsz), and argsz will be + * updated to report the necessary buffer size. 
For compatibility, the + * INFO ioctl will not report error in this case, but the capability chain + * will not be available. + */ + +/* -------- IOCTLs for VFIO file descriptor (/dev/vfio/vfio) -------- */ + +/** + * VFIO_GET_API_VERSION - _IO(VFIO_TYPE, VFIO_BASE + 0) + * + * Report the version of the VFIO API. This allows us to bump the entire + * API version should we later need to add or change features in incompatible + * ways. + * Return: VFIO_API_VERSION + * Availability: Always + */ +#define VFIO_GET_API_VERSION _IO(VFIO_TYPE, VFIO_BASE + 0) + +/** + * VFIO_CHECK_EXTENSION - _IOW(VFIO_TYPE, VFIO_BASE + 1, __u32) + * + * Check whether an extension is supported. + * Return: 0 if not supported, 1 (or some other positive integer) if supported. + * Availability: Always + */ +#define VFIO_CHECK_EXTENSION _IO(VFIO_TYPE, VFIO_BASE + 1) + +/** + * VFIO_SET_IOMMU - _IOW(VFIO_TYPE, VFIO_BASE + 2, __s32) + * + * Set the iommu to the given type. The type must be supported by an + * iommu driver as verified by calling CHECK_EXTENSION using the same + * type. A group must be set to this file descriptor before this + * ioctl is available. The IOMMU interfaces enabled by this call are + * specific to the value set. + * Return: 0 on success, -errno on failure + * Availability: When VFIO group attached + */ +#define VFIO_SET_IOMMU _IO(VFIO_TYPE, VFIO_BASE + 2) + +/* -------- IOCTLs for GROUP file descriptors (/dev/vfio/$GROUP) -------- */ + +/** + * VFIO_GROUP_GET_STATUS - _IOR(VFIO_TYPE, VFIO_BASE + 3, + * struct vfio_group_status) + * + * Retrieve information about the group. Fills in provided + * struct vfio_group_info. Caller sets argsz. + * Return: 0 on succes, -errno on failure. + * Availability: Always + */ +struct vfio_group_status { + __u32 argsz; + __u32 flags; +#define VFIO_GROUP_FLAGS_VIABLE (1 << 0) +#define VFIO_GROUP_FLAGS_CONTAINER_SET (1 << 1) +}; +#define VFIO_GROUP_GET_STATUS _IO(VFIO_TYPE, VFIO_BASE + 3) + +/** + * VFIO_GROUP_SET_CONTAINER - _IOW(VFIO_TYPE, VFIO_BASE + 4, __s32) + * + * Set the container for the VFIO group to the open VFIO file + * descriptor provided. Groups may only belong to a single + * container. Containers may, at their discretion, support multiple + * groups. Only when a container is set are all of the interfaces + * of the VFIO file descriptor and the VFIO group file descriptor + * available to the user. + * Return: 0 on success, -errno on failure. + * Availability: Always + */ +#define VFIO_GROUP_SET_CONTAINER _IO(VFIO_TYPE, VFIO_BASE + 4) + +/** + * VFIO_GROUP_UNSET_CONTAINER - _IO(VFIO_TYPE, VFIO_BASE + 5) + * + * Remove the group from the attached container. This is the + * opposite of the SET_CONTAINER call and returns the group to + * an initial state. All device file descriptors must be released + * prior to calling this interface. When removing the last group + * from a container, the IOMMU will be disabled and all state lost, + * effectively also returning the VFIO file descriptor to an initial + * state. + * Return: 0 on success, -errno on failure. + * Availability: When attached to container + */ +#define VFIO_GROUP_UNSET_CONTAINER _IO(VFIO_TYPE, VFIO_BASE + 5) + +/** + * VFIO_GROUP_GET_DEVICE_FD - _IOW(VFIO_TYPE, VFIO_BASE + 6, char) + * + * Return a new file descriptor for the device object described by + * the provided string. The string should match a device listed in + * the devices subdirectory of the IOMMU group sysfs entry. The + * group containing the device must already be added to this context. 
+ * Return: new file descriptor on success, -errno on failure. + * Availability: When attached to container + */ +#define VFIO_GROUP_GET_DEVICE_FD _IO(VFIO_TYPE, VFIO_BASE + 6) + +/* --------------- IOCTLs for DEVICE file descriptors --------------- */ + +/** + * VFIO_DEVICE_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 7, + * struct vfio_device_info) + * + * Retrieve information about the device. Fills in provided + * struct vfio_device_info. Caller sets argsz. + * Return: 0 on success, -errno on failure. + */ +struct vfio_device_info { + __u32 argsz; + __u32 flags; +#define VFIO_DEVICE_FLAGS_RESET (1 << 0) /* Device supports reset */ +#define VFIO_DEVICE_FLAGS_PCI (1 << 1) /* vfio-pci device */ +#define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2) /* vfio-platform device */ +#define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */ +#define VFIO_DEVICE_FLAGS_CCW (1 << 4) /* vfio-ccw device */ +#define VFIO_DEVICE_FLAGS_AP (1 << 5) /* vfio-ap device */ +#define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6) /* vfio-fsl-mc device */ +#define VFIO_DEVICE_FLAGS_CAPS (1 << 7) /* Info supports caps */ + __u32 num_regions; /* Max region index + 1 */ + __u32 num_irqs; /* Max IRQ index + 1 */ + __u32 cap_offset; /* Offset within info struct of first cap */ +}; +#define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7) + +/* + * Vendor driver using Mediated device framework should provide device_api + * attribute in supported type attribute groups. Device API string should be one + * of the following corresponding to device flags in vfio_device_info structure. + */ + +#define VFIO_DEVICE_API_PCI_STRING "vfio-pci" +#define VFIO_DEVICE_API_PLATFORM_STRING "vfio-platform" +#define VFIO_DEVICE_API_AMBA_STRING "vfio-amba" +#define VFIO_DEVICE_API_CCW_STRING "vfio-ccw" +#define VFIO_DEVICE_API_AP_STRING "vfio-ap" + +/* + * The following capabilities are unique to s390 zPCI devices. Their contents + * are further-defined in vfio_zdev.h + */ +#define VFIO_DEVICE_INFO_CAP_ZPCI_BASE 1 +#define VFIO_DEVICE_INFO_CAP_ZPCI_GROUP 2 +#define VFIO_DEVICE_INFO_CAP_ZPCI_UTIL 3 +#define VFIO_DEVICE_INFO_CAP_ZPCI_PFIP 4 + +/** + * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8, + * struct vfio_region_info) + * + * Retrieve information about a device region. Caller provides + * struct vfio_region_info with index value set. Caller sets argsz. + * Implementation of region mapping is bus driver specific. This is + * intended to describe MMIO, I/O port, as well as bus specific + * regions (ex. PCI config space). Zero sized regions may be used + * to describe unimplemented regions (ex. unimplemented PCI BARs). + * Return: 0 on success, -errno on failure. + */ +struct vfio_region_info { + __u32 argsz; + __u32 flags; +#define VFIO_REGION_INFO_FLAG_READ (1 << 0) /* Region supports read */ +#define VFIO_REGION_INFO_FLAG_WRITE (1 << 1) /* Region supports write */ +#define VFIO_REGION_INFO_FLAG_MMAP (1 << 2) /* Region supports mmap */ +#define VFIO_REGION_INFO_FLAG_CAPS (1 << 3) /* Info supports caps */ + __u32 index; /* Region index */ + __u32 cap_offset; /* Offset within info struct of first cap */ + __u64 size; /* Region size (bytes) */ + __u64 offset; /* Region offset from start of device fd */ +}; +#define VFIO_DEVICE_GET_REGION_INFO _IO(VFIO_TYPE, VFIO_BASE + 8) + +/* + * The sparse mmap capability allows finer granularity of specifying areas + * within a region with mmap support. When specified, the user should only + * mmap the offset ranges specified by the areas array. 
mmaps outside of the + * areas specified may fail (such as the range covering a PCI MSI-X table) or + * may result in improper device behavior. + * + * The structures below define version 1 of this capability. + */ +#define VFIO_REGION_INFO_CAP_SPARSE_MMAP 1 + +struct vfio_region_sparse_mmap_area { + __u64 offset; /* Offset of mmap'able area within region */ + __u64 size; /* Size of mmap'able area */ +}; + +struct vfio_region_info_cap_sparse_mmap { + struct vfio_info_cap_header header; + __u32 nr_areas; + __u32 reserved; + struct vfio_region_sparse_mmap_area areas[]; +}; + +/* + * The device specific type capability allows regions unique to a specific + * device or class of devices to be exposed. This helps solve the problem for + * vfio bus drivers of defining which region indexes correspond to which region + * on the device, without needing to resort to static indexes, as done by + * vfio-pci. For instance, if we were to go back in time, we might remove + * VFIO_PCI_VGA_REGION_INDEX and let vfio-pci simply define that all indexes + * greater than or equal to VFIO_PCI_NUM_REGIONS are device specific and we'd + * make a "VGA" device specific type to describe the VGA access space. This + * means that non-VGA devices wouldn't need to waste this index, and thus the + * address space associated with it due to implementation of device file + * descriptor offsets in vfio-pci. + * + * The current implementation is now part of the user ABI, so we can't use this + * for VGA, but there are other upcoming use cases, such as opregions for Intel + * IGD devices and framebuffers for vGPU devices. We missed VGA, but we'll + * use this for future additions. + * + * The structure below defines version 1 of this capability. + */ +#define VFIO_REGION_INFO_CAP_TYPE 2 + +struct vfio_region_info_cap_type { + struct vfio_info_cap_header header; + __u32 type; /* global per bus driver */ + __u32 subtype; /* type specific */ +}; + +/* + * List of region types, global per bus driver. + * If you introduce a new type, please add it here. + */ + +/* PCI region type containing a PCI vendor part */ +#define VFIO_REGION_TYPE_PCI_VENDOR_TYPE (1 << 31) +#define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff) +#define VFIO_REGION_TYPE_GFX (1) +#define VFIO_REGION_TYPE_CCW (2) +#define VFIO_REGION_TYPE_MIGRATION (3) + +/* sub-types for VFIO_REGION_TYPE_PCI_* */ + +/* 8086 vendor PCI sub-types */ +#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1) +#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2) +#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3) + +/* 10de vendor PCI sub-types */ +/* + * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space. + */ +#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1) + +/* 1014 vendor PCI sub-types */ +/* + * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU + * to do TLB invalidation on a GPU. + */ +#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1) + +/* sub-types for VFIO_REGION_TYPE_GFX */ +#define VFIO_REGION_SUBTYPE_GFX_EDID (1) + +/** + * struct vfio_region_gfx_edid - EDID region layout. + * + * Set display link state and EDID blob. + * + * The EDID blob has monitor information such as brand, name, serial + * number, physical size, supported video modes and more. + * + * This special region allows userspace (typically qemu) set a virtual + * EDID for the virtual monitor, which allows a flexible display + * configuration. 
+ * + * For the edid blob spec look here: + * https://en.wikipedia.org/wiki/Extended_Display_Identification_Data + * + * On linux systems you can find the EDID blob in sysfs: + * /sys/class/drm/${card}/${connector}/edid + * + * You can use the edid-decode ulility (comes with xorg-x11-utils) to + * decode the EDID blob. + * + * @edid_offset: location of the edid blob, relative to the + * start of the region (readonly). + * @edid_max_size: max size of the edid blob (readonly). + * @edid_size: actual edid size (read/write). + * @link_state: display link state (read/write). + * VFIO_DEVICE_GFX_LINK_STATE_UP: Monitor is turned on. + * VFIO_DEVICE_GFX_LINK_STATE_DOWN: Monitor is turned off. + * @max_xres: max display width (0 == no limitation, readonly). + * @max_yres: max display height (0 == no limitation, readonly). + * + * EDID update protocol: + * (1) set link-state to down. + * (2) update edid blob and size. + * (3) set link-state to up. + */ +struct vfio_region_gfx_edid { + __u32 edid_offset; + __u32 edid_max_size; + __u32 edid_size; + __u32 max_xres; + __u32 max_yres; + __u32 link_state; +#define VFIO_DEVICE_GFX_LINK_STATE_UP 1 +#define VFIO_DEVICE_GFX_LINK_STATE_DOWN 2 +}; + +/* sub-types for VFIO_REGION_TYPE_CCW */ +#define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD (1) +#define VFIO_REGION_SUBTYPE_CCW_SCHIB (2) +#define VFIO_REGION_SUBTYPE_CCW_CRW (3) + +/* sub-types for VFIO_REGION_TYPE_MIGRATION */ +#define VFIO_REGION_SUBTYPE_MIGRATION (1) + +/* + * The structure vfio_device_migration_info is placed at the 0th offset of + * the VFIO_REGION_SUBTYPE_MIGRATION region to get and set VFIO device related + * migration information. Field accesses from this structure are only supported + * at their native width and alignment. Otherwise, the result is undefined and + * vendor drivers should return an error. + * + * device_state: (read/write) + * - The user application writes to this field to inform the vendor driver + * about the device state to be transitioned to. + * - The vendor driver should take the necessary actions to change the + * device state. After successful transition to a given state, the + * vendor driver should return success on write(device_state, state) + * system call. If the device state transition fails, the vendor driver + * should return an appropriate -errno for the fault condition. + * - On the user application side, if the device state transition fails, + * that is, if write(device_state, state) returns an error, read + * device_state again to determine the current state of the device from + * the vendor driver. + * - The vendor driver should return previous state of the device unless + * the vendor driver has encountered an internal error, in which case + * the vendor driver may report the device_state VFIO_DEVICE_STATE_ERROR. + * - The user application must use the device reset ioctl to recover the + * device from VFIO_DEVICE_STATE_ERROR state. If the device is + * indicated to be in a valid device state by reading device_state, the + * user application may attempt to transition the device to any valid + * state reachable from the current state or terminate itself. + * + * device_state consists of 3 bits: + * - If bit 0 is set, it indicates the _RUNNING state. If bit 0 is clear, + * it indicates the _STOP state. When the device state is changed to + * _STOP, driver should stop the device before write() returns. 
+ * - If bit 1 is set, it indicates the _SAVING state, which means that the + * driver should start gathering device state information that will be + * provided to the VFIO user application to save the device's state. + * - If bit 2 is set, it indicates the _RESUMING state, which means that + * the driver should prepare to resume the device. Data provided through + * the migration region should be used to resume the device. + * Bits 3 - 31 are reserved for future use. To preserve them, the user + * application should perform a read-modify-write operation on this + * field when modifying the specified bits. + * + * +------- _RESUMING + * |+------ _SAVING + * ||+----- _RUNNING + * ||| + * 000b => Device Stopped, not saving or resuming + * 001b => Device running, which is the default state + * 010b => Stop the device & save the device state, stop-and-copy state + * 011b => Device running and save the device state, pre-copy state + * 100b => Device stopped and the device state is resuming + * 101b => Invalid state + * 110b => Error state + * 111b => Invalid state + * + * State transitions: + * + * _RESUMING _RUNNING Pre-copy Stop-and-copy _STOP + * (100b) (001b) (011b) (010b) (000b) + * 0. Running or default state + * | + * + * 1. Normal Shutdown (optional) + * |------------------------------------->| + * + * 2. Save the state or suspend + * |------------------------->|---------->| + * + * 3. Save the state during live migration + * |----------->|------------>|---------->| + * + * 4. Resuming + * |<---------| + * + * 5. Resumed + * |--------->| + * + * 0. Default state of VFIO device is _RUNNING when the user application starts. + * 1. During normal shutdown of the user application, the user application may + * optionally change the VFIO device state from _RUNNING to _STOP. This + * transition is optional. The vendor driver must support this transition but + * must not require it. + * 2. When the user application saves state or suspends the application, the + * device state transitions from _RUNNING to stop-and-copy and then to _STOP. + * On state transition from _RUNNING to stop-and-copy, driver must stop the + * device, save the device state and send it to the application through the + * migration region. The sequence to be followed for such transition is given + * below. + * 3. In live migration of user application, the state transitions from _RUNNING + * to pre-copy, to stop-and-copy, and to _STOP. + * On state transition from _RUNNING to pre-copy, the driver should start + * gathering the device state while the application is still running and send + * the device state data to application through the migration region. + * On state transition from pre-copy to stop-and-copy, the driver must stop + * the device, save the device state and send it to the user application + * through the migration region. + * Vendor drivers must support the pre-copy state even for implementations + * where no data is provided to the user before the stop-and-copy state. The + * user must not be required to consume all migration data before the device + * transitions to a new state, including the stop-and-copy state. + * The sequence to be followed for above two transitions is given below. + * 4. To start the resuming phase, the device state should be transitioned from + * the _RUNNING to the _RESUMING state. + * In the _RESUMING state, the driver should use the device state data + * received through the migration region to resume the device. + * 5. 
After providing saved device data to the driver, the application should + * change the state from _RESUMING to _RUNNING. + * + * reserved: + * Reads on this field return zero and writes are ignored. + * + * pending_bytes: (read only) + * The number of pending bytes still to be migrated from the vendor driver. + * + * data_offset: (read only) + * The user application should read data_offset field from the migration + * region. The user application should read the device data from this + * offset within the migration region during the _SAVING state or write + * the device data during the _RESUMING state. See below for details of + * sequence to be followed. + * + * data_size: (read/write) + * The user application should read data_size to get the size in bytes of + * the data copied in the migration region during the _SAVING state and + * write the size in bytes of the data copied in the migration region + * during the _RESUMING state. + * + * The format of the migration region is as follows: + * ------------------------------------------------------------------ + * |vfio_device_migration_info| data section | + * | | /////////////////////////////// | + * ------------------------------------------------------------------ + * ^ ^ + * offset 0-trapped part data_offset + * + * The structure vfio_device_migration_info is always followed by the data + * section in the region, so data_offset will always be nonzero. The offset + * from where the data is copied is decided by the kernel driver. The data + * section can be trapped, mmapped, or partitioned, depending on how the kernel + * driver defines the data section. The data section partition can be defined + * as mapped by the sparse mmap capability. If mmapped, data_offset must be + * page aligned, whereas initial section which contains the + * vfio_device_migration_info structure, might not end at the offset, which is + * page aligned. The user is not required to access through mmap regardless + * of the capabilities of the region mmap. + * The vendor driver should determine whether and how to partition the data + * section. The vendor driver should return data_offset accordingly. + * + * The sequence to be followed while in pre-copy state and stop-and-copy state + * is as follows: + * a. Read pending_bytes, indicating the start of a new iteration to get device + * data. Repeated read on pending_bytes at this stage should have no side + * effects. + * If pending_bytes == 0, the user application should not iterate to get data + * for that device. + * If pending_bytes > 0, perform the following steps. + * b. Read data_offset, indicating that the vendor driver should make data + * available through the data section. The vendor driver should return this + * read operation only after data is available from (region + data_offset) + * to (region + data_offset + data_size). + * c. Read data_size, which is the amount of data in bytes available through + * the migration region. + * Read on data_offset and data_size should return the offset and size of + * the current buffer if the user application reads data_offset and + * data_size more than once here. + * d. Read data_size bytes of data from (region + data_offset) from the + * migration region. + * e. Process the data. + * f. Read pending_bytes, which indicates that the data from the previous + * iteration has been read. If pending_bytes > 0, go to step b. 
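+ *
+ * As an illustrative sketch only (device_fd, reg_off and buf are
+ * placeholder names; error handling and short-read checks are
+ * omitted), steps a. through f. above could be driven from
+ * userspace as:
+ *
+ *	__u64 pending, off, size;
+ *
+ *	pread(device_fd, &pending, sizeof(pending), reg_off +
+ *	      offsetof(struct vfio_device_migration_info, pending_bytes));
+ *	while (pending > 0) {
+ *		pread(device_fd, &off, sizeof(off), reg_off +
+ *		      offsetof(struct vfio_device_migration_info, data_offset));
+ *		pread(device_fd, &size, sizeof(size), reg_off +
+ *		      offsetof(struct vfio_device_migration_info, data_size));
+ *		pread(device_fd, buf, size, reg_off + off);
+ *		... process buf, then re-read pending_bytes (step f.) ...
+ *		pread(device_fd, &pending, sizeof(pending), reg_off +
+ *		      offsetof(struct vfio_device_migration_info, pending_bytes));
+ *	}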
+ * + * The user application can transition from the _SAVING|_RUNNING + * (pre-copy state) to the _SAVING (stop-and-copy) state regardless of the + * number of pending bytes. The user application should iterate in _SAVING + * (stop-and-copy) until pending_bytes is 0. + * + * The sequence to be followed while _RESUMING device state is as follows: + * While data for this device is available, repeat the following steps: + * a. Read data_offset from where the user application should write data. + * b. Write migration data starting at the migration region + data_offset for + * the length determined by data_size from the migration source. + * c. Write data_size, which indicates to the vendor driver that data is + * written in the migration region. Vendor driver must return this write + * operations on consuming data. Vendor driver should apply the + * user-provided migration region data to the device resume state. + * + * If an error occurs during the above sequences, the vendor driver can return + * an error code for next read() or write() operation, which will terminate the + * loop. The user application should then take the next necessary action, for + * example, failing migration or terminating the user application. + * + * For the user application, data is opaque. The user application should write + * data in the same order as the data is received and the data should be of + * same transaction size at the source. + */ + +struct vfio_device_migration_info { + __u32 device_state; /* VFIO device state */ +#define VFIO_DEVICE_STATE_STOP (0) +#define VFIO_DEVICE_STATE_RUNNING (1 << 0) +#define VFIO_DEVICE_STATE_SAVING (1 << 1) +#define VFIO_DEVICE_STATE_RESUMING (1 << 2) +#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_RUNNING | \ + VFIO_DEVICE_STATE_SAVING | \ + VFIO_DEVICE_STATE_RESUMING) + +#define VFIO_DEVICE_STATE_VALID(state) \ + (state & VFIO_DEVICE_STATE_RESUMING ? \ + (state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_RESUMING : 1) + +#define VFIO_DEVICE_STATE_IS_ERROR(state) \ + ((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_SAVING | \ + VFIO_DEVICE_STATE_RESUMING)) + +#define VFIO_DEVICE_STATE_SET_ERROR(state) \ + ((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_SATE_SAVING | \ + VFIO_DEVICE_STATE_RESUMING) + + __u32 reserved; + __u64 pending_bytes; + __u64 data_offset; + __u64 data_size; +}; + +/* + * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped + * which allows direct access to non-MSIX registers which happened to be within + * the same system page. + * + * Even though the userspace gets direct access to the MSIX data, the existing + * VFIO_DEVICE_SET_IRQS interface must still be used for MSIX configuration. + */ +#define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE 3 + +/* + * Capability with compressed real address (aka SSA - small system address) + * where GPU RAM is mapped on a system bus. Used by a GPU for DMA routing + * and by the userspace to associate a NVLink bridge with a GPU. + */ +#define VFIO_REGION_INFO_CAP_NVLINK2_SSATGT 4 + +struct vfio_region_info_cap_nvlink2_ssatgt { + struct vfio_info_cap_header header; + __u64 tgt; +}; + +/* + * Capability with an NVLink link speed. The value is read by + * the NVlink2 bridge driver from the bridge's "ibm,nvlink-speed" + * property in the device tree. The value is fixed in the hardware + * and failing to provide the correct value results in the link + * not working with no indication from the driver why. 
+ */ +#define VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD 5 + +struct vfio_region_info_cap_nvlink2_lnkspd { + struct vfio_info_cap_header header; + __u32 link_speed; + __u32 __pad; +}; + +/** + * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9, + * struct vfio_irq_info) + * + * Retrieve information about a device IRQ. Caller provides + * struct vfio_irq_info with index value set. Caller sets argsz. + * Implementation of IRQ mapping is bus driver specific. Indexes + * using multiple IRQs are primarily intended to support MSI-like + * interrupt blocks. Zero count irq blocks may be used to describe + * unimplemented interrupt types. + * + * The EVENTFD flag indicates the interrupt index supports eventfd based + * signaling. + * + * The MASKABLE flags indicates the index supports MASK and UNMASK + * actions described below. + * + * AUTOMASKED indicates that after signaling, the interrupt line is + * automatically masked by VFIO and the user needs to unmask the line + * to receive new interrupts. This is primarily intended to distinguish + * level triggered interrupts. + * + * The NORESIZE flag indicates that the interrupt lines within the index + * are setup as a set and new subindexes cannot be enabled without first + * disabling the entire index. This is used for interrupts like PCI MSI + * and MSI-X where the driver may only use a subset of the available + * indexes, but VFIO needs to enable a specific number of vectors + * upfront. In the case of MSI-X, where the user can enable MSI-X and + * then add and unmask vectors, it's up to userspace to make the decision + * whether to allocate the maximum supported number of vectors or tear + * down setup and incrementally increase the vectors as each is enabled. + */ +struct vfio_irq_info { + __u32 argsz; + __u32 flags; +#define VFIO_IRQ_INFO_EVENTFD (1 << 0) +#define VFIO_IRQ_INFO_MASKABLE (1 << 1) +#define VFIO_IRQ_INFO_AUTOMASKED (1 << 2) +#define VFIO_IRQ_INFO_NORESIZE (1 << 3) + __u32 index; /* IRQ index */ + __u32 count; /* Number of IRQs within this index */ +}; +#define VFIO_DEVICE_GET_IRQ_INFO _IO(VFIO_TYPE, VFIO_BASE + 9) + +/** + * VFIO_DEVICE_SET_IRQS - _IOW(VFIO_TYPE, VFIO_BASE + 10, struct vfio_irq_set) + * + * Set signaling, masking, and unmasking of interrupts. Caller provides + * struct vfio_irq_set with all fields set. 'start' and 'count' indicate + * the range of subindexes being specified. + * + * The DATA flags specify the type of data provided. If DATA_NONE, the + * operation performs the specified action immediately on the specified + * interrupt(s). For example, to unmask AUTOMASKED interrupt [0,0]: + * flags = (DATA_NONE|ACTION_UNMASK), index = 0, start = 0, count = 1. + * + * DATA_BOOL allows sparse support for the same on arrays of interrupts. + * For example, to mask interrupts [0,1] and [0,3] (but not [0,2]): + * flags = (DATA_BOOL|ACTION_MASK), index = 0, start = 1, count = 3, + * data = {1,0,1} + * + * DATA_EVENTFD binds the specified ACTION to the provided __s32 eventfd. + * A value of -1 can be used to either de-assign interrupts if already + * assigned or skip un-assigned interrupts. For example, to set an eventfd + * to be trigger for interrupts [0,0] and [0,2]: + * flags = (DATA_EVENTFD|ACTION_TRIGGER), index = 0, start = 0, count = 3, + * data = {fd1, -1, fd2} + * If index [0,1] is previously set, two count = 1 ioctls calls would be + * required to set [0,0] and [0,2] without changing [0,1]. 
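+ *
+ * As an illustrative sketch (device_fd is a placeholder and error
+ * handling is omitted), binding a single eventfd as the trigger for
+ * MSI vector [0,0] could look like:
+ *
+ *	struct vfio_irq_set *set;
+ *	__s32 fd = eventfd(0, EFD_CLOEXEC);
+ *
+ *	set = malloc(sizeof(*set) + sizeof(fd));
+ *	set->argsz = sizeof(*set) + sizeof(fd);
+ *	set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+ *	set->index = VFIO_PCI_MSI_IRQ_INDEX;
+ *	set->start = 0;
+ *	set->count = 1;
+ *	memcpy(set->data, &fd, sizeof(fd));
+ *	ioctl(device_fd, VFIO_DEVICE_SET_IRQS, set);
+ *	free(set);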
+ * + * Once a signaling mechanism is set, DATA_BOOL or DATA_NONE can be used + * with ACTION_TRIGGER to perform kernel level interrupt loopback testing + * from userspace (ie. simulate hardware triggering). + * + * Setting of an event triggering mechanism to userspace for ACTION_TRIGGER + * enables the interrupt index for the device. Individual subindex interrupts + * can be disabled using the -1 value for DATA_EVENTFD or the index can be + * disabled as a whole with: flags = (DATA_NONE|ACTION_TRIGGER), count = 0. + * + * Note that ACTION_[UN]MASK specify user->kernel signaling (irqfds) while + * ACTION_TRIGGER specifies kernel->user signaling. + */ +struct vfio_irq_set { + __u32 argsz; + __u32 flags; +#define VFIO_IRQ_SET_DATA_NONE (1 << 0) /* Data not present */ +#define VFIO_IRQ_SET_DATA_BOOL (1 << 1) /* Data is bool (u8) */ +#define VFIO_IRQ_SET_DATA_EVENTFD (1 << 2) /* Data is eventfd (s32) */ +#define VFIO_IRQ_SET_ACTION_MASK (1 << 3) /* Mask interrupt */ +#define VFIO_IRQ_SET_ACTION_UNMASK (1 << 4) /* Unmask interrupt */ +#define VFIO_IRQ_SET_ACTION_TRIGGER (1 << 5) /* Trigger interrupt */ + __u32 index; + __u32 start; + __u32 count; + __u8 data[]; +}; +#define VFIO_DEVICE_SET_IRQS _IO(VFIO_TYPE, VFIO_BASE + 10) + +#define VFIO_IRQ_SET_DATA_TYPE_MASK (VFIO_IRQ_SET_DATA_NONE | \ + VFIO_IRQ_SET_DATA_BOOL | \ + VFIO_IRQ_SET_DATA_EVENTFD) +#define VFIO_IRQ_SET_ACTION_TYPE_MASK (VFIO_IRQ_SET_ACTION_MASK | \ + VFIO_IRQ_SET_ACTION_UNMASK | \ + VFIO_IRQ_SET_ACTION_TRIGGER) +/** + * VFIO_DEVICE_RESET - _IO(VFIO_TYPE, VFIO_BASE + 11) + * + * Reset a device. + */ +#define VFIO_DEVICE_RESET _IO(VFIO_TYPE, VFIO_BASE + 11) + +/* + * The VFIO-PCI bus driver makes use of the following fixed region and + * IRQ index mapping. Unimplemented regions return a size of zero. + * Unimplemented IRQ types return a count of zero. + */ + +enum { + VFIO_PCI_BAR0_REGION_INDEX, + VFIO_PCI_BAR1_REGION_INDEX, + VFIO_PCI_BAR2_REGION_INDEX, + VFIO_PCI_BAR3_REGION_INDEX, + VFIO_PCI_BAR4_REGION_INDEX, + VFIO_PCI_BAR5_REGION_INDEX, + VFIO_PCI_ROM_REGION_INDEX, + VFIO_PCI_CONFIG_REGION_INDEX, + /* + * Expose VGA regions defined for PCI base class 03, subclass 00. + * This includes I/O port ranges 0x3b0 to 0x3bb and 0x3c0 to 0x3df + * as well as the MMIO range 0xa0000 to 0xbffff. Each implemented + * range is found at it's identity mapped offset from the region + * offset, for example 0x3b0 is region_info.offset + 0x3b0. Areas + * between described ranges are unimplemented. + */ + VFIO_PCI_VGA_REGION_INDEX, + VFIO_PCI_NUM_REGIONS = 9 /* Fixed user ABI, region indexes >=9 use */ + /* device specific cap to define content. */ +}; + +enum { + VFIO_PCI_INTX_IRQ_INDEX, + VFIO_PCI_MSI_IRQ_INDEX, + VFIO_PCI_MSIX_IRQ_INDEX, + VFIO_PCI_ERR_IRQ_INDEX, + VFIO_PCI_REQ_IRQ_INDEX, + VFIO_PCI_NUM_IRQS +}; + +/* + * The vfio-ccw bus driver makes use of the following fixed region and + * IRQ index mapping. Unimplemented regions return a size of zero. + * Unimplemented IRQ types return a count of zero. + */ + +enum { + VFIO_CCW_CONFIG_REGION_INDEX, + VFIO_CCW_NUM_REGIONS +}; + +enum { + VFIO_CCW_IO_IRQ_INDEX, + VFIO_CCW_CRW_IRQ_INDEX, + VFIO_CCW_NUM_IRQS +}; + +/** + * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IORW(VFIO_TYPE, VFIO_BASE + 12, + * struct vfio_pci_hot_reset_info) + * + * Return: 0 on success, -errno on failure: + * -enospc = insufficient buffer, -enodev = unsupported for device. 
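+ *
+ * Since the number of dependent devices is not known up front, a
+ * common (illustrative) userspace pattern is to probe with a minimal
+ * buffer, which is expected to fail with -ENOSPC while still filling
+ * in count, and then to retry with a buffer sized from that count
+ * (device_fd is a placeholder, error handling omitted):
+ *
+ *	struct vfio_pci_hot_reset_info hdr = { .argsz = sizeof(hdr) };
+ *	struct vfio_pci_hot_reset_info *info;
+ *	size_t sz;
+ *
+ *	ioctl(device_fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, &hdr);
+ *	sz = sizeof(*info) +
+ *	     hdr.count * sizeof(struct vfio_pci_dependent_device);
+ *	info = calloc(1, sz);
+ *	info->argsz = sz;
+ *	ioctl(device_fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);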
+ */ +struct vfio_pci_dependent_device { + __u32 group_id; + __u16 segment; + __u8 bus; + __u8 devfn; /* Use PCI_SLOT/PCI_FUNC */ +}; + +struct vfio_pci_hot_reset_info { + __u32 argsz; + __u32 flags; + __u32 count; + struct vfio_pci_dependent_device devices[]; +}; + +#define VFIO_DEVICE_GET_PCI_HOT_RESET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) + +/** + * VFIO_DEVICE_PCI_HOT_RESET - _IOW(VFIO_TYPE, VFIO_BASE + 13, + * struct vfio_pci_hot_reset) + * + * Return: 0 on success, -errno on failure. + */ +struct vfio_pci_hot_reset { + __u32 argsz; + __u32 flags; + __u32 count; + __s32 group_fds[]; +}; + +#define VFIO_DEVICE_PCI_HOT_RESET _IO(VFIO_TYPE, VFIO_BASE + 13) + +/** + * VFIO_DEVICE_QUERY_GFX_PLANE - _IOW(VFIO_TYPE, VFIO_BASE + 14, + * struct vfio_device_query_gfx_plane) + * + * Set the drm_plane_type and flags, then retrieve the gfx plane info. + * + * flags supported: + * - VFIO_GFX_PLANE_TYPE_PROBE and VFIO_GFX_PLANE_TYPE_DMABUF are set + * to ask if the mdev supports dma-buf. 0 on support, -EINVAL on no + * support for dma-buf. + * - VFIO_GFX_PLANE_TYPE_PROBE and VFIO_GFX_PLANE_TYPE_REGION are set + * to ask if the mdev supports region. 0 on support, -EINVAL on no + * support for region. + * - VFIO_GFX_PLANE_TYPE_DMABUF or VFIO_GFX_PLANE_TYPE_REGION is set + * with each call to query the plane info. + * - Others are invalid and return -EINVAL. + * + * Note: + * 1. Plane could be disabled by guest. In that case, success will be + * returned with zero-initialized drm_format, size, width and height + * fields. + * 2. x_hot/y_hot is set to 0xFFFFFFFF if no hotspot information available + * + * Return: 0 on success, -errno on other failure. + */ +struct vfio_device_gfx_plane_info { + __u32 argsz; + __u32 flags; +#define VFIO_GFX_PLANE_TYPE_PROBE (1 << 0) +#define VFIO_GFX_PLANE_TYPE_DMABUF (1 << 1) +#define VFIO_GFX_PLANE_TYPE_REGION (1 << 2) + /* in */ + __u32 drm_plane_type; /* type of plane: DRM_PLANE_TYPE_* */ + /* out */ + __u32 drm_format; /* drm format of plane */ + __u64 drm_format_mod; /* tiled mode */ + __u32 width; /* width of plane */ + __u32 height; /* height of plane */ + __u32 stride; /* stride of plane */ + __u32 size; /* size of plane in bytes, align on page*/ + __u32 x_pos; /* horizontal position of cursor plane */ + __u32 y_pos; /* vertical position of cursor plane*/ + __u32 x_hot; /* horizontal position of cursor hotspot */ + __u32 y_hot; /* vertical position of cursor hotspot */ + union { + __u32 region_index; /* region index */ + __u32 dmabuf_id; /* dma-buf id */ + }; +}; + +#define VFIO_DEVICE_QUERY_GFX_PLANE _IO(VFIO_TYPE, VFIO_BASE + 14) + +/** + * VFIO_DEVICE_GET_GFX_DMABUF - _IOW(VFIO_TYPE, VFIO_BASE + 15, __u32) + * + * Return a new dma-buf file descriptor for an exposed guest framebuffer + * described by the provided dmabuf_id. The dmabuf_id is returned from VFIO_ + * DEVICE_QUERY_GFX_PLANE as a token of the exposed guest framebuffer. + */ + +#define VFIO_DEVICE_GET_GFX_DMABUF _IO(VFIO_TYPE, VFIO_BASE + 15) + +/** + * VFIO_DEVICE_IOEVENTFD - _IOW(VFIO_TYPE, VFIO_BASE + 16, + * struct vfio_device_ioeventfd) + * + * Perform a write to the device at the specified device fd offset, with + * the specified data and width when the provided eventfd is triggered. + * vfio bus drivers may not support this for all regions, for all widths, + * or at all. vfio-pci currently only enables support for BAR regions, + * excluding the MSI-X vector table. + * + * Return: 0 on success, -errno on failure. 
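+ *
+ * For example (userspace sketch; device_fd, the region offset obtained
+ * from VFIO_DEVICE_GET_REGION_INFO and the 0x1000 register offset are
+ * illustrative assumptions): arm a 4-byte write of 0x1 whenever efd
+ * fires:
+ *
+ *	struct vfio_device_ioeventfd ioe = {
+ *		.argsz = sizeof(ioe),
+ *		.flags = VFIO_DEVICE_IOEVENTFD_32,
+ *		.offset = region_info.offset + 0x1000,
+ *		.data = 0x1,
+ *		.fd = efd,
+ *	};
+ *
+ *	ioctl(device_fd, VFIO_DEVICE_IOEVENTFD, &ioe);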
+ */ +struct vfio_device_ioeventfd { + __u32 argsz; + __u32 flags; +#define VFIO_DEVICE_IOEVENTFD_8 (1 << 0) /* 1-byte write */ +#define VFIO_DEVICE_IOEVENTFD_16 (1 << 1) /* 2-byte write */ +#define VFIO_DEVICE_IOEVENTFD_32 (1 << 2) /* 4-byte write */ +#define VFIO_DEVICE_IOEVENTFD_64 (1 << 3) /* 8-byte write */ +#define VFIO_DEVICE_IOEVENTFD_SIZE_MASK (0xf) + __u64 offset; /* device fd offset of write */ + __u64 data; /* data to be written */ + __s32 fd; /* -1 for de-assignment */ +}; + +#define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16) + +/** + * VFIO_DEVICE_FEATURE - _IORW(VFIO_TYPE, VFIO_BASE + 17, + * struct vfio_device_feature) + * + * Get, set, or probe feature data of the device. The feature is selected + * using the FEATURE_MASK portion of the flags field. Support for a feature + * can be probed by setting both the FEATURE_MASK and PROBE bits. A probe + * may optionally include the GET and/or SET bits to determine read vs write + * access of the feature respectively. Probing a feature will return success + * if the feature is supported and all of the optionally indicated GET/SET + * methods are supported. The format of the data portion of the structure is + * specific to the given feature. The data portion is not required for + * probing. GET and SET are mutually exclusive, except for use with PROBE. + * + * Return 0 on success, -errno on failure. + */ +struct vfio_device_feature { + __u32 argsz; + __u32 flags; +#define VFIO_DEVICE_FEATURE_MASK (0xffff) /* 16-bit feature index */ +#define VFIO_DEVICE_FEATURE_GET (1 << 16) /* Get feature into data[] */ +#define VFIO_DEVICE_FEATURE_SET (1 << 17) /* Set feature from data[] */ +#define VFIO_DEVICE_FEATURE_PROBE (1 << 18) /* Probe feature support */ + __u8 data[]; +}; + +#define VFIO_DEVICE_FEATURE _IO(VFIO_TYPE, VFIO_BASE + 17) + +/* + * Provide support for setting a PCI VF Token, which is used as a shared + * secret between PF and VF drivers. This feature may only be set on a + * PCI SR-IOV PF when SR-IOV is enabled on the PF and there are no existing + * open VFs. Data provided when setting this feature is a 16-byte array + * (__u8 b[16]), representing a UUID. + */ +#define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN (0) + +/* -------- API for Type1 VFIO IOMMU -------- */ + +/** + * VFIO_IOMMU_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 12, struct vfio_iommu_info) + * + * Retrieve information about the IOMMU object. Fills in provided + * struct vfio_iommu_info. Caller sets argsz. + * + * XXX Should we do these by CHECK_EXTENSION too? + */ +struct vfio_iommu_type1_info { + __u32 argsz; + __u32 flags; +#define VFIO_IOMMU_INFO_PGSIZES (1 << 0) /* supported page sizes info */ +#define VFIO_IOMMU_INFO_CAPS (1 << 1) /* Info supports caps */ + __u64 iova_pgsizes; /* Bitmap of supported page sizes */ + __u32 cap_offset; /* Offset within info struct of first cap */ +}; + +/* + * The IOVA capability allows to report the valid IOVA range(s) + * excluding any non-relaxable reserved regions exposed by + * devices attached to the container. Any DMA map attempt + * outside the valid iova range will return error. + * + * The structures below define version 1 of this capability. + */ +#define VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE 1 + +struct vfio_iova_range { + __u64 start; + __u64 end; +}; + +struct vfio_iommu_type1_info_cap_iova_range { + struct vfio_info_cap_header header; + __u32 nr_iovas; + __u32 reserved; + struct vfio_iova_range iova_ranges[]; +}; + +/* + * The migration capability allows to report supported features for migration. 
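+ * Like any info capability, it is located by walking the chain of
+ * vfio_info_cap_header entries hanging off cap_offset. A minimal walker
+ * (userspace sketch; "info" is assumed to be a fully sized
+ * vfio_iommu_type1_info returned by VFIO_IOMMU_GET_INFO with
+ * VFIO_IOMMU_INFO_CAPS set):
+ *
+ *	struct vfio_info_cap_header *hdr;
+ *	__u32 off = info->cap_offset;
+ *
+ *	while (off) {
+ *		hdr = (struct vfio_info_cap_header *)((char *)info + off);
+ *		if (hdr->id == VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION)
+ *			break;	(hdr now points at the migration cap)
+ *		off = hdr->next;
+ *	}
+ *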
+ * + * The structures below define version 1 of this capability. + * + * The existence of this capability indicates that IOMMU kernel driver supports + * dirty page logging. + * + * pgsize_bitmap: Kernel driver returns bitmap of supported page sizes for dirty + * page logging. + * max_dirty_bitmap_size: Kernel driver returns maximum supported dirty bitmap + * size in bytes that can be used by user applications when getting the dirty + * bitmap. + */ +#define VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION 2 + +struct vfio_iommu_type1_info_cap_migration { + struct vfio_info_cap_header header; + __u32 flags; + __u64 pgsize_bitmap; + __u64 max_dirty_bitmap_size; /* in bytes */ +}; + +/* + * The DMA available capability allows to report the current number of + * simultaneously outstanding DMA mappings that are allowed. + * + * The structure below defines version 1 of this capability. + * + * avail: specifies the current number of outstanding DMA mappings allowed. + */ +#define VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL 3 + +struct vfio_iommu_type1_info_dma_avail { + struct vfio_info_cap_header header; + __u32 avail; +}; + +#define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) + +/** + * VFIO_IOMMU_MAP_DMA - _IOW(VFIO_TYPE, VFIO_BASE + 13, struct vfio_dma_map) + * + * Map process virtual addresses to IO virtual addresses using the + * provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required. + */ +struct vfio_iommu_type1_dma_map { + __u32 argsz; + __u32 flags; +#define VFIO_DMA_MAP_FLAG_READ (1 << 0) /* readable from device */ +#define VFIO_DMA_MAP_FLAG_WRITE (1 << 1) /* writable from device */ + __u64 vaddr; /* Process virtual address */ + __u64 iova; /* IO virtual address */ + __u64 size; /* Size of mapping (bytes) */ +}; + +#define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13) + +struct vfio_bitmap { + __u64 pgsize; /* page size for bitmap in bytes */ + __u64 size; /* in bytes */ + __u64 __user *data; /* one bit per page */ +}; + +/** + * VFIO_IOMMU_UNMAP_DMA - _IOWR(VFIO_TYPE, VFIO_BASE + 14, + * struct vfio_dma_unmap) + * + * Unmap IO virtual addresses using the provided struct vfio_dma_unmap. + * Caller sets argsz. The actual unmapped size is returned in the size + * field. No guarantee is made to the user that arbitrary unmaps of iova + * or size different from those used in the original mapping call will + * succeed. + * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap + * before unmapping IO virtual addresses. When this flag is set, the user must + * provide a struct vfio_bitmap in data[]. User must provide zero-allocated + * memory via vfio_bitmap.data and its size in the vfio_bitmap.size field. + * A bit in the bitmap represents one page, of user provided page size in + * vfio_bitmap.pgsize field, consecutively starting from iova offset. Bit set + * indicates that the page at that offset from iova is dirty. A Bitmap of the + * pages in the range of unmapped size is returned in the user-provided + * vfio_bitmap.data. + */ +struct vfio_iommu_type1_dma_unmap { + __u32 argsz; + __u32 flags; +#define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0) + __u64 iova; /* IO virtual address */ + __u64 size; /* Size of mapping (bytes) */ + __u8 data[]; +}; + +#define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14) + +/* + * IOCTLs to enable/disable IOMMU container usage. + * No parameters are supported. 
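+ *
+ * As a usage sketch for VFIO_IOMMU_UNMAP_DMA above (container_fd and the
+ * previously mapped 2MB range at IOVA 0x100000 are illustrative
+ * assumptions): unmap the range and collect its dirty bitmap in one
+ * call:
+ *
+ *	char buf[sizeof(struct vfio_iommu_type1_dma_unmap) +
+ *		 sizeof(struct vfio_bitmap)];
+ *	struct vfio_iommu_type1_dma_unmap *unmap = (void *)buf;
+ *	struct vfio_bitmap *bitmap = (void *)unmap->data;
+ *	__u64 bits[8] = {};	(512 x 4K pages -> 64 bytes of bitmap)
+ *
+ *	unmap->argsz = sizeof(buf);
+ *	unmap->flags = VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP;
+ *	unmap->iova = 0x100000;
+ *	unmap->size = 2 * 1024 * 1024;
+ *	bitmap->pgsize = 4096;
+ *	bitmap->size = sizeof(bits);
+ *	bitmap->data = bits;
+ *	ioctl(container_fd, VFIO_IOMMU_UNMAP_DMA, unmap);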
+ */
+#define VFIO_IOMMU_ENABLE	_IO(VFIO_TYPE, VFIO_BASE + 15)
+#define VFIO_IOMMU_DISABLE	_IO(VFIO_TYPE, VFIO_BASE + 16)
+
+/**
+ * VFIO_IOMMU_DIRTY_PAGES - _IOWR(VFIO_TYPE, VFIO_BASE + 17,
+ *                                struct vfio_iommu_type1_dirty_bitmap)
+ * IOCTL is used for dirty page logging.
+ * The caller should set a flag depending on which operation to perform,
+ * as detailed below:
+ *
+ * Calling the IOCTL with the VFIO_IOMMU_DIRTY_PAGES_FLAG_START flag set
+ * instructs the IOMMU driver to log pages that are dirtied or potentially
+ * dirtied by the device; designed to be used when a migration is in
+ * progress. Dirty pages are logged until logging is disabled by the user
+ * application by calling the IOCTL with the
+ * VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag.
+ *
+ * Calling the IOCTL with the VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag set
+ * instructs the IOMMU driver to stop logging dirtied pages.
+ *
+ * Calling the IOCTL with the VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP flag
+ * set returns the dirty pages bitmap of the IOMMU container for a given
+ * IOVA range. The user must specify the IOVA range and the pgsize through
+ * the structure vfio_iommu_type1_dirty_bitmap_get in the data[] portion.
+ * This interface supports getting a bitmap of the smallest supported
+ * pgsize only and can be modified in the future to get a bitmap of any
+ * specified supported pgsize. The user must provide a zeroed memory area
+ * for the bitmap memory and specify its size in bitmap.size. One bit is
+ * used to represent one page, consecutively starting from the iova
+ * offset. The user should provide the page size in the bitmap.pgsize
+ * field. A bit set in the bitmap indicates that the page at that offset
+ * from iova is dirty. The caller must set argsz to a value including the
+ * size of structure vfio_iommu_type1_dirty_bitmap_get, but excluding the
+ * size of the actual bitmap. If dirty pages logging is not enabled, an
+ * error will be returned.
+ *
+ * The VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR flag is almost the
+ * same as VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP, except that the
+ * underlying dirty bitmap is not cleared automatically. The user can
+ * clear it manually by calling the IOCTL with the
+ * VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP flag set.
+ *
+ * Calling the IOCTL with the VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP
+ * flag set instructs the IOMMU driver to clear the dirty status of pages
+ * in a bitmap of the IOMMU container for a given IOVA range. The user
+ * must specify the IOVA range, the bitmap and the pgsize through the
+ * structure vfio_iommu_type1_dirty_bitmap_get in the data[] portion. This
+ * interface supports clearing a bitmap of the smallest supported pgsize
+ * only and can be modified in the future to clear a bitmap of any
+ * specified supported pgsize. The user must provide a memory area for the
+ * bitmap memory and specify its size in bitmap.size. One bit is used to
+ * represent one page, consecutively starting from the iova offset. The
+ * user should provide the page size in the bitmap.pgsize field. A bit set
+ * in the bitmap indicates that the page at that offset from iova has its
+ * dirty status cleared, and dirty tracking is re-enabled for that page.
+ * The caller must set argsz to a value including the size of structure
+ * vfio_iommu_type1_dirty_bitmap_get, but excluding the size of the actual
+ * bitmap. If dirty pages logging is not enabled, an error will be
+ * returned. Note: the user should clear the dirty log before handling the
+ * corresponding dirty pages.
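+ *
+ * A minimal tracking round trip might look as follows (userspace sketch;
+ * container_fd, the 2MB range at IOVA 0x100000 and the 4K pgsize are
+ * illustrative assumptions, and the range must already be mapped):
+ *
+ *	struct vfio_iommu_type1_dirty_bitmap start = {
+ *		.argsz = sizeof(start),
+ *		.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START,
+ *	};
+ *	char buf[sizeof(struct vfio_iommu_type1_dirty_bitmap) +
+ *		 sizeof(struct vfio_iommu_type1_dirty_bitmap_get)];
+ *	struct vfio_iommu_type1_dirty_bitmap *db = (void *)buf;
+ *	struct vfio_iommu_type1_dirty_bitmap_get *get = (void *)db->data;
+ *	__u64 bits[8] = {};	(512 pages -> 512 bits -> 64 bytes)
+ *
+ *	ioctl(container_fd, VFIO_IOMMU_DIRTY_PAGES, &start);
+ *	db->argsz = sizeof(buf);	(argsz excludes the bitmap itself)
+ *	db->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP;
+ *	get->iova = 0x100000;
+ *	get->size = 2 * 1024 * 1024;
+ *	get->bitmap.pgsize = 4096;
+ *	get->bitmap.size = sizeof(bits);
+ *	get->bitmap.data = bits;
+ *	ioctl(container_fd, VFIO_IOMMU_DIRTY_PAGES, db);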
+ * + * Only one of the flags _START, _STOP, _GET, _GET_NOCLEAR_, and _CLEAR may be + * specified at a time. + */ +struct vfio_iommu_type1_dirty_bitmap { + __u32 argsz; + __u32 flags; +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_START (1 << 0) +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP (1 << 1) +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP (1 << 2) +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR (1 << 3) +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP (1 << 4) + __u8 data[]; +}; + +struct vfio_iommu_type1_dirty_bitmap_get { + __u64 iova; /* IO virtual address */ + __u64 size; /* Size of iova range */ + struct vfio_bitmap bitmap; +}; + +#define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17) + +/* + * VFIO_IOMMU_BIND_PROCESS + * + * Allocate a PASID for a process address space, and use it to attach this + * process to all devices in the container. Devices can then tag their DMA + * traffic with the returned @pasid to perform transactions on the associated + * virtual address space. Mapping and unmapping buffers is performed by standard + * functions such as mmap and malloc. + * + * If flag is VFIO_IOMMU_BIND_PID, @pid contains the pid of a foreign process to + * bind. Otherwise the current task is bound. Given that the caller owns the + * device, setting this flag grants the caller read and write permissions on the + * entire address space of foreign process described by @pid. Therefore, + * permission to perform the bind operation on a foreign process is governed by + * the ptrace access mode PTRACE_MODE_ATTACH_REALCREDS check. See man ptrace(2) + * for more information. + * + * On success, VFIO writes a Process Address Space ID (PASID) into @pasid. This + * ID is unique to a process and can be used on all devices in the container. + * + * On fork, the child inherits the device fd and can use the bonds setup by its + * parent. Consequently, the child has R/W access on the address spaces bound by + * its parent. After an execv, the device fd is closed and the child doesn't + * have access to the address space anymore. + * + * To remove a bond between process and container, VFIO_IOMMU_UNBIND ioctl is + * issued with the same parameters. If a pid was specified in VFIO_IOMMU_BIND, + * it should also be present for VFIO_IOMMU_UNBIND. Otherwise unbind the current + * task from the container. + */ +struct vfio_iommu_type1_bind_process { + __u32 flags; +#define VFIO_IOMMU_BIND_PID (1 << 0) + __u32 pasid; + __s32 pid; +}; + +/* + * Only mode supported at the moment is VFIO_IOMMU_BIND_PROCESS, which takes + * vfio_iommu_type1_bind_process in data. + */ +struct vfio_iommu_type1_bind { + __u32 argsz; + __u32 flags; +#define VFIO_IOMMU_BIND_PROCESS (1 << 0) + __u8 data[]; +}; + +/* + * VFIO_IOMMU_BIND - _IOWR(VFIO_TYPE, VFIO_BASE + 22, struct vfio_iommu_bind) + * + * Manage address spaces of devices in this container. Initially a TYPE1 + * container can only have one address space, managed with + * VFIO_IOMMU_MAP/UNMAP_DMA. + * + * An IOMMU of type VFIO_TYPE1_NESTING_IOMMU can be managed by both MAP/UNMAP + * and BIND ioctls at the same time. MAP/UNMAP acts on the stage-2 (host) page + * tables, and BIND manages the stage-1 (guest) page tables. Other types of + * IOMMU may allow MAP/UNMAP and BIND to coexist, where MAP/UNMAP controls + * non-PASID traffic and BIND controls PASID traffic. But this depends on the + * underlying IOMMU architecture and isn't guaranteed. + * + * Availability of this feature depends on the device, its bus, the underlying + * IOMMU and the CPU architecture. 
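+ *
+ * A minimal sketch (container_fd is an assumed open container fd): bind
+ * the current task and read back its PASID:
+ *
+ *	char buf[sizeof(struct vfio_iommu_type1_bind) +
+ *		 sizeof(struct vfio_iommu_type1_bind_process)] = {};
+ *	struct vfio_iommu_type1_bind *bind = (void *)buf;
+ *	struct vfio_iommu_type1_bind_process *proc = (void *)bind->data;
+ *
+ *	bind->argsz = sizeof(buf);
+ *	bind->flags = VFIO_IOMMU_BIND_PROCESS;
+ *	ioctl(container_fd, VFIO_IOMMU_BIND, bind);
+ *	(on success, proc->pasid holds the PASID for the current task)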
+ * + * returns: 0 on success, -errno on failure. + */ +#define VFIO_IOMMU_BIND _IO(VFIO_TYPE, VFIO_BASE + 22) + +/* + * VFIO_IOMMU_UNBIND - _IOWR(VFIO_TYPE, VFIO_BASE + 23, struct vfio_iommu_bind) + * + * Undo what was done by the corresponding VFIO_IOMMU_BIND ioctl. + */ +#define VFIO_IOMMU_UNBIND _IO(VFIO_TYPE, VFIO_BASE + 23) + +/* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ + +/* + * The SPAPR TCE DDW info struct provides the information about + * the details of Dynamic DMA window capability. + * + * @pgsizes contains a page size bitmask, 4K/64K/16M are supported. + * @max_dynamic_windows_supported tells the maximum number of windows + * which the platform can create. + * @levels tells the maximum number of levels in multi-level IOMMU tables; + * this allows splitting a table into smaller chunks which reduces + * the amount of physically contiguous memory required for the table. + */ +struct vfio_iommu_spapr_tce_ddw_info { + __u64 pgsizes; /* Bitmap of supported page sizes */ + __u32 max_dynamic_windows_supported; + __u32 levels; +}; + +/* + * The SPAPR TCE info struct provides the information about the PCI bus + * address ranges available for DMA, these values are programmed into + * the hardware so the guest has to know that information. + * + * The DMA 32 bit window start is an absolute PCI bus address. + * The IOVA address passed via map/unmap ioctls are absolute PCI bus + * addresses too so the window works as a filter rather than an offset + * for IOVA addresses. + * + * Flags supported: + * - VFIO_IOMMU_SPAPR_INFO_DDW: informs the userspace that dynamic DMA windows + * (DDW) support is present. @ddw is only supported when DDW is present. + */ +struct vfio_iommu_spapr_tce_info { + __u32 argsz; + __u32 flags; +#define VFIO_IOMMU_SPAPR_INFO_DDW (1 << 0) /* DDW supported */ + __u32 dma32_window_start; /* 32 bit window start (bytes) */ + __u32 dma32_window_size; /* 32 bit window size (bytes) */ + struct vfio_iommu_spapr_tce_ddw_info ddw; +}; + +#define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) + +/* + * EEH PE operation struct provides ways to: + * - enable/disable EEH functionality; + * - unfreeze IO/DMA for frozen PE; + * - read PE state; + * - reset PE; + * - configure PE; + * - inject EEH error. 
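+ *
+ * A minimal sketch using the op codes defined below (container_fd is an
+ * assumed open SPAPR container fd): enable EEH, then query the PE state,
+ * which is reported through the ioctl return value:
+ *
+ *	struct vfio_eeh_pe_op op = { .argsz = sizeof(op) };
+ *	int state;
+ *
+ *	op.op = VFIO_EEH_PE_ENABLE;
+ *	ioctl(container_fd, VFIO_EEH_PE_OP, &op);
+ *	op.op = VFIO_EEH_PE_GET_STATE;
+ *	state = ioctl(container_fd, VFIO_EEH_PE_OP, &op);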
+ */ +struct vfio_eeh_pe_err { + __u32 type; + __u32 func; + __u64 addr; + __u64 mask; +}; + +struct vfio_eeh_pe_op { + __u32 argsz; + __u32 flags; + __u32 op; + union { + struct vfio_eeh_pe_err err; + }; +}; + +#define VFIO_EEH_PE_DISABLE 0 /* Disable EEH functionality */ +#define VFIO_EEH_PE_ENABLE 1 /* Enable EEH functionality */ +#define VFIO_EEH_PE_UNFREEZE_IO 2 /* Enable IO for frozen PE */ +#define VFIO_EEH_PE_UNFREEZE_DMA 3 /* Enable DMA for frozen PE */ +#define VFIO_EEH_PE_GET_STATE 4 /* PE state retrieval */ +#define VFIO_EEH_PE_STATE_NORMAL 0 /* PE in functional state */ +#define VFIO_EEH_PE_STATE_RESET 1 /* PE reset in progress */ +#define VFIO_EEH_PE_STATE_STOPPED 2 /* Stopped DMA and IO */ +#define VFIO_EEH_PE_STATE_STOPPED_DMA 4 /* Stopped DMA only */ +#define VFIO_EEH_PE_STATE_UNAVAIL 5 /* State unavailable */ +#define VFIO_EEH_PE_RESET_DEACTIVATE 5 /* Deassert PE reset */ +#define VFIO_EEH_PE_RESET_HOT 6 /* Assert hot reset */ +#define VFIO_EEH_PE_RESET_FUNDAMENTAL 7 /* Assert fundamental reset */ +#define VFIO_EEH_PE_CONFIGURE 8 /* PE configuration */ +#define VFIO_EEH_PE_INJECT_ERR 9 /* Inject EEH error */ + +#define VFIO_EEH_PE_OP _IO(VFIO_TYPE, VFIO_BASE + 21) + +/** + * VFIO_IOMMU_SPAPR_REGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 17, struct vfio_iommu_spapr_register_memory) + * + * Registers user space memory where DMA is allowed. It pins + * user pages and does the locked memory accounting so + * subsequent VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA calls + * get faster. + */ +struct vfio_iommu_spapr_register_memory { + __u32 argsz; + __u32 flags; + __u64 vaddr; /* Process virtual address */ + __u64 size; /* Size of mapping (bytes) */ +}; +#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 17) + +/** + * VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 18, struct vfio_iommu_spapr_register_memory) + * + * Unregisters user space memory registered with + * VFIO_IOMMU_SPAPR_REGISTER_MEMORY. + * Uses vfio_iommu_spapr_register_memory for parameters. + */ +#define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 18) + +/** + * VFIO_IOMMU_SPAPR_TCE_CREATE - _IOWR(VFIO_TYPE, VFIO_BASE + 19, struct vfio_iommu_spapr_tce_create) + * + * Creates an additional TCE table and programs it (sets a new DMA window) + * to every IOMMU group in the container. It receives page shift, window + * size and number of levels in the TCE table being created. + * + * It allocates and returns an offset on a PCI bus of the new DMA window. + */ +struct vfio_iommu_spapr_tce_create { + __u32 argsz; + __u32 flags; + /* in */ + __u32 page_shift; + __u32 __resv1; + __u64 window_size; + __u32 levels; + __u32 __resv2; + /* out */ + __u64 start_addr; +}; +#define VFIO_IOMMU_SPAPR_TCE_CREATE _IO(VFIO_TYPE, VFIO_BASE + 19) + +/** + * VFIO_IOMMU_SPAPR_TCE_REMOVE - _IOW(VFIO_TYPE, VFIO_BASE + 20, struct vfio_iommu_spapr_tce_remove) + * + * Unprograms a TCE table from all groups in the container and destroys it. + * It receives a PCI bus offset as a window id. 
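+ *
+ * Together with VFIO_IOMMU_SPAPR_TCE_CREATE above, a window's lifetime is
+ * a create/remove pair on the structure defined just below (sketch;
+ * container_fd, the 64K page shift and the 1GB window size are
+ * illustrative assumptions subject to the ddw info limits):
+ *
+ *	struct vfio_iommu_spapr_tce_create create = {
+ *		.argsz = sizeof(create),
+ *		.page_shift = 16,
+ *		.window_size = 1ULL << 30,
+ *		.levels = 1,
+ *	};
+ *	struct vfio_iommu_spapr_tce_remove remove = {
+ *		.argsz = sizeof(remove),
+ *	};
+ *
+ *	ioctl(container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create);
+ *	remove.start_addr = create.start_addr;
+ *	ioctl(container_fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove);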
+ */ +struct vfio_iommu_spapr_tce_remove { + __u32 argsz; + __u32 flags; + /* in */ + __u64 start_addr; +}; +#define VFIO_IOMMU_SPAPR_TCE_REMOVE _IO(VFIO_TYPE, VFIO_BASE + 20) + +/* ***************************************************************** */ + +#endif /* _UAPIVFIO_H */ diff --git a/KAEKernelDriver/KAEKernelDriver-OLK-5.4/Makefile b/KAEKernelDriver/KAEKernelDriver-OLK-5.4/Makefile index 1ad372c..cecd57f 100644 --- a/KAEKernelDriver/KAEKernelDriver-OLK-5.4/Makefile +++ b/KAEKernelDriver/KAEKernelDriver-OLK-5.4/Makefile @@ -1,10 +1,10 @@ KERNEL_VERSION_BY_BUILDENV := `uname -r` KERNEL_PATH := /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/build -KSP := $(shell if test -d /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/source; then \ - echo /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/source; \ - else \ - echo /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/build; \ - fi) +# KSP := $(shell if test -d /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/source; then \ +# echo /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/source; \ +# else \ +# echo /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/build; \ +# fi) obj-m += uacce/ obj-m += hisilicon/ @@ -13,17 +13,25 @@ DIRS := $(shell find . -maxdepth 3 -type d) TARGET = $(foreach dir,$(DIRS),$(wildcard \ $(dir)/*.o) $(dir)/*.ko $(dir)/*.tmp_versions $(dir)/*.depend $(dir)/*.mod.c $(dir)/*.order $(dir)/*.symvers) +CONFIG_FLAGS = CONFIG_CC_STACKPROTECTOR_STRONG=y \ + CONFIG_UACCE=m \ + CONFIG_CRYPTO_QM_UACCE=m \ + CONFIG_CRYPTO_DEV_HISI_SGL=m \ + CONFIG_CRYPTO_DEV_HISI_QM=m \ + CONFIG_CRYPTO_DEV_HISI_ZIP=m \ + CONFIG_CRYPTO_DEV_HISI_HPRE=m \ + CONFIG_CRYPTO_DEV_HISI_SEC2=m \ + CONFIG_CRYPTO_DEV_HISI_TRNG=m + +ifeq ($(ENABLE_MIGRATION), y) +CONFIG_FLAGS += CONFIG_CRYPTO_DEV_HISI_MIGRATION=m +else +CONFIG_FLAGS += CONFIG_CRYPTO_DEV_HISI_MIGRATION=n +endif + default: - $(MAKE) -C $(KERNEL_PATH) M=$(shell pwd) modules \ - CONFIG_CC_STACKPROTECTOR_STRONG=y \ - CONFIG_UACCE=m \ - CONFIG_CRYPTO_QM_UACCE=m \ - CONFIG_CRYPTO_DEV_HISI_SGL=m \ - CONFIG_CRYPTO_DEV_HISI_QM=m \ - CONFIG_CRYPTO_DEV_HISI_ZIP=m \ - CONFIG_CRYPTO_DEV_HISI_HPRE=m \ - CONFIG_CRYPTO_DEV_HISI_SEC2=m \ - CONFIG_CRYPTO_DEV_HISI_TRNG=m + $(MAKE) -C $(KERNEL_PATH) M=$(shell pwd) modules $(CONFIG_FLAGS) + #copy: # cp -f $(shell pwd)/include_linux/uacce.h $(KSP)/include/linux # cp -f $(shell pwd)/include_uapi_linux/uacce.h $(KSP)/include/uapi/linux @@ -40,6 +48,9 @@ install: -modprobe hisi_sec2 uacce_mode=1 pf_q_num=256 -modprobe hisi_hpre uacce_mode=1 pf_q_num=256 -modprobe hisi_zip uacce_mode=1 pf_q_num=256 + $(shell if [ "$(ENABLE_MIGRATION)" = "y" ]; then \ + modprobe hisi_migration; \ + fi) -echo "options hisi_sec2 uacce_mode=1 pf_q_num=256" > /etc/modprobe.d/hisi_sec2.conf -echo "options hisi_hpre uacce_mode=1 pf_q_num=256" > /etc/modprobe.d/hisi_hpre.conf -echo "options hisi_zip uacce_mode=1 pf_q_num=256" > /etc/modprobe.d/hisi_zip.conf @@ -61,6 +72,9 @@ check: done uninstall: + $(shell if [ "$(ENABLE_MIGRATION)" = "y" ]; then \ + modprobe -r hisi_migration; \ + fi) modprobe -r hisi_zip modprobe -r hisi_hpre modprobe -r hisi_sec2 @@ -75,6 +89,9 @@ uninstall: rm -rf /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/extra/hisi_sec2.ko rm -rf /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/extra/hisi_hpre.ko rm -rf /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/extra/hisi_zip.ko + $(shell if [ "$(ENABLE_MIGRATION)" = "y" ]; then \ + rm -rf /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/extra/hisi_migration.ko; \ + fi) nosva: $(shell mkdir -p /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/extra) @@ -88,7 +105,9 @@ nosva: -modprobe 
hisi_sec2 uacce_mode=2 pf_q_num=256 -modprobe hisi_hpre uacce_mode=2 pf_q_num=256 -modprobe hisi_zip uacce_mode=2 pf_q_num=256 - + $(shell if [ "$(ENABLE_MIGRATION)" = "y" ]; then \ + modprobe hisi_migration; \ + fi) -echo "options hisi_sec2 uacce_mode=2 pf_q_num=256" > /etc/modprobe.d/hisi_sec2.conf -echo "options hisi_hpre uacce_mode=2 pf_q_num=256" > /etc/modprobe.d/hisi_hpre.conf -echo "options hisi_zip uacce_mode=2 pf_q_num=256" > /etc/modprobe.d/hisi_zip.conf diff --git a/KAEKernelDriver/KAEKernelDriver-OLK-5.4/hisilicon/Makefile b/KAEKernelDriver/KAEKernelDriver-OLK-5.4/hisilicon/Makefile index a62965b..19e6627 100644 --- a/KAEKernelDriver/KAEKernelDriver-OLK-5.4/hisilicon/Makefile +++ b/KAEKernelDriver/KAEKernelDriver-OLK-5.4/hisilicon/Makefile @@ -4,3 +4,4 @@ obj-$(CONFIG_CRYPTO_DEV_HISI_SEC2) += sec2/ obj-$(CONFIG_CRYPTO_DEV_HISI_QM) += hisi_qm.o hisi_qm-objs = qm.o sgl.o debugfs.o obj-$(CONFIG_CRYPTO_DEV_HISI_ZIP) += zip/ +obj-$(CONFIG_CRYPTO_DEV_HISI_MIGRATION) += migration/ \ No newline at end of file diff --git a/KAEKernelDriver/KAEKernelDriver-OLK-5.4/hisilicon/migration/Makefile b/KAEKernelDriver/KAEKernelDriver-OLK-5.4/hisilicon/migration/Makefile new file mode 100644 index 0000000..c2c5219 --- /dev/null +++ b/KAEKernelDriver/KAEKernelDriver-OLK-5.4/hisilicon/migration/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_CRYPTO_DEV_HISI_MIGRATION) += hisi_migration.o +hisi_migration-objs = acc_vf_migration.o \ No newline at end of file diff --git a/KAEKernelDriver/KAEKernelDriver-OLK-5.4/hisilicon/migration/acc_vf_migration.c b/KAEKernelDriver/KAEKernelDriver-OLK-5.4/hisilicon/migration/acc_vf_migration.c new file mode 100644 index 0000000..8a7196a --- /dev/null +++ b/KAEKernelDriver/KAEKernelDriver-OLK-5.4/hisilicon/migration/acc_vf_migration.c @@ -0,0 +1,1719 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 HiSilicon Limited. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "acc_vf_migration.h" + +#define VDM_OFFSET(x) offsetof(struct vfio_device_migration_info, x) +static struct dentry *mig_debugfs_root; +static int mig_root_ref; + +/* return 0 mailbox ready, -ETIMEDOUT hardware timeout */ +static int qm_wait_mb_ready(struct hisi_qm *qm) +{ + u32 val; + + return readl_relaxed_poll_timeout(qm->io_base + QM_MB_CMD_SEND_BASE, + val, !((val >> QM_MB_BUSY_SHIFT) & + 0x1), POLL_PERIOD, POLL_TIMEOUT); +} + +/* return 0 VM acc device ready, -ETIMEDOUT hardware timeout */ +static int qm_wait_dev_ready(struct hisi_qm *qm) +{ + u32 val; + + return readl_relaxed_poll_timeout(qm->io_base + QM_VF_STATE, + val, !(val & 0x1), POLL_PERIOD, POLL_TIMEOUT); +} + + +/* 128 bit should be written to hardware at one time to trigger a mailbox */ +static void qm_mb_write(struct hisi_qm *qm, const void *src) +{ + void __iomem *fun_base = qm->io_base + QM_MB_CMD_SEND_BASE; + unsigned long tmp0 = 0; + unsigned long tmp1 = 0; + + if (!IS_ENABLED(CONFIG_ARM64)) { + memcpy_toio(fun_base, src, 16); + wmb(); + return; + } + + asm volatile("ldp %0, %1, %3\n" + "stp %0, %1, %2\n" + "dsb sy\n" + : "=&r" (tmp0), + "=&r" (tmp1), + "+Q" (*((char __iomem *)fun_base)) + : "Q" (*((char *)src)) + : "memory"); +} + +static void qm_mb_pre_init(struct qm_mailbox *mailbox, u8 cmd, + u16 queue, bool op) +{ + mailbox->w0 = cpu_to_le16(cmd | + (op ? 
0x1 << QM_MB_OP_SHIFT : 0) | + (0x1 << QM_MB_BUSY_SHIFT)); + mailbox->queue_num = cpu_to_le16(queue); + mailbox->rsvd = 0; +} + +static int qm_mb_nolock(struct hisi_qm *qm, struct qm_mailbox *mailbox) +{ + int cnt = 0; + + if (unlikely(qm_wait_mb_ready(qm))) { + dev_err(&qm->pdev->dev, "QM mailbox is busy to start!\n"); + return -EBUSY; + } + + qm_mb_write(qm, mailbox); + while (true) { + if (!qm_wait_mb_ready(qm)) + break; + if (++cnt > QM_MB_MAX_WAIT_CNT) { + dev_err(&qm->pdev->dev, "QM mailbox operation timeout!\n"); + return -EBUSY; + } + } + return 0; +} + +static int qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue, + bool op) +{ + struct qm_mailbox mailbox; + int ret; + + dev_dbg(&qm->pdev->dev, "QM mailbox request to q%u: %u-0x%llx\n", + queue, cmd, (unsigned long long)dma_addr); + + qm_mb_pre_init(&mailbox, cmd, queue, op); + mailbox.base_l = cpu_to_le32(lower_32_bits(dma_addr)); + mailbox.base_h = cpu_to_le32(upper_32_bits(dma_addr)); + + mutex_lock(&qm->mailbox_lock); + ret = qm_mb_nolock(qm, &mailbox); + mutex_unlock(&qm->mailbox_lock); + + return ret; +} + +/* + * Each state Reg is checked 100 times, + * with a delay of 100 microseconds after each check + */ +static u32 acc_check_reg_state(struct hisi_qm *qm, u32 regs) +{ + int check_times = 0; + u32 state; + + state = readl(qm->io_base + regs); + while (state && check_times < ERROR_CHECK_TIMEOUT) { + udelay(CHECK_DELAY_TIME); + state = readl(qm->io_base + regs); + check_times++; + } + + return state; +} + +/* Check the PF's RAS state and Function INT state */ +static int qm_check_int_state(struct acc_vf_migration *acc_vf_dev) +{ + struct hisi_qm *vfqm = acc_vf_dev->vf_qm; + struct hisi_qm *qm = acc_vf_dev->pf_qm; + struct device *dev = &qm->pdev->dev; + u32 state; + + /* Check RAS state */ + state = acc_check_reg_state(qm, QM_ABNORMAL_INT_STATUS); + if (state) { + dev_err(dev, "failed to check QM RAS state!\n"); + return -EBUSY; + } + + /* Check Function Communication state between PF and VF */ + state = acc_check_reg_state(vfqm, QM_IFC_INT_STATUS); + if (state) { + dev_err(dev, "failed to check QM IFC INT state!\n"); + return -EBUSY; + } + state = acc_check_reg_state(vfqm, QM_IFC_INT_SET_V); + if (state) { + dev_err(dev, "failed to check QM IFC INT SET state!\n"); + return -EBUSY; + } + + /* Check submodule task state */ + switch (acc_vf_dev->acc_type) { + case HISI_SEC: + state = acc_check_reg_state(qm, SEC_CORE_INT_STATUS); + if (state) { + dev_err(dev, "failed to check QM SEC Core INT state!\n"); + return -EBUSY; + } + break; + case HISI_HPRE: + state = acc_check_reg_state(qm, HPRE_HAC_INT_STATUS); + if (state) { + dev_err(dev, "failed to check QM HPRE HAC INT state!\n"); + return -EBUSY; + } + break; + case HISI_ZIP: + state = acc_check_reg_state(qm, HZIP_CORE_INT_STATUS); + if (state) { + dev_err(dev, "failed to check QM ZIP Core INT state!\n"); + return -EBUSY; + } + break; + default: + dev_err(dev, "failed to detect acc module type!\n"); + return -EINVAL; + } + + return 0; +} + +static int qm_read_reg(struct hisi_qm *qm, u32 reg_addr, + u32 *data, u8 nums) +{ + int i; + + if (nums < 1 || nums > QM_REGS_MAX_LEN) { + dev_err(&qm->pdev->dev, "QM read input parameter is error!\n"); + return -EINVAL; + } + + for (i = 0; i < nums; i++) { + data[i] = readl(qm->io_base + reg_addr); + reg_addr += QM_REG_ADDR_OFFSET; + } + + return 0; +} + +static int qm_write_reg(struct hisi_qm *qm, u32 reg_addr, + u32 *data, u8 nums) +{ + int i; + + if (nums < 1 || nums > QM_REGS_MAX_LEN) { + dev_err(&qm->pdev->dev, "QM write 
input parameter is error!\n"); + return -EINVAL; + } + + for (i = 0; i < nums; i++) { + writel(data[i], qm->io_base + reg_addr); + reg_addr += QM_REG_ADDR_OFFSET; + } + + return 0; +} + +static int qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number) +{ + u64 sqc_vft; + int ret; + + ret = qm_mb(qm, QM_MB_CMD_SQC_VFT_V2, 0, 0, 1); + if (ret) + return ret; + + sqc_vft = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) | + ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) << + QM_XQC_ADDR_OFFSET); + *base = QM_SQC_VFT_BASE_MASK_V2 & (sqc_vft >> QM_SQC_VFT_BASE_SHIFT_V2); + *number = (QM_SQC_VFT_NUM_MASK_V2 & + (sqc_vft >> QM_SQC_VFT_NUM_SHIFT_V2)) + 1; + + return 0; +} + +static int qm_get_sqc(struct hisi_qm *qm, u64 *addr) +{ + int ret; + + ret = qm_mb(qm, QM_MB_CMD_SQC_BT, 0, 0, 1); + if (ret) + return ret; + + *addr = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) | + ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) << + QM_XQC_ADDR_OFFSET); + + return 0; +} + +static int qm_get_cqc(struct hisi_qm *qm, u64 *addr) +{ + int ret; + + ret = qm_mb(qm, QM_MB_CMD_CQC_BT, 0, 0, 1); + if (ret) + return ret; + + *addr = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) | + ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) << + QM_XQC_ADDR_OFFSET); + + return 0; +} + +static int qm_rw_regs_read(struct hisi_qm *qm, struct acc_vf_data *vf_data) +{ + struct device *dev = &qm->pdev->dev; + int ret; + + ret = qm_read_reg(qm, QM_VF_AEQ_INT_MASK, &vf_data->aeq_int_mask, 1); + if (ret) { + dev_err(dev, "failed to read QM_VF_AEQ_INT_MASK!\n"); + return ret; + } + + ret = qm_read_reg(qm, QM_VF_EQ_INT_MASK, &vf_data->eq_int_mask, 1); + if (ret) { + dev_err(dev, "failed to read QM_VF_EQ_INT_MASK!\n"); + return ret; + } + + ret = qm_read_reg(qm, QM_IFC_INT_SOURCE_V, + &vf_data->ifc_int_source, 1); + if (ret) { + dev_err(dev, "failed to read QM_IFC_INT_SOURCE_V!\n"); + return ret; + } + + ret = qm_read_reg(qm, QM_IFC_INT_MASK, &vf_data->ifc_int_mask, 1); + if (ret) { + dev_err(dev, "failed to read QM_IFC_INT_MASK!\n"); + return ret; + } + + ret = qm_read_reg(qm, QM_IFC_INT_SET_V, &vf_data->ifc_int_set, 1); + if (ret) { + dev_err(dev, "failed to read QM_IFC_INT_SET_V!\n"); + return ret; + } + + ret = qm_read_reg(qm, QM_PAGE_SIZE, &vf_data->page_size, 1); + if (ret) { + dev_err(dev, "failed to read QM_PAGE_SIZE!\n"); + return ret; + } + + ret = qm_read_reg(qm, QM_VF_STATE, &vf_data->vf_state, 1); + if (ret) { + dev_err(dev, "failed to read QM_VF_STATE!\n"); + return ret; + } + + /* QM_EQC_DW has 7 regs */ + ret = qm_read_reg(qm, QM_EQC_DW0, vf_data->qm_eqc_dw, 7); + if (ret) { + dev_err(dev, "failed to read QM_EQC_DW!\n"); + return ret; + } + + /* QM_AEQC_DW has 7 regs */ + ret = qm_read_reg(qm, QM_AEQC_DW0, vf_data->qm_aeqc_dw, 7); + if (ret) { + dev_err(dev, "failed to read QM_AEQC_DW!\n"); + return ret; + } + + return 0; +} + +static int qm_rw_regs_write(struct hisi_qm *qm, struct acc_vf_data *vf_data) +{ + struct device *dev = &qm->pdev->dev; + int ret; + + /* check VF state */ + if (unlikely(qm_wait_mb_ready(qm))) { + dev_err(&qm->pdev->dev, "QM device is not ready to write!\n"); + return -EBUSY; + } + + ret = qm_write_reg(qm, QM_VF_AEQ_INT_MASK, &vf_data->aeq_int_mask, 1); + if (ret) { + dev_err(dev, "failed to write QM_VF_AEQ_INT_MASK!\n"); + return ret; + } + + ret = qm_write_reg(qm, QM_VF_EQ_INT_MASK, &vf_data->eq_int_mask, 1); + if (ret) { + dev_err(dev, "failed to write QM_VF_EQ_INT_MASK!\n"); + return ret; + } + + ret = qm_write_reg(qm, QM_IFC_INT_SOURCE_V, + &vf_data->ifc_int_source, 1); + if (ret) { + dev_err(dev, 
"failed to write QM_IFC_INT_SOURCE_V!\n"); + return ret; + } + + ret = qm_write_reg(qm, QM_IFC_INT_MASK, &vf_data->ifc_int_mask, 1); + if (ret) { + dev_err(dev, "failed to write QM_IFC_INT_MASK!\n"); + return ret; + } + + ret = qm_write_reg(qm, QM_IFC_INT_SET_V, &vf_data->ifc_int_set, 1); + if (ret) { + dev_err(dev, "failed to write QM_IFC_INT_SET_V!\n"); + return ret; + } + + ret = qm_write_reg(qm, QM_QUE_ISO_CFG_V, &vf_data->que_iso_cfg, 1); + if (ret) { + dev_err(dev, "failed to write QM_QUE_ISO_CFG_V!\n"); + return ret; + } + + ret = qm_write_reg(qm, QM_PAGE_SIZE, &vf_data->page_size, 1); + if (ret) { + dev_err(dev, "failed to write QM_PAGE_SIZE!\n"); + return ret; + } + + ret = qm_write_reg(qm, QM_VF_STATE, &vf_data->vf_state, 1); + if (ret) { + dev_err(dev, "failed to write QM_VF_STATE!\n"); + return ret; + } + + /* QM_EQC_DW has 7 regs */ + ret = qm_write_reg(qm, QM_EQC_DW0, vf_data->qm_eqc_dw, 7); + if (ret) { + dev_err(dev, "failed to write QM_EQC_DW!\n"); + return ret; + } + + /* QM_AEQC_DW has 7 regs */ + ret = qm_write_reg(qm, QM_AEQC_DW0, vf_data->qm_aeqc_dw, 7); + if (ret) { + dev_err(dev, "failed to write QM_AEQC_DW!\n"); + return ret; + } + + return 0; +} + +/* + * the vf QM have unbind from host, insmod in the VM + * so, qm just have the addr from pci dev + * others is null. + * so we need read from the SEC hardware REGs. + */ +static int vf_migration_data_store(struct hisi_qm *qm, + struct acc_vf_migration *acc_vf_dev) +{ + struct acc_vf_data *vf_data = acc_vf_dev->vf_data; + struct device *dev = &qm->pdev->dev; + int ret; + + ret = qm_rw_regs_read(qm, vf_data); + if (ret) { + dev_err(dev, "failed to read QM regs!\n"); + return -EINVAL; + } + + /* + * every Reg is 32 bit, the dma address is 64 bit + * so, the dma address is store in the Reg2 and Reg1 + */ + vf_data->eqe_dma = vf_data->qm_eqc_dw[2]; + vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET; + vf_data->eqe_dma |= vf_data->qm_eqc_dw[1]; + vf_data->aeqe_dma = vf_data->qm_aeqc_dw[2]; + vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET; + vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[1]; + + /* Through SQC_BT/CQC_BT to get sqc and cqc address */ + ret = qm_get_sqc(qm, &vf_data->sqc_dma); + if (ret) { + dev_err(dev, "failed to read SQC addr!\n"); + return -EINVAL; + } + + ret = qm_get_cqc(qm, &vf_data->cqc_dma); + if (ret) { + dev_err(dev, "failed to read CQC addr!\n"); + return -EINVAL; + } + + return 0; +} + +static void qm_dev_cmd_init(struct hisi_qm *qm) +{ + /* clear VF communication status registers. */ + writel(0x1, qm->io_base + QM_IFC_INT_SOURCE_V); + + /* enable pf and vf communication. */ + writel(0x0, qm->io_base + QM_IFC_INT_MASK); +} + +static void qm_db(struct hisi_qm *qm, u16 qn, u8 cmd, + u16 index, u8 priority) +{ + void __iomem *io_base = qm->io_base; + u16 randata = 0; + u64 doorbell; + + if (cmd == QM_DOORBELL_CMD_SQ || cmd == QM_DOORBELL_CMD_CQ) + io_base = qm->db_io_base + (u64)qn * qm->db_interval + + QM_DOORBELL_SQ_CQ_BASE_V2; + else + io_base += QM_DOORBELL_EQ_AEQ_BASE_V2; + + doorbell = qn | ((u64)cmd << QM_DB_CMD_SHIFT_V2) | + ((u64)randata << QM_DB_RAND_SHIFT_V2) | + ((u64)index << QM_DB_INDEX_SHIFT_V2) | + ((u64)priority << QM_DB_PRIORITY_SHIFT_V2); + + writeq(doorbell, io_base); +} + +static void vf_qm_fun_restart(struct hisi_qm *qm, + struct acc_vf_migration *acc_vf_dev) +{ + struct acc_vf_data *vf_data = acc_vf_dev->vf_data; + struct device *dev = &qm->pdev->dev; + int i; + + /* + * When the system is rebooted, the SMMU page table is destroyed, + * and the QP queue cannot be returned normally at this time. 
+ * if vf_ready == 0x2, don't need to restart QP. + */ + if (vf_data->vf_state != QM_READY) { + dev_err(dev, "failed to restart VF!\n"); + return; + } + + for (i = 0; i < qm->qp_num; i++) + qm_db(qm, i, QM_DOORBELL_CMD_SQ, 0, 1); +} + +static int vf_match_info_check(struct hisi_qm *qm, + struct acc_vf_migration *acc_vf_dev) +{ + struct acc_vf_data *vf_data = acc_vf_dev->vf_data; + struct device *dev = &qm->pdev->dev; + u32 que_iso_state; + int ret; + + /* vf acc type check */ + if (vf_data->acc_type != acc_vf_dev->acc_type) { + dev_err(dev, "failed to match VF acc type!\n"); + return -EINVAL; + } + + /* vf qp num check */ + ret = qm_get_vft(qm, &qm->qp_base, &qm->qp_num); + if (ret || qm->qp_num <= 1) { + dev_err(dev, "failed to get vft qp nums!\n"); + return ret; + } + + if (vf_data->qp_num != qm->qp_num) { + dev_err(dev, "failed to match VF qp num!\n"); + return -EINVAL; + } + + /* vf isolation state check */ + ret = qm_read_reg(qm, QM_QUE_ISO_CFG_V, &que_iso_state, 1); + if (ret) { + dev_err(dev, "failed to read QM_QUE_ISO_CFG_V!\n"); + return ret; + } + if (vf_data->que_iso_cfg != que_iso_state) { + dev_err(dev, "failed to match isolation state!\n"); + return -EINVAL; + } + + return 0; +} + +static int vf_migration_data_recover(struct hisi_qm *qm, + struct acc_vf_data *vf_data) +{ + struct device *dev = &qm->pdev->dev; + int ret; + + qm->eqe_dma = vf_data->eqe_dma; + qm->aeqe_dma = vf_data->aeqe_dma; + qm->sqc_dma = vf_data->sqc_dma; + qm->cqc_dma = vf_data->cqc_dma; + + qm->qp_base = vf_data->qp_base; + qm->qp_num = vf_data->qp_num; + + ret = qm_rw_regs_write(qm, vf_data); + if (ret) { + dev_err(dev, "Set VF regs failed!\n"); + return ret; + } + + ret = qm_mb(qm, QM_MB_CMD_SQC_BT, qm->sqc_dma, 0, 0); + if (ret) { + dev_err(dev, "Set sqc failed!\n"); + return ret; + } + + ret = qm_mb(qm, QM_MB_CMD_CQC_BT, qm->cqc_dma, 0, 0); + if (ret) { + dev_err(dev, "Set cqc failed!\n"); + return ret; + } + + /* which ACC module need to reinit? 
*/ + qm_dev_cmd_init(qm); + + return 0; +} + +static int vf_qm_cache_wb(struct hisi_qm *qm) +{ + unsigned int val; + + writel(0x1, qm->io_base + QM_CACHE_WB_START); + if (readl_relaxed_poll_timeout(qm->io_base + QM_CACHE_WB_DONE, + val, val & BIT(0), POLL_PERIOD, + POLL_TIMEOUT)) { + dev_err(&qm->pdev->dev, "vf QM writeback sqc cache fail!\n"); + return -EINVAL; + } + + return 0; +} + +static int vf_qm_func_stop(struct hisi_qm *qm) +{ + return qm_mb(qm, QM_MB_CMD_PAUSE_QM, 0, 0, 0); +} + +static int pf_qm_get_qp_num(struct hisi_qm *qm, int vf_id, + u32 *rbase, u32 *rnumber) +{ + unsigned int val; + u64 sqc_vft; + int ret; + + ret = readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val, + val & BIT(0), POLL_PERIOD, + POLL_TIMEOUT); + if (ret) + return ret; + + writel(0x1, qm->io_base + QM_VFT_CFG_OP_WR); + /* 0 mean SQC VFT */ + writel(0x0, qm->io_base + QM_VFT_CFG_TYPE); + writel(vf_id, qm->io_base + QM_VFT_CFG); + + writel(0x0, qm->io_base + QM_VFT_CFG_RDY); + writel(0x1, qm->io_base + QM_VFT_CFG_OP_ENABLE); + + ret = readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val, + val & BIT(0), POLL_PERIOD, + POLL_TIMEOUT); + if (ret) + return ret; + + sqc_vft = readl(qm->io_base + QM_VFT_CFG_DATA_L) | + ((u64)readl(qm->io_base + QM_VFT_CFG_DATA_H) << + QM_XQC_ADDR_OFFSET); + *rbase = QM_SQC_VFT_BASE_MASK_V2 & + (sqc_vft >> QM_SQC_VFT_BASE_SHIFT_V2); + *rnumber = (QM_SQC_VFT_NUM_MASK_V2 & + (sqc_vft >> QM_SQC_VFT_NUM_SHIFT_V2)) + 1; + + return 0; +} + +static int pf_qm_state_pre_save(struct hisi_qm *qm, + struct acc_vf_migration *acc_vf_dev) +{ + struct acc_vf_data *vf_data = acc_vf_dev->vf_data; + struct device *dev = &qm->pdev->dev; + int vf_id = acc_vf_dev->vf_id; + int ret; + + /* vf acc type save */ + vf_data->acc_type = acc_vf_dev->acc_type; + + /* vf qp num save from PF */ + ret = pf_qm_get_qp_num(qm, vf_id, &qm->qp_base, &qm->qp_num); + if (ret || qm->qp_num <= 1) { + dev_err(dev, "failed to get vft qp nums!\n"); + return -EINVAL; + } + vf_data->qp_base = qm->qp_base; + vf_data->qp_num = qm->qp_num; + + /* vf isolation state save from PF */ + ret = qm_read_reg(qm, QM_QUE_ISO_CFG_V, &vf_data->que_iso_cfg, 1); + if (ret) { + dev_err(dev, "failed to read QM_QUE_ISO_CFG_V!\n"); + return ret; + } + + return 0; +} + +static int vf_qm_state_save(struct hisi_qm *qm, + struct acc_vf_migration *acc_vf_dev) +{ + struct device *dev = &acc_vf_dev->vf_dev->dev; + int ret; + + /* + * check VM task driver state + * if vf_ready == 0x1, skip migrate. 
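+	 * The ordering below matters: stop the VF function first, then
+	 * verify the RAS/interrupt state, write the QM cache back to DDR,
+	 * and only then snapshot the registers, so that the saved image
+	 * cannot race with in-flight work.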
+	 */
+	if (unlikely(qm_wait_dev_ready(qm))) {
+		acc_vf_dev->mig_ignore = true;
+		dev_err(&qm->pdev->dev, "QM device is not ready to read!\n");
+		return 0;
+	}
+
+	/* First stop the ACC VF function */
+	ret = vf_qm_func_stop(qm);
+	if (ret) {
+		dev_err(dev, "failed to stop QM VF function!\n");
+		return ret;
+	}
+
+	/* Check the VF's RAS and interrupt state */
+	ret = qm_check_int_state(acc_vf_dev);
+	if (ret) {
+		dev_err(dev, "failed to check QM INT state!\n");
+		goto state_error;
+	}
+
+	/* write the QM cache data back to DDR */
+	ret = vf_qm_cache_wb(qm);
+	if (ret) {
+		dev_err(dev, "failed to writeback QM Cache!\n");
+		goto state_error;
+	}
+
+	ret = vf_migration_data_store(qm, acc_vf_dev);
+	if (ret) {
+		dev_err(dev, "failed to get and store migration data!\n");
+		goto state_error;
+	}
+
+	return 0;
+
+state_error:
+	vf_qm_fun_restart(qm, acc_vf_dev);
+	return ret;
+}
+
+static int vf_qm_state_resume(struct hisi_qm *qm,
+	struct acc_vf_migration *acc_vf_dev)
+{
+	struct device *dev = &acc_vf_dev->vf_dev->dev;
+	int ret;
+
+	/* recover data to the VF */
+	ret = vf_migration_data_recover(qm, acc_vf_dev->vf_data);
+	if (ret) {
+		dev_err(dev, "failed to recover the VF!\n");
+		return ret;
+	}
+
+	/* restart all of the destination VF's QPs */
+	vf_qm_fun_restart(qm, acc_vf_dev);
+
+	return 0;
+}
+
+static int acc_vf_set_device_state(struct acc_vf_migration *acc_vf_dev,
+	u32 state)
+{
+	struct vfio_device_migration_info *mig_ctl = acc_vf_dev->mig_ctl;
+	struct device *dev = &acc_vf_dev->vf_dev->dev;
+	struct hisi_qm *pfqm = acc_vf_dev->pf_qm;
+	struct hisi_qm *qm = acc_vf_dev->vf_qm;
+	int ret = 0;
+
+	if (state == mig_ctl->device_state)
+		return 0;
+
+	switch (state) {
+	case VFIO_DEVICE_STATE_RUNNING:
+		if (!mig_ctl->data_size)
+			break;
+
+		if (mig_ctl->device_state == VFIO_DEVICE_STATE_RESUMING) {
+			ret = vf_qm_state_resume(qm, acc_vf_dev);
+			if (ret) {
+				dev_err(dev, "failed to resume device!\n");
+				return -EFAULT;
+			}
+		}
+
+		break;
+	case VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RUNNING:
+		/* ACC reads the match information data in the pre-copy cycle */
+		ret = pf_qm_state_pre_save(pfqm, acc_vf_dev);
+		if (ret) {
+			dev_err(dev, "failed to pre save device state!\n");
+			return -EFAULT;
+		}
+
+		/* set pending_bytes and the match data size */
+		mig_ctl->data_size = QM_MATCH_SIZE;
+		mig_ctl->pending_bytes = mig_ctl->data_size;
+
+		break;
+	case VFIO_DEVICE_STATE_SAVING:
+		/* stop the VF function */
+		ret = vf_qm_state_save(qm, acc_vf_dev);
+		if (ret) {
+			dev_err(dev, "failed to save device state!\n");
+			return -EFAULT;
+		}
+
+		if (acc_vf_dev->mig_ignore) {
+			mig_ctl->data_size = 0;
+			mig_ctl->pending_bytes = 0;
+			break;
+		}
+
+		/* set pending_bytes and data_size */
+		mig_ctl->data_size = sizeof(struct acc_vf_data);
+		mig_ctl->pending_bytes = mig_ctl->data_size;
+
+		break;
+	case VFIO_DEVICE_STATE_STOP:
+		/* restart all of the VF's QPs */
+		vf_qm_fun_restart(qm, acc_vf_dev);
+
+		break;
+	case VFIO_DEVICE_STATE_RESUMING:
+
+		break;
+	default:
+		ret = -EFAULT;
+	}
+
+	if (!ret) {
+		dev_info(dev, "migration state: %s ----------> %s!\n",
+			 vf_dev_state[mig_ctl->device_state],
+			 vf_dev_state[state]);
+		mig_ctl->device_state = state;
+	}
+
+	return ret;
+}
+
+static int acc_vf_data_transfer(struct acc_vf_migration *acc_vf_dev,
+	char __user *buf, size_t count, u64 pos, bool iswrite)
+{
+	struct vfio_device_migration_info *mig_ctl = acc_vf_dev->mig_ctl;
+	void *data_addr = acc_vf_dev->vf_data;
+	int ret = 0;
+
+	if (!count) {
+		dev_err(&acc_vf_dev->vf_dev->dev,
+			"Qemu operation data size error!\n");
return -EINVAL; + } + + data_addr += pos - mig_ctl->data_offset; + if (iswrite) { + ret = copy_from_user(data_addr, buf, count) ? + -EFAULT : count; + if (ret == count) + mig_ctl->pending_bytes += count; + } else { + ret = copy_to_user(buf, data_addr, count) ? + -EFAULT : count; + if (ret == count) + mig_ctl->pending_bytes -= count; + } + + return ret; +} + +static int acc_vf_region_migration_rw(struct acc_vf_migration *acc_vf_dev, + char __user *buf, size_t count, loff_t *ppos, bool iswrite) +{ + struct vfio_device_migration_info *mig_ctl = acc_vf_dev->mig_ctl; + struct device *dev = &acc_vf_dev->vf_dev->dev; + struct hisi_qm *qm = acc_vf_dev->vf_qm; + u64 pos = *ppos & VFIO_PCI_OFFSET_MASK; + u32 device_state; + int ret = 0; + + switch (pos) { + case VDM_OFFSET(device_state): + if (count != sizeof(mig_ctl->device_state)) { + ret = -EINVAL; + break; + } + + if (iswrite) { + if (copy_from_user(&device_state, buf, count)) { + ret = -EFAULT; + break; + } + + ret = acc_vf_set_device_state(acc_vf_dev, + device_state) ? ret : count; + } else { + ret = copy_to_user(buf, &mig_ctl->device_state, + count) ? -EFAULT : count; + } + break; + case VDM_OFFSET(reserved): + ret = -EFAULT; + break; + case VDM_OFFSET(pending_bytes): + if (count != sizeof(mig_ctl->pending_bytes)) { + ret = -EINVAL; + break; + } + + if (iswrite) + ret = -EFAULT; + else + ret = copy_to_user(buf, &mig_ctl->pending_bytes, + count) ? -EFAULT : count; + break; + case VDM_OFFSET(data_offset): + if (count != sizeof(mig_ctl->data_offset)) { + ret = -EINVAL; + break; + } + if (iswrite) + ret = copy_from_user(&mig_ctl->data_offset, buf, count) ? + -EFAULT : count; + else + ret = copy_to_user(buf, &mig_ctl->data_offset, count) ? + -EFAULT : count; + break; + case VDM_OFFSET(data_size): + if (count != sizeof(mig_ctl->data_size)) { + ret = -EINVAL; + break; + } + + if (iswrite) + ret = copy_from_user(&mig_ctl->data_size, buf, count) ? + -EFAULT : count; + else + ret = copy_to_user(buf, &mig_ctl->data_size, count) ? 
+ -EFAULT : count; + break; + default: + ret = -EFAULT; + break; + } + + /* Transfer data section */ + if (pos >= mig_ctl->data_offset && + pos < MIGRATION_REGION_SZ) { + ret = acc_vf_data_transfer(acc_vf_dev, buf, + count, pos, iswrite); + if (ret != count) + return ret; + } + + if (mig_ctl->device_state == VFIO_DEVICE_STATE_RESUMING && + mig_ctl->pending_bytes == QM_MATCH_SIZE && + mig_ctl->data_size == QM_MATCH_SIZE) { + /* check the VF match information */ + ret = vf_match_info_check(qm, acc_vf_dev); + if (ret) { + dev_err(dev, "failed to check match information!\n"); + return -EFAULT; + } + ret = count; + + /* clear the VF match data size */ + mig_ctl->pending_bytes = 0; + mig_ctl->data_size = 0; + } + return ret; +} + +static int acc_vf_region_migration_mmap(struct acc_vf_migration *acc_vf_dev, + struct acc_vf_region *region, + struct vm_area_struct *vma) +{ + return -EFAULT; +} + +static void acc_vf_region_migration_release(struct acc_vf_migration *acc_vf_dev, + struct acc_vf_region *region) +{ + kfree(acc_vf_dev->mig_ctl); + acc_vf_dev->mig_ctl = NULL; +} + +static const struct acc_vf_region_ops acc_vf_region_ops_migration = { + .rw = acc_vf_region_migration_rw, + .release = acc_vf_region_migration_release, + .mmap = acc_vf_region_migration_mmap, +}; + +static int acc_vf_register_region(struct acc_vf_migration *acc_vf_dev, + const struct acc_vf_region_ops *ops, + void *data) +{ + struct acc_vf_region *regions; + + regions = krealloc(acc_vf_dev->regions, + (acc_vf_dev->num_regions + 1) * sizeof(*regions), + GFP_KERNEL); + if (!regions) + return -ENOMEM; + + acc_vf_dev->regions = regions; + regions[acc_vf_dev->num_regions].type = + VFIO_REGION_TYPE_MIGRATION; + regions[acc_vf_dev->num_regions].subtype = + VFIO_REGION_SUBTYPE_MIGRATION; + regions[acc_vf_dev->num_regions].ops = ops; + regions[acc_vf_dev->num_regions].size = + MIGRATION_REGION_SZ; + regions[acc_vf_dev->num_regions].flags = + VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE; + regions[acc_vf_dev->num_regions].data = data; + acc_vf_dev->num_regions++; + + return 0; +} + +static long acc_vf_get_region_info(void *device_data, + unsigned int cmd, unsigned long arg) +{ + int num_vdev_regions = vfio_pci_num_regions(device_data); + struct acc_vf_migration *acc_vf_dev = + vfio_pci_vendor_data(device_data); + struct vfio_region_info_cap_type cap_type; + struct acc_vf_region *regions; + struct vfio_region_info info; + struct vfio_info_cap caps; + unsigned long minsz; + int index, ret; + + minsz = offsetofend(struct vfio_region_info, offset); + + if (cmd != VFIO_DEVICE_GET_REGION_INFO) + return -EINVAL; + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + if (info.index < VFIO_PCI_NUM_REGIONS + num_vdev_regions) + goto default_handle; + + index = info.index - VFIO_PCI_NUM_REGIONS - num_vdev_regions; + if (index > acc_vf_dev->num_regions) { + dev_err(&acc_vf_dev->vf_dev->dev, + "failed to check region numbers!\n"); + return -EINVAL; + } + + info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); + regions = acc_vf_dev->regions; + info.size = regions[index].size; + info.flags = regions[index].flags; + caps.buf = NULL; + caps.size = 0; + cap_type.header.id = VFIO_REGION_INFO_CAP_TYPE; + cap_type.header.version = 1; + cap_type.type = regions[index].type; + cap_type.subtype = regions[index].subtype; + + ret = vfio_info_add_capability(&caps, &cap_type.header, + sizeof(cap_type)); + if (ret) + return ret; + + if (regions[index].ops->add_cap) { + ret = 
regions[index].ops->add_cap(acc_vf_dev, + ®ions[index], &caps); + if (ret) { + kfree(caps.buf); + return ret; + } + } + + if (caps.size) { + info.flags |= VFIO_REGION_INFO_FLAG_CAPS; + if (info.argsz < sizeof(info) + caps.size) { + info.argsz = sizeof(info) + caps.size; + info.cap_offset = 0; + } else { + vfio_info_cap_shift(&caps, sizeof(info)); + if (copy_to_user((void __user *)arg + sizeof(info), + caps.buf, caps.size)) { + kfree(caps.buf); + return -EFAULT; + } + info.cap_offset = sizeof(info); + } + kfree(caps.buf); + } + + return copy_to_user((void __user *)arg, &info, minsz) ? + -EFAULT : 0; + +default_handle: + ret = vfio_pci_ioctl(device_data, cmd, arg); + if (ret) + return ret; + + if (info.index == VFIO_PCI_BAR0_REGION_INDEX) { + if (!acc_vf_dev->in_dirty_track) + return ret; + + /* read default handler's data back */ + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + info.flags = VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE; + /* update customized region info */ + if (copy_to_user((void __user *)arg, &info, minsz)) + return -EFAULT; + } + + if (info.index == VFIO_PCI_BAR2_REGION_INDEX) { + info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); + /* + * ACC VF dev BAR2 region(64K) consists of both functional + * register space and migration control register space. + * Report only the first 32K(functional region) to Guest. + */ + info.size = pci_resource_len(acc_vf_dev->vf_dev, info.index) >> 1; + info.flags = VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE | + VFIO_REGION_INFO_FLAG_MMAP; + if (copy_to_user((void __user *)arg, &info, minsz)) + return -EFAULT; + } + + return ret; +} + +static int acc_vf_open(void *device_data) +{ + struct acc_vf_migration *acc_vf_dev = + vfio_pci_vendor_data(device_data); + struct vfio_device_migration_info *mig_ctl; + __u64 mig_offset; + void *vf_data; + int ret; + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + mutex_lock(&acc_vf_dev->reflock); + if (!acc_vf_dev->refcnt) { + ret = acc_vf_register_region(acc_vf_dev, + &acc_vf_region_ops_migration, + NULL); + if (ret) + goto region_error; + vfio_pci_set_vendor_regions(device_data, + acc_vf_dev->num_regions); + + /* the data region must follow migration info */ + mig_offset = sizeof(struct vfio_device_migration_info); + mig_ctl = kzalloc(MIGRATION_REGION_SZ, GFP_KERNEL); + if (!mig_ctl) { + ret = -ENOMEM; + goto mig_error; + } + acc_vf_dev->mig_ctl = mig_ctl; + + vf_data = (void *)mig_ctl + mig_offset; + acc_vf_dev->vf_data = vf_data; + + mig_ctl->device_state = VFIO_DEVICE_STATE_RUNNING; + mig_ctl->data_offset = mig_offset; + mig_ctl->data_size = 0; + } + + ret = vfio_pci_open(device_data); + if (ret) + goto open_error; + + acc_vf_dev->refcnt++; + mutex_unlock(&acc_vf_dev->reflock); + + return 0; + +open_error: + if (!acc_vf_dev->refcnt) { + kfree(acc_vf_dev->mig_ctl); + acc_vf_dev->mig_ctl = NULL; + } +mig_error: + vfio_pci_set_vendor_regions(device_data, 0); +region_error: + mutex_unlock(&acc_vf_dev->reflock); + module_put(THIS_MODULE); + return ret; +} + +static void acc_vf_release(void *device_data) +{ + struct acc_vf_migration *acc_vf_dev = + vfio_pci_vendor_data(device_data); + int i; + + mutex_lock(&acc_vf_dev->reflock); + if (!--acc_vf_dev->refcnt) { + for (i = 0; i < acc_vf_dev->num_regions; i++) { + if (!acc_vf_dev->regions[i].ops) + continue; + acc_vf_dev->regions[i].ops->release(acc_vf_dev, + &acc_vf_dev->regions[i]); + } + kfree(acc_vf_dev->regions); + acc_vf_dev->regions = NULL; + acc_vf_dev->num_regions = 0; + 
vfio_pci_set_vendor_regions(device_data, 0); + + kfree(acc_vf_dev->mig_ctl); + acc_vf_dev->mig_ctl = NULL; + } + vfio_pci_release(device_data); + mutex_unlock(&acc_vf_dev->reflock); + module_put(THIS_MODULE); +} + +static long acc_vf_ioctl(void *device_data, + unsigned int cmd, unsigned long arg) +{ + switch (cmd) { + case VFIO_DEVICE_GET_REGION_INFO: + return acc_vf_get_region_info(device_data, cmd, arg); + default: + return vfio_pci_ioctl(device_data, cmd, arg); + } +} + +static ssize_t acc_vf_read(void *device_data, char __user *buf, + size_t count, loff_t *ppos) +{ + struct acc_vf_migration *acc_vf_dev = + vfio_pci_vendor_data(device_data); + int num_vdev_regions = vfio_pci_num_regions(device_data); + unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); + int num_vendor_region = acc_vf_dev->num_regions; + struct acc_vf_region *region; + + if (index >= VFIO_PCI_NUM_REGIONS + num_vdev_regions + + num_vendor_region) { + dev_err(&acc_vf_dev->vf_dev->dev, + "failed to check read regions index!\n"); + return -EINVAL; + } + + if (index < VFIO_PCI_NUM_REGIONS + num_vdev_regions) + return vfio_pci_read(device_data, buf, count, ppos); + + index -= VFIO_PCI_NUM_REGIONS + num_vdev_regions; + + region = &acc_vf_dev->regions[index]; + if (!region->ops->rw) { + dev_err(&acc_vf_dev->vf_dev->dev, + "failed to check regions read ops!\n"); + return -EINVAL; + } + + return region->ops->rw(acc_vf_dev, buf, count, ppos, false); +} + +static ssize_t acc_vf_write(void *device_data, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct acc_vf_migration *acc_vf_dev = + vfio_pci_vendor_data(device_data); + int num_vdev_regions = vfio_pci_num_regions(device_data); + unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); + int num_vendor_region = acc_vf_dev->num_regions; + struct acc_vf_region *region; + + if (index == VFIO_PCI_BAR0_REGION_INDEX) + pr_debug("vfio bar 0 write\n"); + + if (index >= VFIO_PCI_NUM_REGIONS + num_vdev_regions + + num_vendor_region) { + dev_err(&acc_vf_dev->vf_dev->dev, + "failed to check write regions index!\n"); + return -EINVAL; + } + + if (index < VFIO_PCI_NUM_REGIONS + num_vdev_regions) + return vfio_pci_write(device_data, buf, count, ppos); + + index -= VFIO_PCI_NUM_REGIONS + num_vdev_regions; + + region = &acc_vf_dev->regions[index]; + + if (!region->ops->rw) { + dev_err(&acc_vf_dev->vf_dev->dev, + "failed to check regions write ops!\n"); + return -EINVAL; + } + + return region->ops->rw(acc_vf_dev, (char __user *)buf, + count, ppos, true); +} + +static int acc_vf_mmap(void *device_data, struct vm_area_struct *vma) +{ + return vfio_pci_mmap(device_data, vma); +} + +static void acc_vf_request(void *device_data, unsigned int count) +{ + vfio_pci_request(device_data, count); +} + +static struct vfio_device_ops acc_vf_device_ops_node = { + .name = "acc_vf", + .open = acc_vf_open, + .release = acc_vf_release, + .ioctl = acc_vf_ioctl, + .read = acc_vf_read, + .write = acc_vf_write, + .mmap = acc_vf_mmap, + .request = acc_vf_request, +}; + +static ssize_t acc_vf_debug_read(struct file *filp, char __user *buffer, + size_t count, loff_t *pos) +{ + char buf[VFIO_DEV_DBG_LEN]; + int len; + + len = scnprintf(buf, VFIO_DEV_DBG_LEN, "%s\n", + "echo 0: test vf data store\n" + "echo 1: test vf data writeback\n" + "echo 2: test vf send mailbox\n" + "echo 3: dump vf dev data\n" + "echo 4: dump migration state\n"); + + return simple_read_from_buffer(buffer, count, pos, buf, len); +} + +static ssize_t acc_vf_debug_write(struct file *filp, const char __user *buffer, + size_t count, loff_t 
*pos)
+{
+	struct acc_vf_migration *acc_vf_dev = filp->private_data;
+	struct device *dev = &acc_vf_dev->vf_dev->dev;
+	struct hisi_qm *qm = acc_vf_dev->vf_qm;
+	char tbuf[VFIO_DEV_DBG_LEN];
+	unsigned long val;
+	u64 data;
+	int len, ret;
+
+	if (*pos)
+		return 0;
+
+	if (count >= VFIO_DEV_DBG_LEN)
+		return -ENOSPC;
+
+	len = simple_write_to_buffer(tbuf, VFIO_DEV_DBG_LEN - 1,
+				     pos, buffer, count);
+	if (len < 0)
+		return len;
+	tbuf[len] = '\0';
+	if (kstrtoul(tbuf, 0, &val))
+		return -EFAULT;
+
+	switch (val) {
+	case STATE_SAVE:
+		ret = vf_qm_state_save(qm, acc_vf_dev);
+		if (ret)
+			return -EINVAL;
+		break;
+	case STATE_RESUME:
+		ret = vf_qm_state_resume(qm, acc_vf_dev);
+		if (ret)
+			return -EINVAL;
+		break;
+	case MB_TEST:
+		data = readl(qm->io_base + QM_MB_CMD_SEND_BASE);
+		dev_info(dev, "debug mailbox addr: 0x%lx, mailbox val: 0x%llx\n",
+			 (uintptr_t)qm->phys_base, data);
+		break;
+	case MIG_DATA_DUMP:
+		dev_info(dev, "dumped vf migration data:\n");
+		print_hex_dump(KERN_INFO, "Mig Data:", DUMP_PREFIX_OFFSET,
+			       VFIO_DBG_LOG_LEN, 1,
+			       (unsigned char *)acc_vf_dev->vf_data,
+			       sizeof(struct acc_vf_data), false);
+		break;
+	case MIG_DEV_SHOW:
+		if (!acc_vf_dev->mig_ctl)
+			dev_info(dev, "migration region has been released!\n");
+		else
+			dev_info(dev,
+				 "device state: %u\n"
+				 "data offset: %llu\n"
+				 "data size: %llu\n"
+				 "pending bytes: %llu\n"
+				 "data addr: 0x%lx\n",
+				 acc_vf_dev->mig_ctl->device_state,
+				 acc_vf_dev->mig_ctl->data_offset,
+				 acc_vf_dev->mig_ctl->data_size,
+				 acc_vf_dev->mig_ctl->pending_bytes,
+				 (uintptr_t)acc_vf_dev->vf_data);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return count;
+}
+
+static const struct file_operations acc_vf_debug_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = acc_vf_debug_read,
+	.write = acc_vf_debug_write,
+};
+
+static ssize_t acc_vf_state_read(struct file *filp, char __user *buffer,
+				 size_t count, loff_t *pos)
+{
+	struct acc_vf_migration *acc_vf_dev = filp->private_data;
+	char buf[VFIO_DEV_DBG_LEN];
+	u32 state;
+	int len;
+
+	if (!acc_vf_dev->mig_ctl) {
+		len = scnprintf(buf, VFIO_DEV_DBG_LEN, "%s\n", "Invalid\n");
+	} else {
+		state = acc_vf_dev->mig_ctl->device_state;
+		switch (state) {
+		case VFIO_DEVICE_STATE_RUNNING:
+			len = scnprintf(buf, VFIO_DEV_DBG_LEN, "%s\n",
+					"RUNNING\n");
+			break;
+		case VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RUNNING:
+			len = scnprintf(buf, VFIO_DEV_DBG_LEN, "%s\n",
+					"SAVING and RUNNING\n");
+			break;
+		case VFIO_DEVICE_STATE_SAVING:
+			len = scnprintf(buf, VFIO_DEV_DBG_LEN, "%s\n",
+					"SAVING\n");
+			break;
+		case VFIO_DEVICE_STATE_STOP:
+			len = scnprintf(buf, VFIO_DEV_DBG_LEN, "%s\n",
+					"STOP\n");
+			break;
+		case VFIO_DEVICE_STATE_RESUMING:
+			len = scnprintf(buf, VFIO_DEV_DBG_LEN, "%s\n",
+					"RESUMING\n");
+			break;
+		default:
+			len = scnprintf(buf, VFIO_DEV_DBG_LEN, "%s\n",
+					"Error\n");
+		}
+	}
+
+	return simple_read_from_buffer(buffer, count, pos, buf, len);
+}
+
+static const struct file_operations acc_vf_state_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = acc_vf_state_read,
+};
+
+static void vf_debugfs_init(struct acc_vf_migration *acc_vf_dev)
+{
+	char name[VFIO_DEV_DBG_LEN];
+	int node_id;
+
+	if (!mig_root_ref)
+		mig_debugfs_root = debugfs_create_dir("vfio_acc", NULL);
+	mutex_lock(&acc_vf_dev->reflock);
+	mig_root_ref++;
+	mutex_unlock(&acc_vf_dev->reflock);
+
+	node_id = dev_to_node(&acc_vf_dev->vf_dev->dev);
+	if (node_id < 0)
+		node_id = 0;
+
+	if (acc_vf_dev->acc_type == HISI_SEC)
+		scnprintf(name, VFIO_DEV_DBG_LEN, "sec_vf%d-%d",
+			  node_id, acc_vf_dev->vf_id);
+	else if (acc_vf_dev->acc_type == HISI_HPRE)
+		scnprintf(name, VFIO_DEV_DBG_LEN, "hpre_vf%d-%d",
+			  node_id, acc_vf_dev->vf_id);
+	else
+		scnprintf(name, VFIO_DEV_DBG_LEN, "zip_vf%d-%d",
+			  node_id, acc_vf_dev->vf_id);
+
+	acc_vf_dev->debug_root = debugfs_create_dir(name, mig_debugfs_root);
+
+	debugfs_create_file("debug", 0644, acc_vf_dev->debug_root,
+			    acc_vf_dev, &acc_vf_debug_fops);
+	debugfs_create_file("state", 0444, acc_vf_dev->debug_root,
+			    acc_vf_dev, &acc_vf_state_fops);
+}
+
+static void vf_debugfs_exit(struct acc_vf_migration *acc_vf_dev)
+{
+	debugfs_remove_recursive(acc_vf_dev->debug_root);
+
+	mutex_lock(&acc_vf_dev->reflock);
+	mig_root_ref--;
+	mutex_unlock(&acc_vf_dev->reflock);
+
+	if (!mig_root_ref)
+		debugfs_remove_recursive(mig_debugfs_root);
+}
+
+static int qm_acc_type_init(struct acc_vf_migration *acc_vf_dev)
+{
+	struct hisi_qm *qm = acc_vf_dev->vf_qm;
+	int i;
+
+	acc_vf_dev->acc_type = 0;
+	for (i = 0; i < ARRAY_SIZE(vf_acc_types); i++) {
+		if (!strncmp(qm->dev_name, vf_acc_types[i].name,
+			     strlen(vf_acc_types[i].name)))
+			acc_vf_dev->acc_type = vf_acc_types[i].type;
+	}
+	if (!acc_vf_dev->acc_type) {
+		dev_err(&acc_vf_dev->vf_dev->dev, "failed to check acc type!\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int vf_qm_pci_init(struct pci_dev *pdev, struct hisi_qm *vfqm)
+{
+	struct device *dev = &pdev->dev;
+	u32 val;
+	int ret;
+
+	ret = pci_request_mem_regions(pdev, vfqm->dev_name);
+	if (ret < 0) {
+		dev_err(dev, "failed to request mem regions!\n");
+		return ret;
+	}
+
+	vfqm->phys_base = pci_resource_start(pdev, PCI_BAR_2);
+	vfqm->io_base = devm_ioremap(dev, pci_resource_start(pdev, PCI_BAR_2),
+				     pci_resource_len(pdev, PCI_BAR_2));
+	if (!vfqm->io_base) {
+		ret = -EIO;
+		goto err_ioremap;
+	}
+
+	val = readl(vfqm->io_base + QM_QUE_ISO_CFG_V);
+	val = val & BIT(0);
+	if (val) {
+		vfqm->db_phys_base = pci_resource_start(pdev, PCI_BAR_4);
+		vfqm->db_io_base = devm_ioremap(dev, pci_resource_start(pdev,
+				   PCI_BAR_4), pci_resource_len(pdev, PCI_BAR_4));
+		if (!vfqm->db_io_base) {
+			ret = -EIO;
+			goto err_db_ioremap;
+		}
+	} else {
+		vfqm->db_phys_base = vfqm->phys_base;
+		vfqm->db_io_base = vfqm->io_base;
+	}
+
+	vfqm->pdev = pdev;
+	mutex_init(&vfqm->mailbox_lock);
+
+	/*
+	 * Allow the VF device to be loaded in the VM while it is
+	 * loaded in the migration driver.
+	 */
+	pci_release_mem_regions(pdev);
+
+	return 0;
+
+err_db_ioremap:
+	devm_iounmap(dev, vfqm->io_base);
+err_ioremap:
+	pci_release_mem_regions(pdev);
+	return ret;
+}
+
+static int acc_vf_dev_init(struct pci_dev *pdev, struct hisi_qm *pf_qm,
+			   struct acc_vf_migration *acc_vf_dev)
+{
+	struct hisi_qm *vf_qm;
+	int ret;
+
+	vf_qm = kzalloc(sizeof(struct hisi_qm), GFP_KERNEL);
+	if (!vf_qm)
+		return -ENOMEM;
+
+	/* get vf qm dev name from pf */
+	vf_qm->dev_name = pf_qm->dev_name;
+	vf_qm->fun_type = QM_HW_VF;
+	acc_vf_dev->vf_qm = vf_qm;
+	acc_vf_dev->pf_qm = pf_qm;
+
+	ret = vf_qm_pci_init(pdev, vf_qm);
+	if (ret)
+		goto init_qm_error;
+
+	ret = qm_acc_type_init(acc_vf_dev);
+	if (ret)
+		goto init_qm_error;
+
+	return 0;
+
+init_qm_error:
+	kfree(vf_qm);
+	return ret;
+}
+
+static void *acc_vf_probe(struct pci_dev *pdev)
+{
+	struct acc_vf_migration *acc_vf_dev;
+	struct pci_dev *pf_dev, *vf_dev;
+	struct hisi_qm *pf_qm;
+	int vf_id, ret;
+
+	pf_dev = pdev->physfn;
+	vf_dev = pdev;
+	/*
+	 * The VF driver has been removed after unbind;
+	 * the PF driver has been probed.
+	 */
+	pf_qm = pci_get_drvdata(pf_dev);
+	if (!pf_qm) {
+		dev_err(&pdev->dev, "host qm driver not loaded!\n");
+		return ERR_PTR(-EINVAL);
+	}
+	if (pf_qm->ver < QM_HW_V3) {
+		dev_err(&pdev->dev,
+			"device can't support migration! version: 0x%x\n",
+			pf_qm->ver);
+		return ERR_PTR(-EINVAL);
+	}
+
+	vf_id = PCI_FUNC(vf_dev->devfn);
+	if (vf_id < 0) {
+		dev_info(&pdev->dev, "vf device: %s, vf id: %d\n",
+			 pf_qm->dev_name, vf_id);
+		return ERR_PTR(-EINVAL);
+	}
+
+	acc_vf_dev = kzalloc(sizeof(*acc_vf_dev), GFP_KERNEL);
+	if (!acc_vf_dev)
+		return ERR_PTR(-ENOMEM);
+
+	ret = acc_vf_dev_init(pdev, pf_qm, acc_vf_dev);
+	if (ret) {
+		kfree(acc_vf_dev);
+		return ERR_PTR(ret);
+	}
+
+	acc_vf_dev->vf_id = vf_id;
+	acc_vf_dev->vf_vendor = pdev->vendor;
+	acc_vf_dev->vf_device = pdev->device;
+	acc_vf_dev->pf_dev = pf_dev;
+	acc_vf_dev->vf_dev = vf_dev;
+	acc_vf_dev->mig_ignore = false;
+	mutex_init(&acc_vf_dev->reflock);
+
+	vf_debugfs_init(acc_vf_dev);
+
+	return acc_vf_dev;
+}
+
+static void acc_vf_remove(void *vendor_data)
+{
+	struct acc_vf_migration *acc_vf_dev = vendor_data;
+	struct device *dev = &acc_vf_dev->vf_dev->dev;
+	struct hisi_qm *qm = acc_vf_dev->vf_qm;
+
+	vf_debugfs_exit(acc_vf_dev);
+
+	devm_iounmap(dev, qm->io_base);
+
+	kfree(qm);
+	kfree(acc_vf_dev);
+}
+
+static struct vfio_pci_vendor_driver_ops sec_vf_mig_ops = {
+	.owner = THIS_MODULE,
+	.name = "hisi_sec2",
+	.probe = acc_vf_probe,
+	.remove = acc_vf_remove,
+	.device_ops = &acc_vf_device_ops_node,
+};
+
+static struct vfio_pci_vendor_driver_ops hpre_vf_mig_ops = {
+	.owner = THIS_MODULE,
+	.name = "hisi_hpre",
+	.probe = acc_vf_probe,
+	.remove = acc_vf_remove,
+	.device_ops = &acc_vf_device_ops_node,
+};
+
+static struct vfio_pci_vendor_driver_ops zip_vf_mig_ops = {
+	.owner = THIS_MODULE,
+	.name = "hisi_zip",
+	.probe = acc_vf_probe,
+	.remove = acc_vf_remove,
+	.device_ops = &acc_vf_device_ops_node,
+};
+
+static int __init acc_vf_module_init(void)
+{
+	__vfio_pci_register_vendor_driver(&sec_vf_mig_ops);
+
+	__vfio_pci_register_vendor_driver(&hpre_vf_mig_ops);
+
+	__vfio_pci_register_vendor_driver(&zip_vf_mig_ops);
+
+	return 0;
+}
+
+static void __exit acc_vf_module_exit(void)
+{
+	vfio_pci_unregister_vendor_driver(&acc_vf_device_ops_node);
+}
+module_init(acc_vf_module_init);
+module_exit(acc_vf_module_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Longfang Liu <liulongfang@huawei.com>");
+MODULE_DESCRIPTION("HiSilicon Accelerator VF live migration driver");
\ No newline at end of file
diff --git a/KAEKernelDriver/KAEKernelDriver-OLK-5.4/hisilicon/migration/acc_vf_migration.h b/KAEKernelDriver/KAEKernelDriver-OLK-5.4/hisilicon/migration/acc_vf_migration.h
new file mode 100644
index 0000000..1fdcba0
--- /dev/null
+++ b/KAEKernelDriver/KAEKernelDriver-OLK-5.4/hisilicon/migration/acc_vf_migration.h
@@ -0,0 +1,242 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 HiSilicon Limited.
*/ + +#ifndef ACC_MIG_H +#define ACC_MIG_H + +#include +#include +#include "../../include_linux/vfio.h" + +#include "../hisi_acc_qm.h" + +#define VFIO_PCI_OFFSET_SHIFT 40 +#define VFIO_PCI_OFFSET_TO_INDEX(off) ((off) >> VFIO_PCI_OFFSET_SHIFT) +#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT) +#define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) + +#define MIGRATION_REGION_SZ (sizeof(struct acc_vf_data) + \ + sizeof(struct vfio_device_migration_info)) +#define VFIO_DEV_DBG_LEN 256 +#define VFIO_DBG_LOG_LEN 16 +#define VFIO_DEVFN_MASK 0xFF + +#define PCI_BAR_2 2 +#define PCI_BAR_4 4 +#define POLL_PERIOD 10 +#define POLL_TIMEOUT 1000 +#define QM_CACHE_WB_START 0x204 +#define QM_CACHE_WB_DONE 0x208 +#define QM_MB_CMD_PAUSE_QM 0xe +#define QM_ABNORMAL_INT_STATUS 0x100008 +#define QM_IFC_INT_STATUS 0x0028 +#define SEC_CORE_INT_STATUS 0x301008 +#define HPRE_HAC_INT_STATUS 0x301800 +#define HZIP_CORE_INT_STATUS 0x3010AC + +#define QM_VFT_CFG_RDY 0x10006c +#define QM_VFT_CFG_OP_WR 0x100058 +#define QM_VFT_CFG_TYPE 0x10005c +#define QM_VFT_CFG 0x100060 +#define QM_VFT_CFG_OP_ENABLE 0x100054 +#define QM_VFT_CFG_DATA_L 0x100064 +#define QM_VFT_CFG_DATA_H 0x100068 + +#define ERROR_CHECK_TIMEOUT 100 +#define CHECK_DELAY_TIME 100 + +#define QM_SQC_VFT_BASE_SHIFT_V2 28 +#define QM_SQC_VFT_BASE_MASK_V2 GENMASK(15, 0) +#define QM_SQC_VFT_NUM_SHIFT_V2 45 +#define QM_SQC_VFT_NUM_MASK_V2 GENMASK(9, 0) + +/* mailbox */ +#define QM_MB_CMD_SQC_BT 0x4 +#define QM_MB_CMD_CQC_BT 0x5 +#define QM_MB_CMD_SQC_VFT_V2 0x6 + +#define QM_MB_CMD_SEND_BASE 0x300 +#define QM_MB_BUSY_SHIFT 13 +#define QM_MB_OP_SHIFT 14 +#define QM_MB_CMD_DATA_ADDR_L 0x304 +#define QM_MB_CMD_DATA_ADDR_H 0x308 +#define QM_MB_MAX_WAIT_CNT 6000 + +/* doorbell */ +#define QM_DOORBELL_CMD_SQ 0 +#define QM_DOORBELL_CMD_CQ 1 +#define QM_DOORBELL_SQ_CQ_BASE_V2 0x1000 +#define QM_DOORBELL_EQ_AEQ_BASE_V2 0x2000 +#define QM_DB_CMD_SHIFT_V2 12 +#define QM_DB_RAND_SHIFT_V2 16 +#define QM_DB_INDEX_SHIFT_V2 32 +#define QM_DB_PRIORITY_SHIFT_V2 48 + +/* RW regs */ +#define QM_REGS_MAX_LEN 7 +#define QM_REG_ADDR_OFFSET 0x0004 + +#define QM_XQC_ADDR_OFFSET 32U +#define QM_VF_AEQ_INT_MASK 0x0004 +#define QM_VF_EQ_INT_MASK 0x000c +#define QM_IFC_INT_SOURCE_V 0x0020 +#define QM_IFC_INT_MASK 0x0024 +#define QM_IFC_INT_SET_V 0x002c +#define QM_QUE_ISO_CFG_V 0x0030 +#define QM_PAGE_SIZE 0x0034 + +#define QM_EQC_DW0 0X8000 +#define QM_AEQC_DW0 0X8020 + +struct qm_mailbox { + __le16 w0; + __le16 queue_num; + __le32 base_l; + __le32 base_h; + __le32 rsvd; +}; + +enum acc_type { + HISI_SEC = 0x1, + HISI_HPRE = 0x2, + HISI_ZIP = 0x3, +}; + +struct vf_acc_type { + const char *name; + u32 type; +}; + +static struct vf_acc_type vf_acc_types[] = { + {"hisi_sec2", HISI_SEC}, + {"hisi_hpre", HISI_HPRE}, + {"hisi_zip", HISI_ZIP}, +}; + +enum mig_debug_cmd { + STATE_SAVE, + STATE_RESUME, + MB_TEST, + MIG_DATA_DUMP, + MIG_DEV_SHOW, +}; + +static const char * const vf_dev_state[] = { + "Stop", + "Running", + "Saving", + "Running & Saving", + "Resuming", +}; + +#define QM_MATCH_SIZE 32L +struct acc_vf_data { + /* QM match information */ + u32 qp_num; + u32 acc_type; + u32 que_iso_cfg; + u32 qp_base; + /* QM reserved 4 match information */ + u32 qm_rsv_state[4]; + + /* QM RW regs */ + u32 aeq_int_mask; + u32 eq_int_mask; + u32 ifc_int_source; + u32 ifc_int_mask; + u32 ifc_int_set; + u32 page_size; + u32 vf_state; + + /* + * QM_VF_MB has 4 regs don't need to migration + * mailbox regs writeback value will cause + * hardware to perform 
command operations.
+	 */
+
+	/* QM_EQC_DW has 7 regs */
+	u32 qm_eqc_dw[7];
+
+	/* QM_AEQC_DW has 7 regs */
+	u32 qm_aeqc_dw[7];
+
+	/* QM reserved 5 regs */
+	u32 qm_rsv_regs[5];
+
+	/* qm memory init information */
+	dma_addr_t eqe_dma;
+	dma_addr_t aeqe_dma;
+	dma_addr_t sqc_dma;
+	dma_addr_t cqc_dma;
+};
+
+struct acc_vf_remap_irq_ctx {
+	struct eventfd_ctx *trigger;
+	struct virqfd *sync;
+	atomic_t cnt;
+	wait_queue_head_t waitq;
+	bool init;
+};
+
+struct acc_vf_migration {
+	__u32 vf_vendor;
+	__u32 vf_device;
+	__u32 handle;
+	struct pci_dev *pf_dev;
+	struct pci_dev *vf_dev;
+	struct hisi_qm *pf_qm;
+	struct hisi_qm *vf_qm;
+	int vf_id;
+	int refcnt;
+	u8 acc_type;
+	bool mig_ignore;
+	struct mutex reflock;
+
+	struct vfio_device_migration_info *mig_ctl;
+	struct acc_vf_data *vf_data;
+	bool in_dirty_track;
+	struct acc_vf_remap_irq_ctx remap_irq_ctx;
+	struct acc_vf_region *regions;
+	int num_regions;
+	struct dentry *debug_root;
+};
+
+struct acc_vf_region_ops {
+	int (*rw)(struct acc_vf_migration *acc_vf_dev,
+		  char __user *buf, size_t count,
+		  loff_t *ppos, bool iswrite);
+	void (*release)(struct acc_vf_migration *acc_vf_dev,
+			struct acc_vf_region *region);
+	int (*mmap)(struct acc_vf_migration *acc_vf_dev,
+		    struct acc_vf_region *region,
+		    struct vm_area_struct *vma);
+	int (*add_cap)(struct acc_vf_migration *acc_vf_dev,
+		       struct acc_vf_region *region,
+		       struct vfio_info_cap *caps);
+};
+
+struct acc_vf_region {
+	u32 type;
+	u32 subtype;
+	size_t size;
+	u32 flags;
+	const struct acc_vf_region_ops *ops;
+	void *data;
+};
+
+struct acc_vf_irqops {
+	int (*set_irqs)(struct acc_vf_migration *acc_vf_dev,
+			u32 flags, unsigned int index,
+			unsigned int start, unsigned int count,
+			void *data);
+};
+
+struct acc_vf_irq {
+	u32 type;
+	u32 subtype;
+	u32 flags;
+	u32 count;
+	const struct acc_vf_irqops *ops;
+};
+
+#endif /* ACC_MIG_H */
\ No newline at end of file
diff --git a/KAEKernelDriver/KAEKernelDriver-OLK-5.4/include_linux/vfio.h b/KAEKernelDriver/KAEKernelDriver-OLK-5.4/include_linux/vfio.h
new file mode 100644
index 0000000..0b6cda3
--- /dev/null
+++ b/KAEKernelDriver/KAEKernelDriver-OLK-5.4/include_linux/vfio.h
@@ -0,0 +1,298 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * VFIO API definition
+ *
+ * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
+ *     Author: Alex Williamson <alex.williamson@redhat.com>
+ */
+#ifndef VFIO_H
+#define VFIO_H
+
+
+#include <linux/iommu.h>
+#include <linux/mm.h>
+#include <linux/workqueue.h>
+#include <linux/poll.h>
+#include "../include_uapi_linux/vfio.h"
+
+#ifndef KABI_EXTEND
+#define KABI_EXTEND(_new) _new;
+#endif
+
+struct vfio_device {
+	struct device *dev;
+	const struct vfio_device_ops *ops;
+	struct vfio_group *group;
+
+	/* Members below here are private, not for driver use */
+	refcount_t refcount;
+	struct completion comp;
+	struct list_head group_next;
+	void *device_data;
+};
+
+/**
+ * struct vfio_device_ops - VFIO bus driver device callbacks
+ *
+ * @open: Called when userspace creates new file descriptor for device
+ * @release: Called when userspace releases file descriptor for device
+ * @read: Perform read(2) on device file descriptor
+ * @write: Perform write(2) on device file descriptor
+ * @ioctl: Perform ioctl(2) on device file descriptor, supporting VFIO_DEVICE_*
+ *         operations documented below
+ * @mmap: Perform mmap(2) on a region of the device file descriptor
+ * @request: Request for the bus driver to release the device
+ * @match: Optional device name match callback (return: 0 for no-match, >0 for
+ *         match, -errno for abort (ex.
match with insufficient or incorrect + * additional args) + */ +struct vfio_device_ops { + char *name; + int (*open)(void *device_data); + void (*release)(void *device_data); + ssize_t (*read)(void *device_data, char __user *buf, + size_t count, loff_t *ppos); + ssize_t (*write)(void *device_data, const char __user *buf, + size_t count, loff_t *size); + long (*ioctl)(void *device_data, unsigned int cmd, + unsigned long arg); + int (*mmap)(void *device_data, struct vm_area_struct *vma); + void (*request)(void *device_data, unsigned int count); + int (*match)(void *device_data, char *buf); +}; + +extern struct iommu_group *vfio_iommu_group_get(struct device *dev); +extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev); + +void vfio_init_group_dev(struct vfio_device *device, struct device *dev, + const struct vfio_device_ops *ops, void *device_data); +int vfio_register_group_dev(struct vfio_device *device); +extern int vfio_add_group_dev(struct device *dev, + const struct vfio_device_ops *ops, + void *device_data); + +extern void *vfio_del_group_dev(struct device *dev); +void vfio_unregister_group_dev(struct vfio_device *device); +extern struct vfio_device *vfio_device_get_from_dev(struct device *dev); +extern void vfio_device_put(struct vfio_device *device); +extern void *vfio_device_data(struct vfio_device *device); + +/** + * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks + */ +struct vfio_iommu_driver_ops { + char *name; + struct module *owner; + void *(*open)(unsigned long arg); + void (*release)(void *iommu_data); + ssize_t (*read)(void *iommu_data, char __user *buf, + size_t count, loff_t *ppos); + ssize_t (*write)(void *iommu_data, const char __user *buf, + size_t count, loff_t *size); + long (*ioctl)(void *iommu_data, unsigned int cmd, + unsigned long arg); + int (*mmap)(void *iommu_data, struct vm_area_struct *vma); + int (*attach_group)(void *iommu_data, + struct iommu_group *group); + void (*detach_group)(void *iommu_data, + struct iommu_group *group); + int (*pin_pages)(void *iommu_data, + struct iommu_group *group, + unsigned long *user_pfn, + int npage, int prot, + unsigned long *phys_pfn); + int (*unpin_pages)(void *iommu_data, + unsigned long *user_pfn, int npage); + int (*register_notifier)(void *iommu_data, + unsigned long *events, + struct notifier_block *nb); + int (*unregister_notifier)(void *iommu_data, + struct notifier_block *nb); + int (*dma_rw)(void *iommu_data, dma_addr_t user_iova, + void *data, size_t count, bool write); + KABI_EXTEND(struct iommu_domain *(*group_iommu_domain)(void *iommu_data, + struct iommu_group *group)) +}; + +extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops); + +extern void vfio_unregister_iommu_driver( + const struct vfio_iommu_driver_ops *ops); + +/* + * External user API + */ +extern struct vfio_group *vfio_group_get_external_user(struct file *filep); +extern void vfio_group_put_external_user(struct vfio_group *group); +extern struct vfio_group *vfio_group_get_external_user_from_dev(struct device + *dev); +extern bool vfio_external_group_match_file(struct vfio_group *group, + struct file *filep); +extern int vfio_external_user_iommu_id(struct vfio_group *group); +extern long vfio_external_check_extension(struct vfio_group *group, + unsigned long arg); + +#define VFIO_PIN_PAGES_MAX_ENTRIES (PAGE_SIZE/sizeof(unsigned long)) + +extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, + int npage, int prot, unsigned long *phys_pfn); +extern int 
vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, + int npage); + +extern int vfio_group_pin_pages(struct vfio_group *group, + unsigned long *user_iova_pfn, int npage, + int prot, unsigned long *phys_pfn); +extern int vfio_group_unpin_pages(struct vfio_group *group, + unsigned long *user_iova_pfn, int npage); + +extern int vfio_dma_rw(struct vfio_group *group, dma_addr_t user_iova, + void *data, size_t len, bool write); + +extern struct iommu_domain *vfio_group_iommu_domain(struct vfio_group *group); + +/* each type has independent events */ +enum vfio_notify_type { + VFIO_IOMMU_NOTIFY = 0, + VFIO_GROUP_NOTIFY = 1, +}; + +/* events for VFIO_IOMMU_NOTIFY */ +#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0) + +/* events for VFIO_GROUP_NOTIFY */ +#define VFIO_GROUP_NOTIFY_SET_KVM BIT(0) + +extern int vfio_register_notifier(struct device *dev, + enum vfio_notify_type type, + unsigned long *required_events, + struct notifier_block *nb); +extern int vfio_unregister_notifier(struct device *dev, + enum vfio_notify_type type, + struct notifier_block *nb); + +struct kvm; +extern void vfio_group_set_kvm(struct vfio_group *group, struct kvm *kvm); + +/* + * Sub-module helpers + */ +struct vfio_info_cap { + struct vfio_info_cap_header *buf; + size_t size; +}; +extern struct vfio_info_cap_header *vfio_info_cap_add( + struct vfio_info_cap *caps, size_t size, u16 id, u16 version); +extern void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset); + +extern int vfio_info_add_capability(struct vfio_info_cap *caps, + struct vfio_info_cap_header *cap, + size_t size); + +extern int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, + int num_irqs, int max_irq_type, + size_t *data_size); + +struct pci_dev; +#if IS_ENABLED(CONFIG_VFIO_SPAPR_EEH) +extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev); +extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev); +extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, + unsigned int cmd, + unsigned long arg); +#else +static inline void vfio_spapr_pci_eeh_open(struct pci_dev *pdev) +{ +} + +static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) +{ +} + +static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, + unsigned int cmd, + unsigned long arg) +{ + return -ENOTTY; +} +#endif /* CONFIG_VFIO_SPAPR_EEH */ + +/* + * IRQfd - generic + */ +struct virqfd { + void *opaque; + struct eventfd_ctx *eventfd; + int (*handler)(void *, void *); + void (*thread)(void *, void *); + void *data; + struct work_struct inject; + wait_queue_entry_t wait; + poll_table pt; + struct work_struct shutdown; + struct work_struct flush_inject; + struct virqfd **pvirqfd; +}; + +extern int vfio_virqfd_enable(void *opaque, + int (*handler)(void *, void *), + void (*thread)(void *, void *), + void *data, struct virqfd **pvirqfd, int fd); +extern void vfio_virqfd_disable(struct virqfd **pvirqfd); +void vfio_virqfd_flush_thread(struct virqfd **pvirqfd); + +extern int vfio_pci_num_regions(void *device_data); +extern struct pci_dev *vfio_pci_pdev(void *device_data); +extern long vfio_pci_ioctl(void *device_data, + unsigned int cmd, unsigned long arg); +extern ssize_t vfio_pci_read(void *device_data, char __user *buf, + size_t count, loff_t *ppos); +extern ssize_t vfio_pci_write(void *device_data, const char __user *buf, + size_t count, loff_t *ppos); +extern int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma); +extern void vfio_pci_request(void *device_data, unsigned int count); +extern int vfio_pci_open(void 
*device_data);
+extern void vfio_pci_release(void *device_data);
+extern void *vfio_pci_vendor_data(void *device_data);
+extern int vfio_pci_set_vendor_regions(void *device_data,
+				       int num_vendor_regions);
+
+struct vfio_pci_vendor_driver_ops {
+	char *name;
+	struct module *owner;
+	/* Used to match device */
+	unsigned short vendor;
+	unsigned short device;
+	void *(*probe)(struct pci_dev *pdev);
+	void (*remove)(void *vendor_data);
+	struct vfio_device_ops *device_ops;
+};
+int __vfio_pci_register_vendor_driver(struct vfio_pci_vendor_driver_ops *ops);
+void vfio_pci_unregister_vendor_driver(struct vfio_device_ops *device_ops);
+
+#define vfio_pci_register_vendor_driver(__name, __probe, __remove,	\
+					__device_ops)			\
+static struct vfio_pci_vendor_driver_ops  __ops ## _node = {		\
+	.owner		= THIS_MODULE,					\
+	.name		= __name,					\
+	.probe		= __probe,					\
+	.remove		= __remove,					\
+	.device_ops	= __device_ops,					\
+};									\
+__vfio_pci_register_vendor_driver(&__ops ## _node)
+
+#define module_vfio_pci_register_vendor_handler(name, probe, remove,	\
+						device_ops)		\
+static int __init device_ops ## _module_init(void)			\
+{									\
+	vfio_pci_register_vendor_driver(name, probe, remove,		\
+					device_ops);			\
+	return 0;							\
+};									\
+static void __exit device_ops ## _module_exit(void)			\
+{									\
+	vfio_pci_unregister_vendor_driver(device_ops);			\
+};									\
+module_init(device_ops ## _module_init);				\
+module_exit(device_ops ## _module_exit)
+
+#endif /* VFIO_H */
\ No newline at end of file
diff --git a/KAEKernelDriver/KAEKernelDriver-OLK-5.4/include_uapi_linux/vfio.h b/KAEKernelDriver/KAEKernelDriver-OLK-5.4/include_uapi_linux/vfio.h
new file mode 100644
index 0000000..52658db
--- /dev/null
+++ b/KAEKernelDriver/KAEKernelDriver-OLK-5.4/include_uapi_linux/vfio.h
@@ -0,0 +1,1444 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * VFIO API definition
+ *
+ * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
+ *     Author: Alex Williamson <alex.williamson@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _UAPIVFIO_H
+#define _UAPIVFIO_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define VFIO_API_VERSION	0
+
+
+/* Kernel & User level defines for VFIO IOCTLs. */
+
+/* Extensions */
+
+#define VFIO_TYPE1_IOMMU		1
+#define VFIO_SPAPR_TCE_IOMMU		2
+#define VFIO_TYPE1v2_IOMMU		3
+/*
+ * IOMMU enforces DMA cache coherence (ex. PCIe NoSnoop stripping).  This
+ * capability is subject to change as groups are added or removed.
+ */
+#define VFIO_DMA_CC_IOMMU		4
+
+/* Check if EEH is supported */
+#define VFIO_EEH			5
+
+/* Two-stage IOMMU */
+#define VFIO_TYPE1_NESTING_IOMMU	6	/* Implies v2 */
+
+#define VFIO_SPAPR_TCE_v2_IOMMU		7
+
+/*
+ * The No-IOMMU IOMMU offers no translation or isolation for devices and
+ * supports no ioctls outside of VFIO_CHECK_EXTENSION.  Use of VFIO's No-IOMMU
+ * code will taint the host kernel and should be used with extreme caution.
+ */
+#define VFIO_NOIOMMU_IOMMU		8
+
+/*
+ * The vfio_iommu driver may support the user clearing the dirty log manually,
+ * which means the dirty log can be requested not to be cleared automatically
+ * after it is copied to userspace; it is then the user's duty to clear the
+ * dirty log.
+ *
+ * Note: please refer to VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR and
+ * VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP.
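+ *
+ * For example, a minimal userspace sketch (container is assumed to be an
+ * open /dev/vfio/vfio fd; error handling omitted):
+ *
+ *	if (ioctl(container, VFIO_CHECK_EXTENSION,
+ *		  VFIO_DIRTY_LOG_MANUAL_CLEAR) > 0)
+ *		manual_clear_supported = true;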
+ */
+#define VFIO_DIRTY_LOG_MANUAL_CLEAR	11
+
+/*
+ * The IOCTL interface is designed for extensibility by embedding the
+ * structure length (argsz) and flags into structures passed between
+ * kernel and userspace.  We therefore use the _IO() macro for these
+ * defines to avoid implicitly embedding a size into the ioctl request.
+ * As structure fields are added, argsz will increase to match and flag
+ * bits will be defined to indicate additional fields with valid data.
+ * It's *always* the caller's responsibility to indicate the size of
+ * the structure passed by setting argsz appropriately.
+ */
+
+#define VFIO_TYPE	(';')
+#define VFIO_BASE	100
+
+/*
+ * For extension of INFO ioctls, VFIO makes use of a capability chain
+ * designed after PCI/e capabilities.  A flag bit indicates whether
+ * this capability chain is supported and a field defined in the fixed
+ * structure defines the offset of the first capability in the chain.
+ * This field is only valid when the corresponding bit in the flags
+ * bitmap is set.  This offset field is relative to the start of the
+ * INFO buffer, as is the next field within each capability header.
+ * The id within the header is a shared address space per INFO ioctl,
+ * while the version field is specific to the capability id.  The
+ * contents following the header are specific to the capability id.
+ */
+struct vfio_info_cap_header {
+	__u16	id;		/* Identifies capability */
+	__u16	version;	/* Version specific to the capability ID */
+	__u32	next;		/* Offset of next capability */
+};
+
+/*
+ * Callers of INFO ioctls passing insufficiently sized buffers will see
+ * the capability chain flag bit set, a zero value for the first capability
+ * offset (if available within the provided argsz), and argsz will be
+ * updated to report the necessary buffer size.  For compatibility, the
+ * INFO ioctl will not report error in this case, but the capability chain
+ * will not be available.
+ */
+
+/* -------- IOCTLs for VFIO file descriptor (/dev/vfio/vfio) -------- */
+
+/**
+ * VFIO_GET_API_VERSION - _IO(VFIO_TYPE, VFIO_BASE + 0)
+ *
+ * Report the version of the VFIO API.  This allows us to bump the entire
+ * API version should we later need to add or change features in incompatible
+ * ways.
+ * Return: VFIO_API_VERSION
+ * Availability: Always
+ */
+#define VFIO_GET_API_VERSION	_IO(VFIO_TYPE, VFIO_BASE + 0)
+
+/**
+ * VFIO_CHECK_EXTENSION - _IOW(VFIO_TYPE, VFIO_BASE + 1, __u32)
+ *
+ * Check whether an extension is supported.
+ * Return: 0 if not supported, 1 (or some other positive integer) if supported.
+ * Availability: Always
+ */
+#define VFIO_CHECK_EXTENSION	_IO(VFIO_TYPE, VFIO_BASE + 1)
+
+/**
+ * VFIO_SET_IOMMU - _IOW(VFIO_TYPE, VFIO_BASE + 2, __s32)
+ *
+ * Set the iommu to the given type.  The type must be supported by an
+ * iommu driver as verified by calling CHECK_EXTENSION using the same
+ * type.  A group must be set to this file descriptor before this
+ * ioctl is available.  The IOMMU interfaces enabled by this call are
+ * specific to the value set.
+ * Return: 0 on success, -errno on failure
+ * Availability: When VFIO group attached
+ */
+#define VFIO_SET_IOMMU	_IO(VFIO_TYPE, VFIO_BASE + 2)
+
+/* -------- IOCTLs for GROUP file descriptors (/dev/vfio/$GROUP) -------- */
+
+/**
+ * VFIO_GROUP_GET_STATUS - _IOR(VFIO_TYPE, VFIO_BASE + 3,
+ *						struct vfio_group_status)
+ *
+ * Retrieve information about the group.  Fills in provided
+ * struct vfio_group_status.  Caller sets argsz.
+ * Return: 0 on success, -errno on failure.
+ * Availability: Always + */ +struct vfio_group_status { + __u32 argsz; + __u32 flags; +#define VFIO_GROUP_FLAGS_VIABLE (1 << 0) +#define VFIO_GROUP_FLAGS_CONTAINER_SET (1 << 1) +}; +#define VFIO_GROUP_GET_STATUS _IO(VFIO_TYPE, VFIO_BASE + 3) + +/** + * VFIO_GROUP_SET_CONTAINER - _IOW(VFIO_TYPE, VFIO_BASE + 4, __s32) + * + * Set the container for the VFIO group to the open VFIO file + * descriptor provided. Groups may only belong to a single + * container. Containers may, at their discretion, support multiple + * groups. Only when a container is set are all of the interfaces + * of the VFIO file descriptor and the VFIO group file descriptor + * available to the user. + * Return: 0 on success, -errno on failure. + * Availability: Always + */ +#define VFIO_GROUP_SET_CONTAINER _IO(VFIO_TYPE, VFIO_BASE + 4) + +/** + * VFIO_GROUP_UNSET_CONTAINER - _IO(VFIO_TYPE, VFIO_BASE + 5) + * + * Remove the group from the attached container. This is the + * opposite of the SET_CONTAINER call and returns the group to + * an initial state. All device file descriptors must be released + * prior to calling this interface. When removing the last group + * from a container, the IOMMU will be disabled and all state lost, + * effectively also returning the VFIO file descriptor to an initial + * state. + * Return: 0 on success, -errno on failure. + * Availability: When attached to container + */ +#define VFIO_GROUP_UNSET_CONTAINER _IO(VFIO_TYPE, VFIO_BASE + 5) + +/** + * VFIO_GROUP_GET_DEVICE_FD - _IOW(VFIO_TYPE, VFIO_BASE + 6, char) + * + * Return a new file descriptor for the device object described by + * the provided string. The string should match a device listed in + * the devices subdirectory of the IOMMU group sysfs entry. The + * group containing the device must already be added to this context. + * Return: new file descriptor on success, -errno on failure. + * Availability: When attached to container + */ +#define VFIO_GROUP_GET_DEVICE_FD _IO(VFIO_TYPE, VFIO_BASE + 6) + +/* --------------- IOCTLs for DEVICE file descriptors --------------- */ + +/** + * VFIO_DEVICE_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 7, + * struct vfio_device_info) + * + * Retrieve information about the device. Fills in provided + * struct vfio_device_info. Caller sets argsz. + * Return: 0 on success, -errno on failure. + */ +struct vfio_device_info { + __u32 argsz; + __u32 flags; +#define VFIO_DEVICE_FLAGS_RESET (1 << 0) /* Device supports reset */ +#define VFIO_DEVICE_FLAGS_PCI (1 << 1) /* vfio-pci device */ +#define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2) /* vfio-platform device */ +#define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */ +#define VFIO_DEVICE_FLAGS_CCW (1 << 4) /* vfio-ccw device */ +#define VFIO_DEVICE_FLAGS_AP (1 << 5) /* vfio-ap device */ +#define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6) /* vfio-fsl-mc device */ +#define VFIO_DEVICE_FLAGS_CAPS (1 << 7) /* Info supports caps */ + __u32 num_regions; /* Max region index + 1 */ + __u32 num_irqs; /* Max IRQ index + 1 */ + __u32 cap_offset; /* Offset within info struct of first cap */ +}; +#define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7) + +/* + * Vendor driver using Mediated device framework should provide device_api + * attribute in supported type attribute groups. Device API string should be one + * of the following corresponding to device flags in vfio_device_info structure. 
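+ *
+ * For example, a minimal sketch of the check a userspace driver might
+ * perform (info is assumed to be a struct vfio_device_info already filled
+ * in by VFIO_DEVICE_GET_INFO, and the device_api string to have been read
+ * from sysfs by the caller):
+ *
+ *	if (info.flags & VFIO_DEVICE_FLAGS_PCI)
+ *		device_api is expected to equal VFIO_DEVICE_API_PCI_STRING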
+ */ + +#define VFIO_DEVICE_API_PCI_STRING "vfio-pci" +#define VFIO_DEVICE_API_PLATFORM_STRING "vfio-platform" +#define VFIO_DEVICE_API_AMBA_STRING "vfio-amba" +#define VFIO_DEVICE_API_CCW_STRING "vfio-ccw" +#define VFIO_DEVICE_API_AP_STRING "vfio-ap" + +/* + * The following capabilities are unique to s390 zPCI devices. Their contents + * are further-defined in vfio_zdev.h + */ +#define VFIO_DEVICE_INFO_CAP_ZPCI_BASE 1 +#define VFIO_DEVICE_INFO_CAP_ZPCI_GROUP 2 +#define VFIO_DEVICE_INFO_CAP_ZPCI_UTIL 3 +#define VFIO_DEVICE_INFO_CAP_ZPCI_PFIP 4 + +/** + * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8, + * struct vfio_region_info) + * + * Retrieve information about a device region. Caller provides + * struct vfio_region_info with index value set. Caller sets argsz. + * Implementation of region mapping is bus driver specific. This is + * intended to describe MMIO, I/O port, as well as bus specific + * regions (ex. PCI config space). Zero sized regions may be used + * to describe unimplemented regions (ex. unimplemented PCI BARs). + * Return: 0 on success, -errno on failure. + */ +struct vfio_region_info { + __u32 argsz; + __u32 flags; +#define VFIO_REGION_INFO_FLAG_READ (1 << 0) /* Region supports read */ +#define VFIO_REGION_INFO_FLAG_WRITE (1 << 1) /* Region supports write */ +#define VFIO_REGION_INFO_FLAG_MMAP (1 << 2) /* Region supports mmap */ +#define VFIO_REGION_INFO_FLAG_CAPS (1 << 3) /* Info supports caps */ + __u32 index; /* Region index */ + __u32 cap_offset; /* Offset within info struct of first cap */ + __u64 size; /* Region size (bytes) */ + __u64 offset; /* Region offset from start of device fd */ +}; +#define VFIO_DEVICE_GET_REGION_INFO _IO(VFIO_TYPE, VFIO_BASE + 8) + +/* + * The sparse mmap capability allows finer granularity of specifying areas + * within a region with mmap support. When specified, the user should only + * mmap the offset ranges specified by the areas array. mmaps outside of the + * areas specified may fail (such as the range covering a PCI MSI-X table) or + * may result in improper device behavior. + * + * The structures below define version 1 of this capability. + */ +#define VFIO_REGION_INFO_CAP_SPARSE_MMAP 1 + +struct vfio_region_sparse_mmap_area { + __u64 offset; /* Offset of mmap'able area within region */ + __u64 size; /* Size of mmap'able area */ +}; + +struct vfio_region_info_cap_sparse_mmap { + struct vfio_info_cap_header header; + __u32 nr_areas; + __u32 reserved; + struct vfio_region_sparse_mmap_area areas[]; +}; + +/* + * The device specific type capability allows regions unique to a specific + * device or class of devices to be exposed. This helps solve the problem for + * vfio bus drivers of defining which region indexes correspond to which region + * on the device, without needing to resort to static indexes, as done by + * vfio-pci. For instance, if we were to go back in time, we might remove + * VFIO_PCI_VGA_REGION_INDEX and let vfio-pci simply define that all indexes + * greater than or equal to VFIO_PCI_NUM_REGIONS are device specific and we'd + * make a "VGA" device specific type to describe the VGA access space. This + * means that non-VGA devices wouldn't need to waste this index, and thus the + * address space associated with it due to implementation of device file + * descriptor offsets in vfio-pci. + * + * The current implementation is now part of the user ABI, so we can't use this + * for VGA, but there are other upcoming use cases, such as opregions for Intel + * IGD devices and framebuffers for vGPU devices. 
We missed VGA, but we'll
+ * use this for future additions.
+ *
+ * The structure below defines version 1 of this capability.
+ */
+#define VFIO_REGION_INFO_CAP_TYPE	2
+
+struct vfio_region_info_cap_type {
+	struct vfio_info_cap_header header;
+	__u32 type;	/* global per bus driver */
+	__u32 subtype;	/* type specific */
+};
+
+/*
+ * List of region types, global per bus driver.
+ * If you introduce a new type, please add it here.
+ */
+
+/* PCI region type containing a PCI vendor part */
+#define VFIO_REGION_TYPE_PCI_VENDOR_TYPE	(1 << 31)
+#define VFIO_REGION_TYPE_PCI_VENDOR_MASK	(0xffff)
+#define VFIO_REGION_TYPE_GFX			(1)
+#define VFIO_REGION_TYPE_CCW			(2)
+#define VFIO_REGION_TYPE_MIGRATION		(3)
+
+/* sub-types for VFIO_REGION_TYPE_PCI_* */
+
+/* 8086 vendor PCI sub-types */
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION	(1)
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG	(2)
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG	(3)
+
+/* 10de vendor PCI sub-types */
+/*
+ * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space.
+ */
+#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM	(1)
+
+/* 1014 vendor PCI sub-types */
+/*
+ * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU
+ * to do TLB invalidation on a GPU.
+ */
+#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD	(1)
+
+/* sub-types for VFIO_REGION_TYPE_GFX */
+#define VFIO_REGION_SUBTYPE_GFX_EDID		(1)
+
+/**
+ * struct vfio_region_gfx_edid - EDID region layout.
+ *
+ * Set display link state and EDID blob.
+ *
+ * The EDID blob has monitor information such as brand, name, serial
+ * number, physical size, supported video modes and more.
+ *
+ * This special region allows userspace (typically qemu) to set a virtual
+ * EDID for the virtual monitor, which allows a flexible display
+ * configuration.
+ *
+ * For the edid blob spec look here:
+ *    https://en.wikipedia.org/wiki/Extended_Display_Identification_Data
+ *
+ * On linux systems you can find the EDID blob in sysfs:
+ *    /sys/class/drm/${card}/${connector}/edid
+ *
+ * You can use the edid-decode utility (comes with xorg-x11-utils) to
+ * decode the EDID blob.
+ *
+ * @edid_offset: location of the edid blob, relative to the
+ *               start of the region (readonly).
+ * @edid_max_size: max size of the edid blob (readonly).
+ * @edid_size: actual edid size (read/write).
+ * @link_state: display link state (read/write).
+ * VFIO_DEVICE_GFX_LINK_STATE_UP: Monitor is turned on.
+ * VFIO_DEVICE_GFX_LINK_STATE_DOWN: Monitor is turned off.
+ * @max_xres: max display width (0 == no limitation, readonly).
+ * @max_yres: max display height (0 == no limitation, readonly).
+ *
+ * EDID update protocol:
+ *   (1) set link-state to down.
+ *   (2) update edid blob and size.
+ *   (3) set link-state to up.
+ */
+struct vfio_region_gfx_edid {
+	__u32 edid_offset;
+	__u32 edid_max_size;
+	__u32 edid_size;
+	__u32 max_xres;
+	__u32 max_yres;
+	__u32 link_state;
+#define VFIO_DEVICE_GFX_LINK_STATE_UP    1
+#define VFIO_DEVICE_GFX_LINK_STATE_DOWN  2
+};
+
+/* sub-types for VFIO_REGION_TYPE_CCW */
+#define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD	(1)
+#define VFIO_REGION_SUBTYPE_CCW_SCHIB		(2)
+#define VFIO_REGION_SUBTYPE_CCW_CRW		(3)
+
+/* sub-types for VFIO_REGION_TYPE_MIGRATION */
+#define VFIO_REGION_SUBTYPE_MIGRATION		(1)
+
+/*
+ * The structure vfio_device_migration_info is placed at the 0th offset of
+ * the VFIO_REGION_SUBTYPE_MIGRATION region to get and set VFIO device related
+ * migration information. Field accesses from this structure are only supported
+ * at their native width and alignment.
Otherwise, the result is undefined and + * vendor drivers should return an error. + * + * device_state: (read/write) + * - The user application writes to this field to inform the vendor driver + * about the device state to be transitioned to. + * - The vendor driver should take the necessary actions to change the + * device state. After successful transition to a given state, the + * vendor driver should return success on write(device_state, state) + * system call. If the device state transition fails, the vendor driver + * should return an appropriate -errno for the fault condition. + * - On the user application side, if the device state transition fails, + * that is, if write(device_state, state) returns an error, read + * device_state again to determine the current state of the device from + * the vendor driver. + * - The vendor driver should return previous state of the device unless + * the vendor driver has encountered an internal error, in which case + * the vendor driver may report the device_state VFIO_DEVICE_STATE_ERROR. + * - The user application must use the device reset ioctl to recover the + * device from VFIO_DEVICE_STATE_ERROR state. If the device is + * indicated to be in a valid device state by reading device_state, the + * user application may attempt to transition the device to any valid + * state reachable from the current state or terminate itself. + * + * device_state consists of 3 bits: + * - If bit 0 is set, it indicates the _RUNNING state. If bit 0 is clear, + * it indicates the _STOP state. When the device state is changed to + * _STOP, driver should stop the device before write() returns. + * - If bit 1 is set, it indicates the _SAVING state, which means that the + * driver should start gathering device state information that will be + * provided to the VFIO user application to save the device's state. + * - If bit 2 is set, it indicates the _RESUMING state, which means that + * the driver should prepare to resume the device. Data provided through + * the migration region should be used to resume the device. + * Bits 3 - 31 are reserved for future use. To preserve them, the user + * application should perform a read-modify-write operation on this + * field when modifying the specified bits. + * + * +------- _RESUMING + * |+------ _SAVING + * ||+----- _RUNNING + * ||| + * 000b => Device Stopped, not saving or resuming + * 001b => Device running, which is the default state + * 010b => Stop the device & save the device state, stop-and-copy state + * 011b => Device running and save the device state, pre-copy state + * 100b => Device stopped and the device state is resuming + * 101b => Invalid state + * 110b => Error state + * 111b => Invalid state + * + * State transitions: + * + * _RESUMING _RUNNING Pre-copy Stop-and-copy _STOP + * (100b) (001b) (011b) (010b) (000b) + * 0. Running or default state + * | + * + * 1. Normal Shutdown (optional) + * |------------------------------------->| + * + * 2. Save the state or suspend + * |------------------------->|---------->| + * + * 3. Save the state during live migration + * |----------->|------------>|---------->| + * + * 4. Resuming + * |<---------| + * + * 5. Resumed + * |--------->| + * + * 0. Default state of VFIO device is _RUNNING when the user application starts. + * 1. During normal shutdown of the user application, the user application may + * optionally change the VFIO device state from _RUNNING to _STOP. This + * transition is optional. The vendor driver must support this transition but + * must not require it. 
+ * 2. When the user application saves state or suspends the application, the + * device state transitions from _RUNNING to stop-and-copy and then to _STOP. + * On state transition from _RUNNING to stop-and-copy, driver must stop the + * device, save the device state and send it to the application through the + * migration region. The sequence to be followed for such transition is given + * below. + * 3. In live migration of user application, the state transitions from _RUNNING + * to pre-copy, to stop-and-copy, and to _STOP. + * On state transition from _RUNNING to pre-copy, the driver should start + * gathering the device state while the application is still running and send + * the device state data to application through the migration region. + * On state transition from pre-copy to stop-and-copy, the driver must stop + * the device, save the device state and send it to the user application + * through the migration region. + * Vendor drivers must support the pre-copy state even for implementations + * where no data is provided to the user before the stop-and-copy state. The + * user must not be required to consume all migration data before the device + * transitions to a new state, including the stop-and-copy state. + * The sequence to be followed for above two transitions is given below. + * 4. To start the resuming phase, the device state should be transitioned from + * the _RUNNING to the _RESUMING state. + * In the _RESUMING state, the driver should use the device state data + * received through the migration region to resume the device. + * 5. After providing saved device data to the driver, the application should + * change the state from _RESUMING to _RUNNING. + * + * reserved: + * Reads on this field return zero and writes are ignored. + * + * pending_bytes: (read only) + * The number of pending bytes still to be migrated from the vendor driver. + * + * data_offset: (read only) + * The user application should read data_offset field from the migration + * region. The user application should read the device data from this + * offset within the migration region during the _SAVING state or write + * the device data during the _RESUMING state. See below for details of + * sequence to be followed. + * + * data_size: (read/write) + * The user application should read data_size to get the size in bytes of + * the data copied in the migration region during the _SAVING state and + * write the size in bytes of the data copied in the migration region + * during the _RESUMING state. + * + * The format of the migration region is as follows: + * ------------------------------------------------------------------ + * |vfio_device_migration_info| data section | + * | | /////////////////////////////// | + * ------------------------------------------------------------------ + * ^ ^ + * offset 0-trapped part data_offset + * + * The structure vfio_device_migration_info is always followed by the data + * section in the region, so data_offset will always be nonzero. The offset + * from where the data is copied is decided by the kernel driver. The data + * section can be trapped, mmapped, or partitioned, depending on how the kernel + * driver defines the data section. The data section partition can be defined + * as mapped by the sparse mmap capability. If mmapped, data_offset must be + * page aligned, whereas initial section which contains the + * vfio_device_migration_info structure, might not end at the offset, which is + * page aligned. 
The user is not required to access through mmap regardless + * of the capabilities of the region mmap. + * The vendor driver should determine whether and how to partition the data + * section. The vendor driver should return data_offset accordingly. + * + * The sequence to be followed while in pre-copy state and stop-and-copy state + * is as follows: + * a. Read pending_bytes, indicating the start of a new iteration to get device + * data. Repeated read on pending_bytes at this stage should have no side + * effects. + * If pending_bytes == 0, the user application should not iterate to get data + * for that device. + * If pending_bytes > 0, perform the following steps. + * b. Read data_offset, indicating that the vendor driver should make data + * available through the data section. The vendor driver should return this + * read operation only after data is available from (region + data_offset) + * to (region + data_offset + data_size). + * c. Read data_size, which is the amount of data in bytes available through + * the migration region. + * Read on data_offset and data_size should return the offset and size of + * the current buffer if the user application reads data_offset and + * data_size more than once here. + * d. Read data_size bytes of data from (region + data_offset) from the + * migration region. + * e. Process the data. + * f. Read pending_bytes, which indicates that the data from the previous + * iteration has been read. If pending_bytes > 0, go to step b. + * + * The user application can transition from the _SAVING|_RUNNING + * (pre-copy state) to the _SAVING (stop-and-copy) state regardless of the + * number of pending bytes. The user application should iterate in _SAVING + * (stop-and-copy) until pending_bytes is 0. + * + * The sequence to be followed while _RESUMING device state is as follows: + * While data for this device is available, repeat the following steps: + * a. Read data_offset from where the user application should write data. + * b. Write migration data starting at the migration region + data_offset for + * the length determined by data_size from the migration source. + * c. Write data_size, which indicates to the vendor driver that data is + * written in the migration region. Vendor driver must return this write + * operations on consuming data. Vendor driver should apply the + * user-provided migration region data to the device resume state. + * + * If an error occurs during the above sequences, the vendor driver can return + * an error code for next read() or write() operation, which will terminate the + * loop. The user application should then take the next necessary action, for + * example, failing migration or terminating the user application. + * + * For the user application, data is opaque. The user application should write + * data in the same order as the data is received and the data should be of + * same transaction size at the source. + */ + +struct vfio_device_migration_info { + __u32 device_state; /* VFIO device state */ +#define VFIO_DEVICE_STATE_STOP (0) +#define VFIO_DEVICE_STATE_RUNNING (1 << 0) +#define VFIO_DEVICE_STATE_SAVING (1 << 1) +#define VFIO_DEVICE_STATE_RESUMING (1 << 2) +#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_RUNNING | \ + VFIO_DEVICE_STATE_SAVING | \ + VFIO_DEVICE_STATE_RESUMING) + +#define VFIO_DEVICE_STATE_VALID(state) \ + (state & VFIO_DEVICE_STATE_RESUMING ? 
\
+	(state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_RESUMING : 1)
+
+#define VFIO_DEVICE_STATE_IS_ERROR(state) \
+	((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_SAVING | \
+					      VFIO_DEVICE_STATE_RESUMING))
+
+#define VFIO_DEVICE_STATE_SET_ERROR(state) \
+	((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_STATE_SAVING | \
+					     VFIO_DEVICE_STATE_RESUMING)
+
+	__u32 reserved;
+	__u64 pending_bytes;
+	__u64 data_offset;
+	__u64 data_size;
+};
+
+/*
+ * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped
+ * which allows direct access to non-MSIX registers which happened to be within
+ * the same system page.
+ *
+ * Even though the userspace gets direct access to the MSIX data, the existing
+ * VFIO_DEVICE_SET_IRQS interface must still be used for MSIX configuration.
+ */
+#define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE	3
+
+/*
+ * Capability with compressed real address (aka SSA - small system address)
+ * where GPU RAM is mapped on a system bus.  Used by a GPU for DMA routing
+ * and by the userspace to associate a NVLink bridge with a GPU.
+ */
+#define VFIO_REGION_INFO_CAP_NVLINK2_SSATGT	4
+
+struct vfio_region_info_cap_nvlink2_ssatgt {
+	struct vfio_info_cap_header header;
+	__u64 tgt;
+};
+
+/*
+ * Capability with an NVLink link speed.  The value is read by
+ * the NVlink2 bridge driver from the bridge's "ibm,nvlink-speed"
+ * property in the device tree.  The value is fixed in the hardware
+ * and failing to provide the correct value results in the link
+ * not working with no indication from the driver why.
+ */
+#define VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD	5
+
+struct vfio_region_info_cap_nvlink2_lnkspd {
+	struct vfio_info_cap_header header;
+	__u32 link_speed;
+	__u32 __pad;
+};
+
+/**
+ * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
+ *				    struct vfio_irq_info)
+ *
+ * Retrieve information about a device IRQ.  Caller provides
+ * struct vfio_irq_info with index value set.  Caller sets argsz.
+ * Implementation of IRQ mapping is bus driver specific.  Indexes
+ * using multiple IRQs are primarily intended to support MSI-like
+ * interrupt blocks.  Zero count irq blocks may be used to describe
+ * unimplemented interrupt types.
+ *
+ * The EVENTFD flag indicates the interrupt index supports eventfd based
+ * signaling.
+ *
+ * The MASKABLE flag indicates the index supports MASK and UNMASK
+ * actions described below.
+ *
+ * AUTOMASKED indicates that after signaling, the interrupt line is
+ * automatically masked by VFIO and the user needs to unmask the line
+ * to receive new interrupts.  This is primarily intended to distinguish
+ * level triggered interrupts.
+ *
+ * The NORESIZE flag indicates that the interrupt lines within the index
+ * are setup as a set and new subindexes cannot be enabled without first
+ * disabling the entire index.  This is used for interrupts like PCI MSI
+ * and MSI-X where the driver may only use a subset of the available
+ * indexes, but VFIO needs to enable a specific number of vectors
+ * upfront.  In the case of MSI-X, where the user can enable MSI-X and
+ * then add and unmask vectors, it's up to userspace to make the decision
+ * whether to allocate the maximum supported number of vectors or tear
+ * down setup and incrementally increase the vectors as each is enabled.
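+ *
+ * For example, a minimal sketch of querying the MSI-X index (device is
+ * assumed to be an open VFIO device fd; error handling omitted):
+ *
+ *	struct vfio_irq_info irq = {
+ *		.argsz = sizeof(irq),
+ *		.index = VFIO_PCI_MSIX_IRQ_INDEX,
+ *	};
+ *
+ *	ioctl(device, VFIO_DEVICE_GET_IRQ_INFO, &irq);
+ *	irq.count then holds the number of vectors the device exposes.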
+
+/*
+ * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped
+ * which allows direct access to non-MSIX registers which happened to be within
+ * the same system page.
+ *
+ * Even though the userspace gets direct access to the MSIX data, the existing
+ * VFIO_DEVICE_SET_IRQS interface must still be used for MSIX configuration.
+ */
+#define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE	3
+
+/*
+ * Capability with compressed real address (aka SSA - small system address)
+ * where GPU RAM is mapped on a system bus. Used by a GPU for DMA routing
+ * and by the userspace to associate a NVLink bridge with a GPU.
+ */
+#define VFIO_REGION_INFO_CAP_NVLINK2_SSATGT	4
+
+struct vfio_region_info_cap_nvlink2_ssatgt {
+	struct vfio_info_cap_header header;
+	__u64 tgt;
+};
+
+/*
+ * Capability with an NVLink link speed. The value is read by
+ * the NVlink2 bridge driver from the bridge's "ibm,nvlink-speed"
+ * property in the device tree. The value is fixed in the hardware
+ * and failing to provide the correct value results in the link
+ * not working with no indication from the driver why.
+ */
+#define VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD	5
+
+struct vfio_region_info_cap_nvlink2_lnkspd {
+	struct vfio_info_cap_header header;
+	__u32 link_speed;
+	__u32 __pad;
+};
+
+/**
+ * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
+ *				    struct vfio_irq_info)
+ *
+ * Retrieve information about a device IRQ. Caller provides
+ * struct vfio_irq_info with index value set. Caller sets argsz.
+ * Implementation of IRQ mapping is bus driver specific. Indexes
+ * using multiple IRQs are primarily intended to support MSI-like
+ * interrupt blocks. Zero count irq blocks may be used to describe
+ * unimplemented interrupt types.
+ *
+ * The EVENTFD flag indicates the interrupt index supports eventfd based
+ * signaling.
+ *
+ * The MASKABLE flag indicates the index supports MASK and UNMASK
+ * actions described below.
+ *
+ * AUTOMASKED indicates that after signaling, the interrupt line is
+ * automatically masked by VFIO and the user needs to unmask the line
+ * to receive new interrupts. This is primarily intended to distinguish
+ * level triggered interrupts.
+ *
+ * The NORESIZE flag indicates that the interrupt lines within the index
+ * are set up as a set and new subindexes cannot be enabled without first
+ * disabling the entire index. This is used for interrupts like PCI MSI
+ * and MSI-X where the driver may only use a subset of the available
+ * indexes, but VFIO needs to enable a specific number of vectors
+ * upfront. In the case of MSI-X, where the user can enable MSI-X and
+ * then add and unmask vectors, it's up to userspace to make the decision
+ * whether to allocate the maximum supported number of vectors or tear
+ * down setup and incrementally increase the vectors as each is enabled.
+ */
+struct vfio_irq_info {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_IRQ_INFO_EVENTFD		(1 << 0)
+#define VFIO_IRQ_INFO_MASKABLE		(1 << 1)
+#define VFIO_IRQ_INFO_AUTOMASKED	(1 << 2)
+#define VFIO_IRQ_INFO_NORESIZE		(1 << 3)
+	__u32	index;		/* IRQ index */
+	__u32	count;		/* Number of IRQs within this index */
+};
+#define VFIO_DEVICE_GET_IRQ_INFO	_IO(VFIO_TYPE, VFIO_BASE + 9)
+
+/**
+ * VFIO_DEVICE_SET_IRQS - _IOW(VFIO_TYPE, VFIO_BASE + 10, struct vfio_irq_set)
+ *
+ * Set signaling, masking, and unmasking of interrupts. Caller provides
+ * struct vfio_irq_set with all fields set. 'start' and 'count' indicate
+ * the range of subindexes being specified.
+ *
+ * The DATA flags specify the type of data provided. If DATA_NONE, the
+ * operation performs the specified action immediately on the specified
+ * interrupt(s). For example, to unmask AUTOMASKED interrupt [0,0]:
+ * flags = (DATA_NONE|ACTION_UNMASK), index = 0, start = 0, count = 1.
+ *
+ * DATA_BOOL allows sparse support for the same on arrays of interrupts.
+ * For example, to mask interrupts [0,1] and [0,3] (but not [0,2]):
+ * flags = (DATA_BOOL|ACTION_MASK), index = 0, start = 1, count = 3,
+ * data = {1,0,1}
+ *
+ * DATA_EVENTFD binds the specified ACTION to the provided __s32 eventfd.
+ * A value of -1 can be used to either de-assign interrupts if already
+ * assigned or skip un-assigned interrupts. For example, to set an eventfd
+ * to be triggered for interrupts [0,0] and [0,2]:
+ * flags = (DATA_EVENTFD|ACTION_TRIGGER), index = 0, start = 0, count = 3,
+ * data = {fd1, -1, fd2}
+ * If index [0,1] is previously set, two count = 1 ioctl calls would be
+ * required to set [0,0] and [0,2] without changing [0,1].
+ *
+ * Once a signaling mechanism is set, DATA_BOOL or DATA_NONE can be used
+ * with ACTION_TRIGGER to perform kernel level interrupt loopback testing
+ * from userspace (i.e. simulate hardware triggering).
+ *
+ * Setting of an event triggering mechanism to userspace for ACTION_TRIGGER
+ * enables the interrupt index for the device. Individual subindex interrupts
+ * can be disabled using the -1 value for DATA_EVENTFD or the index can be
+ * disabled as a whole with: flags = (DATA_NONE|ACTION_TRIGGER), count = 0.
+ *
+ * Note that ACTION_[UN]MASK specify user->kernel signaling (irqfds) while
+ * ACTION_TRIGGER specifies kernel->user signaling.
+ */
+struct vfio_irq_set {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_IRQ_SET_DATA_NONE		(1 << 0) /* Data not present */
+#define VFIO_IRQ_SET_DATA_BOOL		(1 << 1) /* Data is bool (u8) */
+#define VFIO_IRQ_SET_DATA_EVENTFD	(1 << 2) /* Data is eventfd (s32) */
+#define VFIO_IRQ_SET_ACTION_MASK	(1 << 3) /* Mask interrupt */
+#define VFIO_IRQ_SET_ACTION_UNMASK	(1 << 4) /* Unmask interrupt */
+#define VFIO_IRQ_SET_ACTION_TRIGGER	(1 << 5) /* Trigger interrupt */
+	__u32	index;
+	__u32	start;
+	__u32	count;
+	__u8	data[];
+};
+#define VFIO_DEVICE_SET_IRQS	_IO(VFIO_TYPE, VFIO_BASE + 10)
+
+#define VFIO_IRQ_SET_DATA_TYPE_MASK	(VFIO_IRQ_SET_DATA_NONE | \
+					 VFIO_IRQ_SET_DATA_BOOL | \
+					 VFIO_IRQ_SET_DATA_EVENTFD)
+#define VFIO_IRQ_SET_ACTION_TYPE_MASK	(VFIO_IRQ_SET_ACTION_MASK | \
+					 VFIO_IRQ_SET_ACTION_UNMASK | \
+					 VFIO_IRQ_SET_ACTION_TRIGGER)
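As a usage illustration for VFIO_DEVICE_SET_IRQS (a sketch, not part of the header), wiring MSI vector 0 of a device to a fresh eventfd could look like this; VFIO_PCI_MSI_IRQ_INDEX is defined later in this header:

#include <string.h>
#include <sys/eventfd.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Bind MSI vector 0 of an open VFIO device fd to a new eventfd. */
static int enable_msi_trigger(int device)
{
	struct {
		struct vfio_irq_set set;
		__s32 fd;		/* occupies set.data[] */
	} arg;
	int efd = eventfd(0, 0);

	if (efd < 0)
		return -1;
	memset(&arg, 0, sizeof(arg));
	arg.set.argsz = sizeof(arg);
	arg.set.flags = VFIO_IRQ_SET_DATA_EVENTFD |
			VFIO_IRQ_SET_ACTION_TRIGGER;
	arg.set.index = VFIO_PCI_MSI_IRQ_INDEX;	/* defined further below */
	arg.set.start = 0;
	arg.set.count = 1;
	arg.fd = efd;			/* eventfd lands in data[] */
	return ioctl(device, VFIO_DEVICE_SET_IRQS, &arg) ? -1 : efd;
}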
+/**
+ * VFIO_DEVICE_RESET - _IO(VFIO_TYPE, VFIO_BASE + 11)
+ *
+ * Reset a device.
+ */
+#define VFIO_DEVICE_RESET	_IO(VFIO_TYPE, VFIO_BASE + 11)
+
+/*
+ * The VFIO-PCI bus driver makes use of the following fixed region and
+ * IRQ index mapping. Unimplemented regions return a size of zero.
+ * Unimplemented IRQ types return a count of zero.
+ */
+
+enum {
+	VFIO_PCI_BAR0_REGION_INDEX,
+	VFIO_PCI_BAR1_REGION_INDEX,
+	VFIO_PCI_BAR2_REGION_INDEX,
+	VFIO_PCI_BAR3_REGION_INDEX,
+	VFIO_PCI_BAR4_REGION_INDEX,
+	VFIO_PCI_BAR5_REGION_INDEX,
+	VFIO_PCI_ROM_REGION_INDEX,
+	VFIO_PCI_CONFIG_REGION_INDEX,
+	/*
+	 * Expose VGA regions defined for PCI base class 03, subclass 00.
+	 * This includes I/O port ranges 0x3b0 to 0x3bb and 0x3c0 to 0x3df
+	 * as well as the MMIO range 0xa0000 to 0xbffff. Each implemented
+	 * range is found at its identity mapped offset from the region
+	 * offset, for example 0x3b0 is region_info.offset + 0x3b0. Areas
+	 * between described ranges are unimplemented.
+	 */
+	VFIO_PCI_VGA_REGION_INDEX,
+	VFIO_PCI_NUM_REGIONS = 9 /* Fixed user ABI, region indexes >=9 use */
+				 /* device specific cap to define content. */
+};
+
+enum {
+	VFIO_PCI_INTX_IRQ_INDEX,
+	VFIO_PCI_MSI_IRQ_INDEX,
+	VFIO_PCI_MSIX_IRQ_INDEX,
+	VFIO_PCI_ERR_IRQ_INDEX,
+	VFIO_PCI_REQ_IRQ_INDEX,
+	VFIO_PCI_NUM_IRQS
+};
+
+/*
+ * The vfio-ccw bus driver makes use of the following fixed region and
+ * IRQ index mapping. Unimplemented regions return a size of zero.
+ * Unimplemented IRQ types return a count of zero.
+ */
+
+enum {
+	VFIO_CCW_CONFIG_REGION_INDEX,
+	VFIO_CCW_NUM_REGIONS
+};
+
+enum {
+	VFIO_CCW_IO_IRQ_INDEX,
+	VFIO_CCW_CRW_IRQ_INDEX,
+	VFIO_CCW_NUM_IRQS
+};
+
+/**
+ * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IORW(VFIO_TYPE, VFIO_BASE + 12,
+ *					      struct vfio_pci_hot_reset_info)
+ *
+ * Return: 0 on success, -errno on failure:
+ *	-enospc = insufficient buffer, -enodev = unsupported for device.
+ */
+struct vfio_pci_dependent_device {
+	__u32	group_id;
+	__u16	segment;
+	__u8	bus;
+	__u8	devfn; /* Use PCI_SLOT/PCI_FUNC */
+};
+
+struct vfio_pci_hot_reset_info {
+	__u32	argsz;
+	__u32	flags;
+	__u32	count;
+	struct vfio_pci_dependent_device	devices[];
+};
+
+#define VFIO_DEVICE_GET_PCI_HOT_RESET_INFO	_IO(VFIO_TYPE, VFIO_BASE + 12)
+
+/**
+ * VFIO_DEVICE_PCI_HOT_RESET - _IOW(VFIO_TYPE, VFIO_BASE + 13,
+ *				    struct vfio_pci_hot_reset)
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+struct vfio_pci_hot_reset {
+	__u32	argsz;
+	__u32	flags;
+	__u32	count;
+	__s32	group_fds[];
+};
+
+#define VFIO_DEVICE_PCI_HOT_RESET	_IO(VFIO_TYPE, VFIO_BASE + 13)
+
+/**
+ * VFIO_DEVICE_QUERY_GFX_PLANE - _IOW(VFIO_TYPE, VFIO_BASE + 14,
+ *				      struct vfio_device_query_gfx_plane)
+ *
+ * Set the drm_plane_type and flags, then retrieve the gfx plane info.
+ *
+ * flags supported:
+ *   - VFIO_GFX_PLANE_TYPE_PROBE and VFIO_GFX_PLANE_TYPE_DMABUF are set
+ *     to ask if the mdev supports dma-buf. 0 on support, -EINVAL on no
+ *     support for dma-buf.
+ *   - VFIO_GFX_PLANE_TYPE_PROBE and VFIO_GFX_PLANE_TYPE_REGION are set
+ *     to ask if the mdev supports region. 0 on support, -EINVAL on no
+ *     support for region.
+ *   - VFIO_GFX_PLANE_TYPE_DMABUF or VFIO_GFX_PLANE_TYPE_REGION is set
+ *     with each call to query the plane info.
+ *   - Others are invalid and return -EINVAL.
+ *
+ * Note:
+ * 1. Plane could be disabled by guest. In that case, success will be
+ *    returned with zero-initialized drm_format, size, width and height
+ *    fields.
+ * 2. x_hot/y_hot is set to 0xFFFFFFFF if no hotspot information available
+ *
+ * Return: 0 on success, -errno on other failure.
+ */
+struct vfio_device_gfx_plane_info {
+	__u32 argsz;
+	__u32 flags;
+#define VFIO_GFX_PLANE_TYPE_PROBE (1 << 0)
+#define VFIO_GFX_PLANE_TYPE_DMABUF (1 << 1)
+#define VFIO_GFX_PLANE_TYPE_REGION (1 << 2)
+	/* in */
+	__u32 drm_plane_type;	/* type of plane: DRM_PLANE_TYPE_* */
+	/* out */
+	__u32 drm_format;	/* drm format of plane */
+	__u64 drm_format_mod;	/* tiled mode */
+	__u32 width;		/* width of plane */
+	__u32 height;		/* height of plane */
+	__u32 stride;		/* stride of plane */
+	__u32 size;		/* size of plane in bytes, align on page */
+	__u32 x_pos;		/* horizontal position of cursor plane */
+	__u32 y_pos;		/* vertical position of cursor plane */
+	__u32 x_hot;		/* horizontal position of cursor hotspot */
+	__u32 y_hot;		/* vertical position of cursor hotspot */
+	union {
+		__u32 region_index;	/* region index */
+		__u32 dmabuf_id;	/* dma-buf id */
+	};
+};
+
+#define VFIO_DEVICE_QUERY_GFX_PLANE _IO(VFIO_TYPE, VFIO_BASE + 14)
+
+/**
+ * VFIO_DEVICE_GET_GFX_DMABUF - _IOW(VFIO_TYPE, VFIO_BASE + 15, __u32)
+ *
+ * Return a new dma-buf file descriptor for an exposed guest framebuffer
+ * described by the provided dmabuf_id. The dmabuf_id is returned from VFIO_
+ * DEVICE_QUERY_GFX_PLANE as a token of the exposed guest framebuffer.
+ */
+
+#define VFIO_DEVICE_GET_GFX_DMABUF _IO(VFIO_TYPE, VFIO_BASE + 15)
+
+/**
+ * VFIO_DEVICE_IOEVENTFD - _IOW(VFIO_TYPE, VFIO_BASE + 16,
+ *                              struct vfio_device_ioeventfd)
+ *
+ * Perform a write to the device at the specified device fd offset, with
+ * the specified data and width when the provided eventfd is triggered.
+ * vfio bus drivers may not support this for all regions, for all widths,
+ * or at all. vfio-pci currently only enables support for BAR regions,
+ * excluding the MSI-X vector table.
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+struct vfio_device_ioeventfd {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_DEVICE_IOEVENTFD_8		(1 << 0) /* 1-byte write */
+#define VFIO_DEVICE_IOEVENTFD_16	(1 << 1) /* 2-byte write */
+#define VFIO_DEVICE_IOEVENTFD_32	(1 << 2) /* 4-byte write */
+#define VFIO_DEVICE_IOEVENTFD_64	(1 << 3) /* 8-byte write */
+#define VFIO_DEVICE_IOEVENTFD_SIZE_MASK	(0xf)
+	__u64	offset;			/* device fd offset of write */
+	__u64	data;			/* data to be written */
+	__s32	fd;			/* -1 for de-assignment */
+};
+
+#define VFIO_DEVICE_IOEVENTFD		_IO(VFIO_TYPE, VFIO_BASE + 16)
+
+/**
+ * VFIO_DEVICE_FEATURE - _IORW(VFIO_TYPE, VFIO_BASE + 17,
+ *			       struct vfio_device_feature)
+ *
+ * Get, set, or probe feature data of the device. The feature is selected
+ * using the FEATURE_MASK portion of the flags field. Support for a feature
+ * can be probed by setting both the FEATURE_MASK and PROBE bits. A probe
+ * may optionally include the GET and/or SET bits to determine read vs write
+ * access of the feature respectively. Probing a feature will return success
+ * if the feature is supported and all of the optionally indicated GET/SET
+ * methods are supported. The format of the data portion of the structure is
+ * specific to the given feature. The data portion is not required for
+ * probing. GET and SET are mutually exclusive, except for use with PROBE.
+ *
+ * Return 0 on success, -errno on failure.
+ */
+struct vfio_device_feature {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_DEVICE_FEATURE_MASK	(0xffff) /* 16-bit feature index */
+#define VFIO_DEVICE_FEATURE_GET		(1 << 16) /* Get feature into data[] */
+#define VFIO_DEVICE_FEATURE_SET		(1 << 17) /* Set feature from data[] */
+#define VFIO_DEVICE_FEATURE_PROBE	(1 << 18) /* Probe feature support */
+	__u8	data[];
+};
+
+#define VFIO_DEVICE_FEATURE		_IO(VFIO_TYPE, VFIO_BASE + 17)
+
+/*
+ * Provide support for setting a PCI VF Token, which is used as a shared
+ * secret between PF and VF drivers. This feature may only be set on a
+ * PCI SR-IOV PF when SR-IOV is enabled on the PF and there are no existing
+ * open VFs. Data provided when setting this feature is a 16-byte array
+ * (__u8 b[16]), representing a UUID.
+ */
+#define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN	(0)
+
+/* -------- API for Type1 VFIO IOMMU -------- */
+
+/**
+ * VFIO_IOMMU_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 12, struct vfio_iommu_info)
+ *
+ * Retrieve information about the IOMMU object. Fills in provided
+ * struct vfio_iommu_info. Caller sets argsz.
+ *
+ * XXX Should we do these by CHECK_EXTENSION too?
+ */
+struct vfio_iommu_type1_info {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_IOMMU_INFO_PGSIZES	(1 << 0) /* supported page sizes info */
+#define VFIO_IOMMU_INFO_CAPS	(1 << 1) /* Info supports caps */
+	__u64	iova_pgsizes;	/* Bitmap of supported page sizes */
+	__u32	cap_offset;	/* Offset within info struct of first cap */
+};
+
+/*
+ * The IOVA capability allows reporting the valid IOVA range(s)
+ * excluding any non-relaxable reserved regions exposed by
+ * devices attached to the container. Any DMA map attempt
+ * outside the valid iova range will return an error.
+ *
+ * The structures below define version 1 of this capability.
+ */
+#define VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE	1
+
+struct vfio_iova_range {
+	__u64	start;
+	__u64	end;
+};
+
+struct vfio_iommu_type1_info_cap_iova_range {
+	struct vfio_info_cap_header	header;
+	__u32	nr_iovas;
+	__u32	reserved;
+	struct vfio_iova_range		iova_ranges[];
+};
+
+/*
+ * The migration capability allows reporting supported features for migration.
+ *
+ * The structures below define version 1 of this capability.
+ *
+ * The existence of this capability indicates that the IOMMU kernel driver
+ * supports dirty page logging.
+ *
+ * pgsize_bitmap: Kernel driver returns a bitmap of supported page sizes for
+ * dirty page logging.
+ * max_dirty_bitmap_size: Kernel driver returns the maximum supported dirty
+ * bitmap size in bytes that can be used by user applications when getting
+ * the dirty bitmap.
+ */
+#define VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION	2
+
+struct vfio_iommu_type1_info_cap_migration {
+	struct vfio_info_cap_header header;
+	__u32	flags;
+	__u64	pgsize_bitmap;
+	__u64	max_dirty_bitmap_size;	/* in bytes */
+};
+
+/*
+ * The DMA available capability allows reporting the current number of
+ * simultaneously outstanding DMA mappings that are allowed.
+ *
+ * The structure below defines version 1 of this capability.
+ *
+ * avail: specifies the current number of outstanding DMA mappings allowed.
+ */
+#define VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL	3
+
+struct vfio_iommu_type1_info_dma_avail {
+	struct vfio_info_cap_header header;
+	__u32	avail;
+};
+
+#define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
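A sketch of how a user application might locate the migration capability in the VFIO_IOMMU_GET_INFO capability chain (illustrative, not part of the header; `container` is assumed to be a VFIO container fd with a group attached and the Type1 IOMMU enabled):

#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Look up VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION in the GET_INFO cap chain. */
static int query_migration_cap(int container,
			       struct vfio_iommu_type1_info_cap_migration *out)
{
	struct vfio_iommu_type1_info probe = { .argsz = sizeof(probe) };
	struct vfio_iommu_type1_info *info;
	__u32 off;
	int ret = -1;

	/* First call only discovers the full argsz including capabilities. */
	if (ioctl(container, VFIO_IOMMU_GET_INFO, &probe))
		return -1;
	info = calloc(1, probe.argsz);
	if (!info)
		return -1;
	info->argsz = probe.argsz;
	if (ioctl(container, VFIO_IOMMU_GET_INFO, info) == 0 &&
	    (info->flags & VFIO_IOMMU_INFO_CAPS)) {
		/* Capabilities are chained via 'next' offsets from 'info'. */
		for (off = info->cap_offset; off; ) {
			struct vfio_info_cap_header *hdr =
				(void *)((char *)info + off);

			if (hdr->id == VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION) {
				memcpy(out, hdr, sizeof(*out));
				ret = 0;
				break;
			}
			off = hdr->next;
		}
	}
	free(info);
	return ret;
}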
+
+/**
+ * VFIO_IOMMU_MAP_DMA - _IOW(VFIO_TYPE, VFIO_BASE + 13, struct vfio_dma_map)
+ *
+ * Map process virtual addresses to IO virtual addresses using the
+ * provided struct vfio_dma_map. Caller sets argsz. READ and/or WRITE
+ * required.
+ */
+struct vfio_iommu_type1_dma_map {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_DMA_MAP_FLAG_READ (1 << 0)		/* readable from device */
+#define VFIO_DMA_MAP_FLAG_WRITE (1 << 1)	/* writable from device */
+	__u64	vaddr;				/* Process virtual address */
+	__u64	iova;				/* IO virtual address */
+	__u64	size;				/* Size of mapping (bytes) */
+};
+
+#define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
+
+struct vfio_bitmap {
+	__u64	pgsize;		/* page size for bitmap in bytes */
+	__u64	size;		/* in bytes */
+	__u64 __user *data;	/* one bit per page */
+};
+
+/**
+ * VFIO_IOMMU_UNMAP_DMA - _IOWR(VFIO_TYPE, VFIO_BASE + 14,
+ *			       struct vfio_dma_unmap)
+ *
+ * Unmap IO virtual addresses using the provided struct vfio_dma_unmap.
+ * Caller sets argsz. The actual unmapped size is returned in the size
+ * field. No guarantee is made to the user that arbitrary unmaps of iova
+ * or size different from those used in the original mapping call will
+ * succeed.
+ * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap
+ * before unmapping IO virtual addresses. When this flag is set, the user must
+ * provide a struct vfio_bitmap in data[]. The user must provide
+ * zero-initialized memory via vfio_bitmap.data and its size in the
+ * vfio_bitmap.size field. A bit in the bitmap represents one page, of the
+ * user-provided page size in the vfio_bitmap.pgsize field, consecutively
+ * starting from the iova offset. A set bit indicates that the page at that
+ * offset from iova is dirty. A bitmap of the pages in the range of the
+ * unmapped size is returned in the user-provided vfio_bitmap.data.
+ */
+struct vfio_iommu_type1_dma_unmap {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0)
+	__u64	iova;				/* IO virtual address */
+	__u64	size;				/* Size of mapping (bytes) */
+	__u8	data[];
+};
+
+#define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
+
+/*
+ * IOCTLs to enable/disable IOMMU container usage.
+ * No parameters are supported.
+ */
+#define VFIO_IOMMU_ENABLE	_IO(VFIO_TYPE, VFIO_BASE + 15)
+#define VFIO_IOMMU_DISABLE	_IO(VFIO_TYPE, VFIO_BASE + 16)
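A minimal VFIO_IOMMU_MAP_DMA usage sketch (illustrative only; `container` is assumed to be a configured Type1 container fd):

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/vfio.h>

/* Map one anonymous page at the given IOVA so the device can read and write it. */
static int map_one_page(int container, __u64 iova)
{
	struct vfio_iommu_type1_dma_map map = {
		.argsz = sizeof(map),
		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
		.iova  = iova,
		.size  = 4096,
	};
	void *buf = mmap(NULL, map.size, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (buf == MAP_FAILED)
		return -1;
	map.vaddr = (__u64)(unsigned long)buf;
	return ioctl(container, VFIO_IOMMU_MAP_DMA, &map);
}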
+
+/**
+ * VFIO_IOMMU_DIRTY_PAGES - _IOWR(VFIO_TYPE, VFIO_BASE + 17,
+ *				  struct vfio_iommu_type1_dirty_bitmap)
+ * IOCTL is used for dirty pages logging.
+ * Caller should set flag depending on which operation to perform, details as
+ * below:
+ *
+ * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_START flag set, instructs
+ * the IOMMU driver to log pages that are dirtied or potentially dirtied by
+ * the device; designed to be used when a migration is in progress. Dirty pages
+ * are logged until logging is disabled by the user application by calling the
+ * IOCTL with the VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag.
+ *
+ * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag set, instructs
+ * the IOMMU driver to stop logging dirtied pages.
+ *
+ * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP flag set
+ * returns the dirty pages bitmap for IOMMU container for a given IOVA range.
+ * The user must specify the IOVA range and the pgsize through the structure
+ * vfio_iommu_type1_dirty_bitmap_get in the data[] portion. This interface
+ * supports getting a bitmap of the smallest supported pgsize only and can be
+ * modified in future to get a bitmap of any specified supported pgsize. The
+ * user must provide a zeroed memory area for the bitmap memory and specify
+ * its size in bitmap.size. One bit is used to represent one page consecutively
+ * starting from the iova offset. The user should provide the page size in the
+ * bitmap.pgsize field. A bit set in the bitmap indicates that the page at that
+ * offset from iova is dirty. The caller must set argsz to a value including
+ * the size of structure vfio_iommu_type1_dirty_bitmap_get, but excluding the
+ * size of the actual bitmap. If dirty pages logging is not enabled, an error
+ * will be returned.
+ *
+ * The VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR flag is almost the same
+ * as VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP, except that the underlying dirty
+ * bitmap is not cleared automatically. The user can clear it manually by
+ * calling the IOCTL with the VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP flag
+ * set.
+ *
+ * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP flag set,
+ * instructs the IOMMU driver to clear the dirty status of pages in a bitmap
+ * for IOMMU container for a given IOVA range. The user must specify the IOVA
+ * range, the bitmap and the pgsize through the structure
+ * vfio_iommu_type1_dirty_bitmap_get in the data[] portion. This interface
+ * supports clearing a bitmap of the smallest supported pgsize only and can be
+ * modified in future to clear a bitmap of any specified supported pgsize. The
+ * user must provide a memory area for the bitmap memory and specify its size
+ * in bitmap.size. One bit is used to represent one page consecutively starting
+ * from the iova offset. The user should provide the page size in the
+ * bitmap.pgsize field. A bit set in the bitmap indicates that the page at that
+ * offset from iova has its dirty status cleared, and dirty tracking is
+ * re-enabled for that page. The caller must set argsz to a value including the
+ * size of structure vfio_iommu_dirty_bitmap_get, but excluding the size of the
+ * actual bitmap. If dirty pages logging is not enabled, an error will be
+ * returned. Note: the user should clear the dirty log before handling the
+ * corresponding dirty pages.
+ *
+ * Only one of the flags _START, _STOP, _GET_BITMAP, _GET_BITMAP_NOCLEAR and
+ * _CLEAR_BITMAP may be specified at a time.
+ */
+struct vfio_iommu_type1_dirty_bitmap {
+	__u32        argsz;
+	__u32        flags;
+#define VFIO_IOMMU_DIRTY_PAGES_FLAG_START	(1 << 0)
+#define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP	(1 << 1)
+#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP	(1 << 2)
+#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP_NOCLEAR	(1 << 3)
+#define VFIO_IOMMU_DIRTY_PAGES_FLAG_CLEAR_BITMAP	(1 << 4)
+	__u8         data[];
+};
+
+struct vfio_iommu_type1_dirty_bitmap_get {
+	__u64              iova;	/* IO virtual address */
+	__u64              size;	/* Size of iova range */
+	struct vfio_bitmap bitmap;
+};
+
+#define VFIO_IOMMU_DIRTY_PAGES             _IO(VFIO_TYPE, VFIO_BASE + 17)
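A sketch of fetching the dirty bitmap for one tracked range (illustrative only; it assumes logging was started with VFIO_IOMMU_DIRTY_PAGES_FLAG_START, that `pgsize` comes from the migration capability's pgsize_bitmap, and that the header as seen by userspace makes `vfio_bitmap.data` a plain pointer):

#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/*
 * Fetch the dirty bitmap for one IOVA range. 'iova'/'size' must match a
 * tracked mapping; 'bitmap' is a caller-owned buffer of 'bitmap_bytes'.
 */
static int get_dirty_bitmap(int container, __u64 iova, __u64 size,
			    __u64 pgsize, __u64 *bitmap, __u64 bitmap_bytes)
{
	struct vfio_iommu_type1_dirty_bitmap *req;
	struct vfio_iommu_type1_dirty_bitmap_get *range;
	size_t argsz = sizeof(*req) + sizeof(*range);
	int ret;

	req = calloc(1, argsz);
	if (!req)
		return -1;
	/* argsz covers the range descriptor but not the bitmap itself. */
	req->argsz = argsz;
	req->flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP;
	range = (struct vfio_iommu_type1_dirty_bitmap_get *)req->data;
	range->iova = iova;
	range->size = size;
	range->bitmap.pgsize = pgsize;
	range->bitmap.size = bitmap_bytes;
	range->bitmap.data = bitmap;
	memset(bitmap, 0, bitmap_bytes);	/* must be zeroed by the user */
	ret = ioctl(container, VFIO_IOMMU_DIRTY_PAGES, req);
	free(req);
	return ret;
}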
+
+/*
+ * VFIO_IOMMU_BIND_PROCESS
+ *
+ * Allocate a PASID for a process address space, and use it to attach this
+ * process to all devices in the container. Devices can then tag their DMA
+ * traffic with the returned @pasid to perform transactions on the associated
+ * virtual address space. Mapping and unmapping buffers is performed by
+ * standard functions such as mmap and malloc.
+ *
+ * If flag is VFIO_IOMMU_BIND_PID, @pid contains the pid of a foreign process
+ * to bind. Otherwise the current task is bound. Given that the caller owns
+ * the device, setting this flag grants the caller read and write permissions
+ * on the entire address space of the foreign process described by @pid.
+ * Therefore, permission to perform the bind operation on a foreign process is
+ * governed by the ptrace access mode PTRACE_MODE_ATTACH_REALCREDS check. See
+ * man ptrace(2) for more information.
+ *
+ * On success, VFIO writes a Process Address Space ID (PASID) into @pasid. This
+ * ID is unique to a process and can be used on all devices in the container.
+ *
+ * On fork, the child inherits the device fd and can use the bonds set up by
+ * its parent. Consequently, the child has R/W access on the address spaces
+ * bound by its parent. After an execv, the device fd is closed and the child
+ * doesn't have access to the address space anymore.
+ *
+ * To remove a bond between process and container, the VFIO_IOMMU_UNBIND ioctl
+ * is issued with the same parameters. If a pid was specified in
+ * VFIO_IOMMU_BIND, it should also be present for VFIO_IOMMU_UNBIND. Otherwise
+ * the current task is unbound from the container.
+ */
+struct vfio_iommu_type1_bind_process {
+	__u32	flags;
+#define VFIO_IOMMU_BIND_PID		(1 << 0)
+	__u32	pasid;
+	__s32	pid;
+};
+
+/*
+ * Only mode supported at the moment is VFIO_IOMMU_BIND_PROCESS, which takes
+ * vfio_iommu_type1_bind_process in data.
+ */
+struct vfio_iommu_type1_bind {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_IOMMU_BIND_PROCESS		(1 << 0)
+	__u8	data[];
+};
+
+/*
+ * VFIO_IOMMU_BIND - _IOWR(VFIO_TYPE, VFIO_BASE + 22, struct vfio_iommu_bind)
+ *
+ * Manage address spaces of devices in this container. Initially a TYPE1
+ * container can only have one address space, managed with
+ * VFIO_IOMMU_MAP/UNMAP_DMA.
+ *
+ * An IOMMU of type VFIO_TYPE1_NESTING_IOMMU can be managed by both MAP/UNMAP
+ * and BIND ioctls at the same time. MAP/UNMAP acts on the stage-2 (host) page
+ * tables, and BIND manages the stage-1 (guest) page tables. Other types of
+ * IOMMU may allow MAP/UNMAP and BIND to coexist, where MAP/UNMAP controls
+ * non-PASID traffic and BIND controls PASID traffic. But this depends on the
+ * underlying IOMMU architecture and isn't guaranteed.
+ *
+ * Availability of this feature depends on the device, its bus, the underlying
+ * IOMMU and the CPU architecture.
+ *
+ * returns: 0 on success, -errno on failure.
+ */
+#define VFIO_IOMMU_BIND		_IO(VFIO_TYPE, VFIO_BASE + 22)
+
+/*
+ * VFIO_IOMMU_UNBIND - _IOWR(VFIO_TYPE, VFIO_BASE + 23, struct vfio_iommu_bind)
+ *
+ * Undo what was done by the corresponding VFIO_IOMMU_BIND ioctl.
+ */
+#define VFIO_IOMMU_UNBIND	_IO(VFIO_TYPE, VFIO_BASE + 23)
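A sketch of binding the calling task with VFIO_IOMMU_BIND (illustrative only; it relies on vfio_iommu_type1_bind_process starting immediately after the bind header's data[], which holds for these layouts):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

/* Bind the calling task's address space; on success fills *pasid. */
static int bind_current_task(int container, __u32 *pasid)
{
	struct {
		struct vfio_iommu_type1_bind hdr;
		struct vfio_iommu_type1_bind_process proc;
	} bind;

	memset(&bind, 0, sizeof(bind));
	bind.hdr.argsz = sizeof(bind);
	bind.hdr.flags = VFIO_IOMMU_BIND_PROCESS;
	/* bind.proc.flags == 0: bind the current task, not a foreign pid. */
	if (ioctl(container, VFIO_IOMMU_BIND, &bind))
		return -1;
	*pasid = bind.proc.pasid;
	return 0;
}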
+
+/* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
+
+/*
+ * The SPAPR TCE DDW info struct provides the information about
+ * the details of Dynamic DMA window capability.
+ *
+ * @pgsizes contains a page size bitmask, 4K/64K/16M are supported.
+ * @max_dynamic_windows_supported tells the maximum number of windows
+ * which the platform can create.
+ * @levels tells the maximum number of levels in multi-level IOMMU tables;
+ * this allows splitting a table into smaller chunks which reduces
+ * the amount of physically contiguous memory required for the table.
+ */
+struct vfio_iommu_spapr_tce_ddw_info {
+	__u64	pgsizes;	/* Bitmap of supported page sizes */
+	__u32	max_dynamic_windows_supported;
+	__u32	levels;
+};
+
+/*
+ * The SPAPR TCE info struct provides the information about the PCI bus
+ * address ranges available for DMA; these values are programmed into
+ * the hardware so the guest has to know that information.
+ *
+ * The DMA 32 bit window start is an absolute PCI bus address.
+ * The IOVA addresses passed via map/unmap ioctls are absolute PCI bus
+ * addresses too so the window works as a filter rather than an offset
+ * for IOVA addresses.
+ *
+ * Flags supported:
+ * - VFIO_IOMMU_SPAPR_INFO_DDW: informs the userspace that dynamic DMA windows
+ *   (DDW) support is present. @ddw is only supported when DDW is present.
+ */
+struct vfio_iommu_spapr_tce_info {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_IOMMU_SPAPR_INFO_DDW	(1 << 0)	/* DDW supported */
+	__u32	dma32_window_start;	/* 32 bit window start (bytes) */
+	__u32	dma32_window_size;	/* 32 bit window size (bytes) */
+	struct vfio_iommu_spapr_tce_ddw_info ddw;
+};
+
+#define VFIO_IOMMU_SPAPR_TCE_GET_INFO	_IO(VFIO_TYPE, VFIO_BASE + 12)
+
+/*
+ * EEH PE operation struct provides ways to:
+ * - enable/disable EEH functionality;
+ * - unfreeze IO/DMA for frozen PE;
+ * - read PE state;
+ * - reset PE;
+ * - configure PE;
+ * - inject EEH error.
+ */
+struct vfio_eeh_pe_err {
+	__u32 type;
+	__u32 func;
+	__u64 addr;
+	__u64 mask;
+};
+
+struct vfio_eeh_pe_op {
+	__u32 argsz;
+	__u32 flags;
+	__u32 op;
+	union {
+		struct vfio_eeh_pe_err err;
+	};
+};
+
+#define VFIO_EEH_PE_DISABLE		0	/* Disable EEH functionality */
+#define VFIO_EEH_PE_ENABLE		1	/* Enable EEH functionality  */
+#define VFIO_EEH_PE_UNFREEZE_IO		2	/* Enable IO for frozen PE   */
+#define VFIO_EEH_PE_UNFREEZE_DMA	3	/* Enable DMA for frozen PE  */
+#define VFIO_EEH_PE_GET_STATE		4	/* PE state retrieval        */
+#define  VFIO_EEH_PE_STATE_NORMAL	0	/* PE in functional state    */
+#define  VFIO_EEH_PE_STATE_RESET	1	/* PE reset in progress      */
+#define  VFIO_EEH_PE_STATE_STOPPED	2	/* Stopped DMA and IO        */
+#define  VFIO_EEH_PE_STATE_STOPPED_DMA	4	/* Stopped DMA only          */
+#define  VFIO_EEH_PE_STATE_UNAVAIL	5	/* State unavailable         */
+#define VFIO_EEH_PE_RESET_DEACTIVATE	5	/* Deassert PE reset         */
+#define VFIO_EEH_PE_RESET_HOT		6	/* Assert hot reset          */
+#define VFIO_EEH_PE_RESET_FUNDAMENTAL	7	/* Assert fundamental reset  */
+#define VFIO_EEH_PE_CONFIGURE		8	/* PE configuration          */
+#define VFIO_EEH_PE_INJECT_ERR		9	/* Inject EEH error          */
+
+#define VFIO_EEH_PE_OP	_IO(VFIO_TYPE, VFIO_BASE + 21)
+
+/**
+ * VFIO_IOMMU_SPAPR_REGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 17,
+ *					   struct vfio_iommu_spapr_register_memory)
+ *
+ * Registers user space memory where DMA is allowed. It pins
+ * user pages and does the locked memory accounting so
+ * subsequent VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA calls
+ * get faster.
+ */
+struct vfio_iommu_spapr_register_memory {
+	__u32	argsz;
+	__u32	flags;
+	__u64	vaddr;				/* Process virtual address */
+	__u64	size;				/* Size of mapping (bytes) */
+};
+#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY	_IO(VFIO_TYPE, VFIO_BASE + 17)
+
+/**
+ * VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 18,
+ *					     struct vfio_iommu_spapr_register_memory)
+ *
+ * Unregisters user space memory registered with
+ * VFIO_IOMMU_SPAPR_REGISTER_MEMORY.
+ * Uses vfio_iommu_spapr_register_memory for parameters.
+ */
+#define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY	_IO(VFIO_TYPE, VFIO_BASE + 18)
+
+/**
+ * VFIO_IOMMU_SPAPR_TCE_CREATE - _IOWR(VFIO_TYPE, VFIO_BASE + 19,
+ *				       struct vfio_iommu_spapr_tce_create)
+ *
+ * Creates an additional TCE table and programs it (sets a new DMA window)
+ * to every IOMMU group in the container. It receives page shift, window
+ * size and number of levels in the TCE table being created.
+ *
+ * It allocates and returns an offset on a PCI bus of the new DMA window.
+ */
+struct vfio_iommu_spapr_tce_create {
+	__u32	argsz;
+	__u32	flags;
+	/* in */
+	__u32	page_shift;
+	__u32	__resv1;
+	__u64	window_size;
+	__u32	levels;
+	__u32	__resv2;
+	/* out */
+	__u64	start_addr;
+};
+#define VFIO_IOMMU_SPAPR_TCE_CREATE	_IO(VFIO_TYPE, VFIO_BASE + 19)
+
+/**
+ * VFIO_IOMMU_SPAPR_TCE_REMOVE - _IOW(VFIO_TYPE, VFIO_BASE + 20,
+ *				      struct vfio_iommu_spapr_tce_remove)
+ *
+ * Unprograms a TCE table from all groups in the container and destroys it.
+ * It receives a PCI bus offset as a window id.
+ */
+struct vfio_iommu_spapr_tce_remove {
+	__u32	argsz;
+	__u32	flags;
+	/* in */
+	__u64	start_addr;
+};
+#define VFIO_IOMMU_SPAPR_TCE_REMOVE	_IO(VFIO_TYPE, VFIO_BASE + 20)
+
+/* ***************************************************************** */
+
+#endif /* _UAPIVFIO_H */
diff --git a/KAEKernelDriver/KAEKernelDriver-OLK-6.6/Makefile b/KAEKernelDriver/KAEKernelDriver-OLK-6.6/Makefile
index 9328cee..e5c78b6 100644
--- a/KAEKernelDriver/KAEKernelDriver-OLK-6.6/Makefile
+++ b/KAEKernelDriver/KAEKernelDriver-OLK-6.6/Makefile
@@ -1,10 +1,10 @@
 KERNEL_VERSION_BY_BUILDENV :=`rpm -q --qf '%{VERSION}-%{RELEASE}.%{ARCH}\n' kernel-devel | head -n 1`
 KERNEL_PATH := /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/build
-KSP := $(shell if test -d /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/source; then \
-		echo /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/source; \
-	else \
-		echo /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/build; \
-	fi)
+# KSP := $(shell if test -d /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/source; then \
+#		echo /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/source; \
+#	else \
+#		echo /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/build; \
+#	fi)
 
 obj-m += uacce/
 obj-m += hisilicon/
@@ -13,9 +13,7 @@ DIRS := $(shell find . -maxdepth 3 -type d)
 TARGET = $(foreach dir,$(DIRS),$(wildcard \
 		$(dir)/*.o) $(dir)/*.ko $(dir)/*.tmp_versions $(dir)/*.depend $(dir)/*.mod.c $(dir)/*.order $(dir)/*.symvers)
 
-default:
-	$(MAKE) -C $(KERNEL_PATH) M=$(shell pwd) modules \
-	CONFIG_CC_STACKPROTECTOR_STRONG=y \
+CONFIG_FLAGS = CONFIG_CC_STACKPROTECTOR_STRONG=y \
 	CONFIG_UACCE=m \
 	CONFIG_CRYPTO_QM_UACCE=m \
 	CONFIG_CRYPTO_DEV_HISI_SGL=m \
@@ -23,8 +21,17 @@ default:
 	CONFIG_CRYPTO_DEV_HISI_ZIP=m \
 	CONFIG_CRYPTO_DEV_HISI_HPRE=m \
 	CONFIG_CRYPTO_DEV_HISI_SEC2=m \
-	CONFIG_CRYPTO_DEV_HISI_TRNG=m \
-	CONFIG_HISI_ACC_VFIO_PCI=m
+	CONFIG_CRYPTO_DEV_HISI_TRNG=m
+
+ifeq ($(ENABLE_MIGRATION), y)
+CONFIG_FLAGS += CONFIG_HISI_ACC_VFIO_PCI=m
+else
+CONFIG_FLAGS += CONFIG_HISI_ACC_VFIO_PCI=n
+endif
+
+default:
+	$(MAKE) -C $(KERNEL_PATH) M=$(shell pwd) modules $(CONFIG_FLAGS)
+
 #copy:
 #	cp -f $(shell pwd)/include_linux/uacce.h $(KSP)/include/linux
 #	cp -f $(shell pwd)/include_uapi_linux/uacce.h $(KSP)/include/uapi/linux
@@ -41,7 +48,9 @@ install:
 	-modprobe hisi_sec2 uacce_mode=1 pf_q_num=256
 	-modprobe hisi_hpre uacce_mode=1 pf_q_num=256
 	-modprobe hisi_zip uacce_mode=1 pf_q_num=256
-	-modprobe hisi_acc_vfio_pci
+	$(shell if [ "$(ENABLE_MIGRATION)" = "y" ]; then \
+		modprobe hisi_acc_vfio_pci; \
+	fi)
 	-echo "options hisi_sec2 uacce_mode=1 pf_q_num=256" > /etc/modprobe.d/hisi_sec2.conf
 	-echo "options hisi_hpre uacce_mode=1 pf_q_num=256" > /etc/modprobe.d/hisi_hpre.conf
 	-echo "options hisi_zip uacce_mode=1 pf_q_num=256" > /etc/modprobe.d/hisi_zip.conf
@@ -62,7 +71,9 @@ check:
 	done
 
 uninstall:
-	modprobe -r hisi_acc_vfio_pci
+	$(shell if [ "$(ENABLE_MIGRATION)" = "y" ]; then \
+		modprobe -r hisi_acc_vfio_pci; \
+	fi)
 	modprobe -r hisi_zip
 	modprobe -r hisi_hpre
 	modprobe -r hisi_sec2
@@ -73,7 +84,9 @@ uninstall:
 	rm -rf /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/extra/hisi_sec2.ko
 	rm -rf /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/extra/hisi_hpre.ko
 	rm -rf /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/extra/hisi_zip.ko
-	rm -rf /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/extra/hisi_acc_vfio_pci.ko
+	$(shell if [ "$(ENABLE_MIGRATION)" = "y" ]; then \
+		rm -rf /lib/modules/$(KERNEL_VERSION_BY_BUILDENV)/extra/hisi_acc_vfio_pci.ko; \
+	fi)
 	rm -rf /etc/modprobe.d/hisi_sec2.conf
 	rm -rf /etc/modprobe.d/hisi_hpre.conf
 	rm -rf /etc/modprobe.d/hisi_zip.conf
@@ -90,7 +103,9 @@ nosva:
 	-modprobe hisi_sec2 uacce_mode=2 pf_q_num=256
 	-modprobe hisi_hpre uacce_mode=2 pf_q_num=256
 	-modprobe hisi_zip uacce_mode=2 pf_q_num=256
-	-modprobe hisi_acc_vfio_pci
+	$(shell if [ "$(ENABLE_MIGRATION)" = "y" ]; then \
+		modprobe hisi_acc_vfio_pci; \
+	fi)
 	-echo "options hisi_sec2 uacce_mode=2 pf_q_num=256" > /etc/modprobe.d/hisi_sec2.conf
 	-echo "options hisi_hpre uacce_mode=2 pf_q_num=256" > /etc/modprobe.d/hisi_hpre.conf
 	-echo "options hisi_zip uacce_mode=2 pf_q_num=256" > /etc/modprobe.d/hisi_zip.conf
diff --git a/build.sh b/build.sh
index 6cc57d2..4f834b8 100644
--- a/build.sh
+++ b/build.sh
@@ -265,8 +265,14 @@ function build_driver()
 	lsmod | grep -q "^uacce" && modprobe -r uacce
 
 	cd ${KAE_KERNEL_DIR}
-	make -j
-	make nosva #默认使用nosva模式
+
+	if [ "$1" = "migration" ]; then
+		make ENABLE_MIGRATION=y -j
+		make nosva ENABLE_MIGRATION=y
+	else
+		make -j
+		make nosva	# use nosva mode by default
+	fi
 	# make install
 	chmod 666 /dev/hisi_*
 }
@@ -276,14 +282,22 @@ function build_driver_sva()
 	cd ${KAE_KERNEL_DIR}
 	make -j
 	# make nosva #默认使用nosva模式
-	make install
+	if [ "$1" = "migration" ]; then
+		make install ENABLE_MIGRATION=y
+	else
+		make install	# default build, without the migration driver
+	fi
 	chmod 666 /dev/hisi_*
 }
 
 function driver_clean()
 {
 	cd ${KAE_KERNEL_DIR}
-	make uninstall
+	if [ "$1" = "migration" ]; then
+		make uninstall ENABLE_MIGRATION=y
+	else
+		make uninstall	# default build, without the migration driver
+	fi
 	make clean
 }
 
@@ -629,6 +643,9 @@ function help()
 	echo "sh build.sh driver       -- install KAE driver"
 	echo "sh build.sh driver clean -- uninstall KAE driver"
 
+	echo "sh build.sh driver_migration       -- install the KAE driver together with the migration driver"
+	echo "sh build.sh driver_migration clean -- uninstall the KAE driver together with the migration driver"
+
 	echo "sh build.sh uadk         -- install uadk"
 	echo "sh build.sh uadk clean   -- uninstall uadk"
 
@@ -721,6 +738,19 @@ main() {
 		else
 			build_driver
 		fi
+		;;
+	"driver_migration")
+		if [ "$2" = "clean" ]; then
+			driver_clean migration
+		elif [ "$2" = "sva" ]; then
+			build_driver_sva migration
+		elif [ "$2" = "check" ]; then
+			driver_check
+		elif [ "$2" = "delete" ]; then
+			driver_delete
+		else
+			build_driver migration
+		fi
 		;;
 	"uadk")
 		if [ "$2" = "clean" ]; then
-- 
Gitee